source: proiecte/HadoopJUnit/hadoop-0.20.1/src/core/org/apache/hadoop/util/GenericOptionsParser.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 14.2 KB
Line 
1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements.  See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership.  The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License.  You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18package org.apache.hadoop.util;
19
20import java.io.FileNotFoundException;
21import java.io.IOException;
22import java.io.PrintStream;
23import java.net.URI;
24import java.net.URL;
25import java.net.URLClassLoader;
26
27import org.apache.commons.cli.CommandLine;
28import org.apache.commons.cli.CommandLineParser;
29import org.apache.commons.cli.GnuParser;
30import org.apache.commons.cli.HelpFormatter;
31import org.apache.commons.cli.Option;
32import org.apache.commons.cli.OptionBuilder;
33import org.apache.commons.cli.Options;
34import org.apache.commons.cli.ParseException;
35import org.apache.commons.logging.Log;
36import org.apache.commons.logging.LogFactory;
37import org.apache.hadoop.conf.Configuration;
38import org.apache.hadoop.fs.FileSystem;
39import org.apache.hadoop.fs.Path;
40
41/**
42 * <code>GenericOptionsParser</code> is a utility to parse command line
43 * arguments generic to the Hadoop framework.
44 *
45 * <code>GenericOptionsParser</code> recognizes several standard command
46 * line arguments, enabling applications to easily specify a namenode, a
47 * jobtracker, additional configuration resources etc.
48 *
49 * <h4 id="GenericOptions">Generic Options</h4>
50 *
51 * <p>The supported generic options are:</p>
52 * <p><blockquote><pre>
53 *     -conf &lt;configuration file&gt;     specify a configuration file
54 *     -D &lt;property=value&gt;            use value for given property
55 *     -fs &lt;local|namenode:port&gt;      specify a namenode
56 *     -jt &lt;local|jobtracker:port&gt;    specify a job tracker
57 *     -files &lt;comma separated list of files&gt;    specify comma separated
58 *                            files to be copied to the map reduce cluster
59 *     -libjars &lt;comma separated list of jars&gt;   specify comma separated
60 *                            jar files to include in the classpath.
61 *     -archives &lt;comma separated list of archives&gt;    specify comma
62 *             separated archives to be unarchived on the compute machines.
63
64 * </pre></blockquote></p>
65 *
66 * <p>The general command line syntax is:</p>
67 * <p><tt><pre>
68 * bin/hadoop command [genericOptions] [commandOptions]
69 * </pre></tt></p>
70 *
71 * <p>Generic command line arguments <strong>might</strong> modify
72 * <code>Configuration </code> objects, given to constructors.</p>
73 *
74 * <p>The functionality is implemented using Commons CLI.</p>
75 *
76 * <p>Examples:</p>
77 * <p><blockquote><pre>
78 * $ bin/hadoop dfs -fs darwin:8020 -ls /data
79 * list /data directory in dfs with namenode darwin:8020
80 *
81 * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
82 * list /data directory in dfs with namenode darwin:8020
83 *     
84 * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
85 * list /data directory in dfs with conf specified in hadoop-site.xml
86 *     
87 * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
88 * submit a job to job tracker darwin:50020
89 *     
90 * $ bin/hadoop job -jt darwin:50020 -submit job.xml
91 * submit a job to job tracker darwin:50020
92 *     
93 * $ bin/hadoop job -jt local -submit job.xml
94 * submit a job to local runner
95 *
96 * $ bin/hadoop jar -libjars testlib.jar
97 * -archives test.tgz -files file.txt inputjar args
98 * job submission with libjars, files and archives
99 * </pre></blockquote></p>
100 *
101 * @see Tool
102 * @see ToolRunner
103 */
104public class GenericOptionsParser {
105
106  private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class);
107  private Configuration conf;
108  private CommandLine commandLine;
109
110  /**
111   * Create an options parser with the given options to parse the args.
112   * @param opts the options
113   * @param args the command line arguments
114   */
115  public GenericOptionsParser(Options opts, String[] args) {
116    this(new Configuration(), new Options(), args);
117  }
118
119  /**
120   * Create an options parser to parse the args.
121   * @param args the command line arguments
122   */
123  public GenericOptionsParser(String[] args) {
124    this(new Configuration(), new Options(), args);
125  }
126 
127  /**
128   * Create a <code>GenericOptionsParser<code> to parse only the generic Hadoop 
129   * arguments.
130   *
131   * The array of string arguments other than the generic arguments can be
132   * obtained by {@link #getRemainingArgs()}.
133   *
134   * @param conf the <code>Configuration</code> to modify.
135   * @param args command-line arguments.
136   */
137  public GenericOptionsParser(Configuration conf, String[] args) {
138    this(conf, new Options(), args); 
139  }
140
141  /**
142   * Create a <code>GenericOptionsParser</code> to parse given options as well
143   * as generic Hadoop options.
144   *
145   * The resulting <code>CommandLine</code> object can be obtained by
146   * {@link #getCommandLine()}.
147   *
148   * @param conf the configuration to modify 
149   * @param options options built by the caller
150   * @param args User-specified arguments
151   */
152  public GenericOptionsParser(Configuration conf, Options options, String[] args) {
153    parseGeneralOptions(options, conf, args);
154    this.conf = conf;
155  }
156
157  /**
158   * Returns an array of Strings containing only application-specific arguments.
159   *
160   * @return array of <code>String</code>s containing the un-parsed arguments
161   * or <strong>empty array</strong> if commandLine was not defined.
162   */
163  public String[] getRemainingArgs() {
164    return (commandLine == null) ? new String[]{} : commandLine.getArgs();
165  }
166
167  /**
168   * Get the modified configuration
169   * @return the configuration that has the modified parameters.
170   */
171  public Configuration getConfiguration() {
172    return conf;
173  }
174
175  /**
176   * Returns the commons-cli <code>CommandLine</code> object
177   * to process the parsed arguments.
178   *
179   * Note: If the object is created with
180   * {@link #GenericOptionsParser(Configuration, String[])}, then returned
181   * object will only contain parsed generic options.
182   *
183   * @return <code>CommandLine</code> representing list of arguments
184   *         parsed against Options descriptor.
185   */
186  public CommandLine getCommandLine() {
187    return commandLine;
188  }
189
190  /**
191   * Specify properties of each generic option
192   */
193  @SuppressWarnings("static-access")
194  private static Options buildGeneralOptions(Options opts) {
195    Option fs = OptionBuilder.withArgName("local|namenode:port")
196    .hasArg()
197    .withDescription("specify a namenode")
198    .create("fs");
199    Option jt = OptionBuilder.withArgName("local|jobtracker:port")
200    .hasArg()
201    .withDescription("specify a job tracker")
202    .create("jt");
203    Option oconf = OptionBuilder.withArgName("configuration file")
204    .hasArg()
205    .withDescription("specify an application configuration file")
206    .create("conf");
207    Option property = OptionBuilder.withArgName("property=value")
208    .hasArg()
209    .withDescription("use value for given property")
210    .create('D');
211    Option libjars = OptionBuilder.withArgName("paths")
212    .hasArg()
213    .withDescription("comma separated jar files to include in the classpath.")
214    .create("libjars");
215    Option files = OptionBuilder.withArgName("paths")
216    .hasArg()
217    .withDescription("comma separated files to be copied to the " +
218           "map reduce cluster")
219    .create("files");
220    Option archives = OptionBuilder.withArgName("paths")
221    .hasArg()
222    .withDescription("comma separated archives to be unarchived" +
223                     " on the compute machines.")
224    .create("archives");
225
226    opts.addOption(fs);
227    opts.addOption(jt);
228    opts.addOption(oconf);
229    opts.addOption(property);
230    opts.addOption(libjars);
231    opts.addOption(files);
232    opts.addOption(archives);
233
234    return opts;
235  }
236
237  /**
238   * Modify configuration according user-specified generic options
239   * @param conf Configuration to be modified
240   * @param line User-specified generic options
241   */
242  private void processGeneralOptions(Configuration conf,
243      CommandLine line) {
244    if (line.hasOption("fs")) {
245      FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
246    }
247
248    if (line.hasOption("jt")) {
249      conf.set("mapred.job.tracker", line.getOptionValue("jt"));
250    }
251    if (line.hasOption("conf")) {
252      String[] values = line.getOptionValues("conf");
253      for(String value : values) {
254        conf.addResource(new Path(value));
255      }
256    }
257    try {
258      if (line.hasOption("libjars")) {
259        conf.set("tmpjars", 
260                 validateFiles(line.getOptionValue("libjars"), conf));
261        //setting libjars in client classpath
262        URL[] libjars = getLibJars(conf);
263        if(libjars!=null && libjars.length>0) {
264          conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
265          Thread.currentThread().setContextClassLoader(
266              new URLClassLoader(libjars, 
267                  Thread.currentThread().getContextClassLoader()));
268        }
269      }
270      if (line.hasOption("files")) {
271        conf.set("tmpfiles", 
272                 validateFiles(line.getOptionValue("files"), conf));
273      }
274      if (line.hasOption("archives")) {
275        conf.set("tmparchives", 
276                  validateFiles(line.getOptionValue("archives"), conf));
277      }
278    } catch (IOException ioe) {
279      System.err.println(StringUtils.stringifyException(ioe));
280    }
281    if (line.hasOption('D')) {
282      String[] property = line.getOptionValues('D');
283      for(String prop : property) {
284        String[] keyval = prop.split("=", 2);
285        if (keyval.length == 2) {
286          conf.set(keyval[0], keyval[1]);
287        }
288      }
289    }
290    conf.setBoolean("mapred.used.genericoptionsparser", true);
291  }
292 
293  /**
294   * If libjars are set in the conf, parse the libjars.
295   * @param conf
296   * @return libjar urls
297   * @throws IOException
298   */
299  public static URL[] getLibJars(Configuration conf) throws IOException {
300    String jars = conf.get("tmpjars");
301    if(jars==null) {
302      return null;
303    }
304    String[] files = jars.split(",");
305    URL[] cp = new URL[files.length];
306    for (int i=0;i<cp.length;i++) {
307      Path tmp = new Path(files[i]);
308      cp[i] = FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL();
309    }
310    return cp;
311  }
312
313  /**
314   * takes input as a comma separated list of files
315   * and verifies if they exist. It defaults for file:///
316   * if the files specified do not have a scheme.
317   * it returns the paths uri converted defaulting to file:///.
318   * So an input of  /home/user/file1,/home/user/file2 would return
319   * file:///home/user/file1,file:///home/user/file2
320   * @param files
321   * @return
322   */
323  private String validateFiles(String files, Configuration conf) throws IOException  {
324    if (files == null) 
325      return null;
326    String[] fileArr = files.split(",");
327    String[] finalArr = new String[fileArr.length];
328    for (int i =0; i < fileArr.length; i++) {
329      String tmp = fileArr[i];
330      String finalPath;
331      Path path = new Path(tmp);
332      URI pathURI =  path.toUri();
333      FileSystem localFs = FileSystem.getLocal(conf);
334      if (pathURI.getScheme() == null) {
335        //default to the local file system
336        //check if the file exists or not first
337        if (!localFs.exists(path)) {
338          throw new FileNotFoundException("File " + tmp + " does not exist.");
339        }
340        finalPath = path.makeQualified(localFs).toString();
341      }
342      else {
343        // check if the file exists in this file system
344        // we need to recreate this filesystem object to copy
345        // these files to the file system jobtracker is running
346        // on.
347        FileSystem fs = path.getFileSystem(conf);
348        if (!fs.exists(path)) {
349          throw new FileNotFoundException("File " + tmp + " does not exist.");
350        }
351        finalPath = path.makeQualified(fs).toString();
352        try {
353          fs.close();
354        } catch(IOException e){};
355      }
356      finalArr[i] = finalPath;
357    }
358    return StringUtils.arrayToString(finalArr);
359  }
360 
361
362  /**
363   * Parse the user-specified options, get the generic options, and modify
364   * configuration accordingly
365   * @param conf Configuration to be modified
366   * @param args User-specified arguments
367   * @return Command-specific arguments
368   */
369  private String[] parseGeneralOptions(Options opts, Configuration conf, 
370      String[] args) {
371    opts = buildGeneralOptions(opts);
372    CommandLineParser parser = new GnuParser();
373    try {
374      commandLine = parser.parse(opts, args, true);
375      processGeneralOptions(conf, commandLine);
376      return commandLine.getArgs();
377    } catch(ParseException e) {
378      LOG.warn("options parsing failed: "+e.getMessage());
379
380      HelpFormatter formatter = new HelpFormatter();
381      formatter.printHelp("general options are: ", opts);
382    }
383    return args;
384  }
385
386  /**
387   * Print the usage message for generic command-line options supported.
388   *
389   * @param out stream to print the usage message to.
390   */
391  public static void printGenericCommandUsage(PrintStream out) {
392   
393    out.println("Generic options supported are");
394    out.println("-conf <configuration file>     specify an application configuration file");
395    out.println("-D <property=value>            use value for given property");
396    out.println("-fs <local|namenode:port>      specify a namenode");
397    out.println("-jt <local|jobtracker:port>    specify a job tracker");
398    out.println("-files <comma separated list of files>    " + 
399      "specify comma separated files to be copied to the map reduce cluster");
400    out.println("-libjars <comma separated list of jars>    " +
401      "specify comma separated jar files to include in the classpath.");
402    out.println("-archives <comma separated list of archives>    " +
403                "specify comma separated archives to be unarchived" +
404                " on the compute machines.\n");
405    out.println("The general command line syntax is");
406    out.println("bin/hadoop command [genericOptions] [commandOptions]\n");
407  }
408 
409}
Note: See TracBrowser for help on using the repository browser.