1 | /** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
3 | * or more contributor license agreements. See the NOTICE file |
---|
4 | * distributed with this work for additional information |
---|
5 | * regarding copyright ownership. The ASF licenses this file |
---|
6 | * to you under the Apache License, Version 2.0 (the |
---|
7 | * "License"); you may not use this file except in compliance |
---|
8 | * with the License. You may obtain a copy of the License at |
---|
9 | * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
11 | * |
---|
12 | * Unless required by applicable law or agreed to in writing, software |
---|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
15 | * See the License for the specific language governing permissions and |
---|
16 | * limitations under the License. |
---|
17 | */ |
---|
18 | package org.apache.hadoop.util; |
---|
19 | |
---|
20 | import java.io.FileNotFoundException; |
---|
21 | import java.io.IOException; |
---|
22 | import java.io.PrintStream; |
---|
23 | import java.net.URI; |
---|
24 | import java.net.URL; |
---|
25 | import java.net.URLClassLoader; |
---|
26 | |
---|
27 | import org.apache.commons.cli.CommandLine; |
---|
28 | import org.apache.commons.cli.CommandLineParser; |
---|
29 | import org.apache.commons.cli.GnuParser; |
---|
30 | import org.apache.commons.cli.HelpFormatter; |
---|
31 | import org.apache.commons.cli.Option; |
---|
32 | import org.apache.commons.cli.OptionBuilder; |
---|
33 | import org.apache.commons.cli.Options; |
---|
34 | import org.apache.commons.cli.ParseException; |
---|
35 | import org.apache.commons.logging.Log; |
---|
36 | import org.apache.commons.logging.LogFactory; |
---|
37 | import org.apache.hadoop.conf.Configuration; |
---|
38 | import org.apache.hadoop.fs.FileSystem; |
---|
39 | import org.apache.hadoop.fs.Path; |
---|
40 | |
---|
41 | /** |
---|
42 | * <code>GenericOptionsParser</code> is a utility to parse command line |
---|
43 | * arguments generic to the Hadoop framework. |
---|
44 | * |
---|
45 | * <code>GenericOptionsParser</code> recognizes several standard command |
---|
46 | * line arguments, enabling applications to easily specify a namenode, a |
---|
47 | * jobtracker, additional configuration resources etc. |
---|
48 | * |
---|
49 | * <h4 id="GenericOptions">Generic Options</h4> |
---|
50 | * |
---|
51 | * <p>The supported generic options are:</p> |
---|
52 | * <p><blockquote><pre> |
---|
53 | * -conf <configuration file> specify a configuration file |
---|
54 | * -D <property=value> use value for given property |
---|
55 | * -fs <local|namenode:port> specify a namenode |
---|
56 | * -jt <local|jobtracker:port> specify a job tracker |
---|
57 | * -files <comma separated list of files> specify comma separated |
---|
58 | * files to be copied to the map reduce cluster |
---|
59 | * -libjars <comma separated list of jars> specify comma separated |
---|
60 | * jar files to include in the classpath. |
---|
61 | * -archives <comma separated list of archives> specify comma |
---|
62 | * separated archives to be unarchived on the compute machines. |
---|
63 | |
---|
64 | * </pre></blockquote></p> |
---|
65 | * |
---|
66 | * <p>The general command line syntax is:</p> |
---|
67 | * <p><tt><pre> |
---|
68 | * bin/hadoop command [genericOptions] [commandOptions] |
---|
69 | * </pre></tt></p> |
---|
70 | * |
---|
71 | * <p>Generic command line arguments <strong>might</strong> modify |
---|
72 | * <code>Configuration </code> objects, given to constructors.</p> |
---|
73 | * |
---|
74 | * <p>The functionality is implemented using Commons CLI.</p> |
---|
75 | * |
---|
76 | * <p>Examples:</p> |
---|
77 | * <p><blockquote><pre> |
---|
78 | * $ bin/hadoop dfs -fs darwin:8020 -ls /data |
---|
79 | * list /data directory in dfs with namenode darwin:8020 |
---|
80 | * |
---|
81 | * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data |
---|
82 | * list /data directory in dfs with namenode darwin:8020 |
---|
83 | * |
---|
84 | * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data |
---|
85 | * list /data directory in dfs with conf specified in hadoop-site.xml |
---|
86 | * |
---|
87 | * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml |
---|
88 | * submit a job to job tracker darwin:50020 |
---|
89 | * |
---|
90 | * $ bin/hadoop job -jt darwin:50020 -submit job.xml |
---|
91 | * submit a job to job tracker darwin:50020 |
---|
92 | * |
---|
93 | * $ bin/hadoop job -jt local -submit job.xml |
---|
94 | * submit a job to local runner |
---|
95 | * |
---|
96 | * $ bin/hadoop jar -libjars testlib.jar |
---|
97 | * -archives test.tgz -files file.txt inputjar args |
---|
98 | * job submission with libjars, files and archives |
---|
99 | * </pre></blockquote></p> |
---|
100 | * |
---|
101 | * @see Tool |
---|
102 | * @see ToolRunner |
---|
103 | */ |
---|
104 | public class GenericOptionsParser { |
---|
105 | |
---|
106 | private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class); |
---|
107 | private Configuration conf; |
---|
108 | private CommandLine commandLine; |
---|
109 | |
---|
110 | /** |
---|
111 | * Create an options parser with the given options to parse the args. |
---|
112 | * @param opts the options |
---|
113 | * @param args the command line arguments |
---|
114 | */ |
---|
115 | public GenericOptionsParser(Options opts, String[] args) { |
---|
116 | this(new Configuration(), new Options(), args); |
---|
117 | } |
---|
118 | |
---|
119 | /** |
---|
120 | * Create an options parser to parse the args. |
---|
121 | * @param args the command line arguments |
---|
122 | */ |
---|
123 | public GenericOptionsParser(String[] args) { |
---|
124 | this(new Configuration(), new Options(), args); |
---|
125 | } |
---|
126 | |
---|
127 | /** |
---|
128 | * Create a <code>GenericOptionsParser<code> to parse only the generic Hadoop |
---|
129 | * arguments. |
---|
130 | * |
---|
131 | * The array of string arguments other than the generic arguments can be |
---|
132 | * obtained by {@link #getRemainingArgs()}. |
---|
133 | * |
---|
134 | * @param conf the <code>Configuration</code> to modify. |
---|
135 | * @param args command-line arguments. |
---|
136 | */ |
---|
137 | public GenericOptionsParser(Configuration conf, String[] args) { |
---|
138 | this(conf, new Options(), args); |
---|
139 | } |
---|
140 | |
---|
141 | /** |
---|
142 | * Create a <code>GenericOptionsParser</code> to parse given options as well |
---|
143 | * as generic Hadoop options. |
---|
144 | * |
---|
145 | * The resulting <code>CommandLine</code> object can be obtained by |
---|
146 | * {@link #getCommandLine()}. |
---|
147 | * |
---|
148 | * @param conf the configuration to modify |
---|
149 | * @param options options built by the caller |
---|
150 | * @param args User-specified arguments |
---|
151 | */ |
---|
152 | public GenericOptionsParser(Configuration conf, Options options, String[] args) { |
---|
153 | parseGeneralOptions(options, conf, args); |
---|
154 | this.conf = conf; |
---|
155 | } |
---|
156 | |
---|
157 | /** |
---|
158 | * Returns an array of Strings containing only application-specific arguments. |
---|
159 | * |
---|
160 | * @return array of <code>String</code>s containing the un-parsed arguments |
---|
161 | * or <strong>empty array</strong> if commandLine was not defined. |
---|
162 | */ |
---|
163 | public String[] getRemainingArgs() { |
---|
164 | return (commandLine == null) ? new String[]{} : commandLine.getArgs(); |
---|
165 | } |
---|
166 | |
---|
167 | /** |
---|
168 | * Get the modified configuration |
---|
169 | * @return the configuration that has the modified parameters. |
---|
170 | */ |
---|
171 | public Configuration getConfiguration() { |
---|
172 | return conf; |
---|
173 | } |
---|
174 | |
---|
175 | /** |
---|
176 | * Returns the commons-cli <code>CommandLine</code> object |
---|
177 | * to process the parsed arguments. |
---|
178 | * |
---|
179 | * Note: If the object is created with |
---|
180 | * {@link #GenericOptionsParser(Configuration, String[])}, then returned |
---|
181 | * object will only contain parsed generic options. |
---|
182 | * |
---|
183 | * @return <code>CommandLine</code> representing list of arguments |
---|
184 | * parsed against Options descriptor. |
---|
185 | */ |
---|
186 | public CommandLine getCommandLine() { |
---|
187 | return commandLine; |
---|
188 | } |
---|
189 | |
---|
190 | /** |
---|
191 | * Specify properties of each generic option |
---|
192 | */ |
---|
193 | @SuppressWarnings("static-access") |
---|
194 | private static Options buildGeneralOptions(Options opts) { |
---|
195 | Option fs = OptionBuilder.withArgName("local|namenode:port") |
---|
196 | .hasArg() |
---|
197 | .withDescription("specify a namenode") |
---|
198 | .create("fs"); |
---|
199 | Option jt = OptionBuilder.withArgName("local|jobtracker:port") |
---|
200 | .hasArg() |
---|
201 | .withDescription("specify a job tracker") |
---|
202 | .create("jt"); |
---|
203 | Option oconf = OptionBuilder.withArgName("configuration file") |
---|
204 | .hasArg() |
---|
205 | .withDescription("specify an application configuration file") |
---|
206 | .create("conf"); |
---|
207 | Option property = OptionBuilder.withArgName("property=value") |
---|
208 | .hasArg() |
---|
209 | .withDescription("use value for given property") |
---|
210 | .create('D'); |
---|
211 | Option libjars = OptionBuilder.withArgName("paths") |
---|
212 | .hasArg() |
---|
213 | .withDescription("comma separated jar files to include in the classpath.") |
---|
214 | .create("libjars"); |
---|
215 | Option files = OptionBuilder.withArgName("paths") |
---|
216 | .hasArg() |
---|
217 | .withDescription("comma separated files to be copied to the " + |
---|
218 | "map reduce cluster") |
---|
219 | .create("files"); |
---|
220 | Option archives = OptionBuilder.withArgName("paths") |
---|
221 | .hasArg() |
---|
222 | .withDescription("comma separated archives to be unarchived" + |
---|
223 | " on the compute machines.") |
---|
224 | .create("archives"); |
---|
225 | |
---|
226 | opts.addOption(fs); |
---|
227 | opts.addOption(jt); |
---|
228 | opts.addOption(oconf); |
---|
229 | opts.addOption(property); |
---|
230 | opts.addOption(libjars); |
---|
231 | opts.addOption(files); |
---|
232 | opts.addOption(archives); |
---|
233 | |
---|
234 | return opts; |
---|
235 | } |
---|
236 | |
---|
237 | /** |
---|
238 | * Modify configuration according user-specified generic options |
---|
239 | * @param conf Configuration to be modified |
---|
240 | * @param line User-specified generic options |
---|
241 | */ |
---|
242 | private void processGeneralOptions(Configuration conf, |
---|
243 | CommandLine line) { |
---|
244 | if (line.hasOption("fs")) { |
---|
245 | FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); |
---|
246 | } |
---|
247 | |
---|
248 | if (line.hasOption("jt")) { |
---|
249 | conf.set("mapred.job.tracker", line.getOptionValue("jt")); |
---|
250 | } |
---|
251 | if (line.hasOption("conf")) { |
---|
252 | String[] values = line.getOptionValues("conf"); |
---|
253 | for(String value : values) { |
---|
254 | conf.addResource(new Path(value)); |
---|
255 | } |
---|
256 | } |
---|
257 | try { |
---|
258 | if (line.hasOption("libjars")) { |
---|
259 | conf.set("tmpjars", |
---|
260 | validateFiles(line.getOptionValue("libjars"), conf)); |
---|
261 | //setting libjars in client classpath |
---|
262 | URL[] libjars = getLibJars(conf); |
---|
263 | if(libjars!=null && libjars.length>0) { |
---|
264 | conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); |
---|
265 | Thread.currentThread().setContextClassLoader( |
---|
266 | new URLClassLoader(libjars, |
---|
267 | Thread.currentThread().getContextClassLoader())); |
---|
268 | } |
---|
269 | } |
---|
270 | if (line.hasOption("files")) { |
---|
271 | conf.set("tmpfiles", |
---|
272 | validateFiles(line.getOptionValue("files"), conf)); |
---|
273 | } |
---|
274 | if (line.hasOption("archives")) { |
---|
275 | conf.set("tmparchives", |
---|
276 | validateFiles(line.getOptionValue("archives"), conf)); |
---|
277 | } |
---|
278 | } catch (IOException ioe) { |
---|
279 | System.err.println(StringUtils.stringifyException(ioe)); |
---|
280 | } |
---|
281 | if (line.hasOption('D')) { |
---|
282 | String[] property = line.getOptionValues('D'); |
---|
283 | for(String prop : property) { |
---|
284 | String[] keyval = prop.split("=", 2); |
---|
285 | if (keyval.length == 2) { |
---|
286 | conf.set(keyval[0], keyval[1]); |
---|
287 | } |
---|
288 | } |
---|
289 | } |
---|
290 | conf.setBoolean("mapred.used.genericoptionsparser", true); |
---|
291 | } |
---|
292 | |
---|
293 | /** |
---|
294 | * If libjars are set in the conf, parse the libjars. |
---|
295 | * @param conf |
---|
296 | * @return libjar urls |
---|
297 | * @throws IOException |
---|
298 | */ |
---|
299 | public static URL[] getLibJars(Configuration conf) throws IOException { |
---|
300 | String jars = conf.get("tmpjars"); |
---|
301 | if(jars==null) { |
---|
302 | return null; |
---|
303 | } |
---|
304 | String[] files = jars.split(","); |
---|
305 | URL[] cp = new URL[files.length]; |
---|
306 | for (int i=0;i<cp.length;i++) { |
---|
307 | Path tmp = new Path(files[i]); |
---|
308 | cp[i] = FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL(); |
---|
309 | } |
---|
310 | return cp; |
---|
311 | } |
---|
312 | |
---|
313 | /** |
---|
314 | * takes input as a comma separated list of files |
---|
315 | * and verifies if they exist. It defaults for file:/// |
---|
316 | * if the files specified do not have a scheme. |
---|
317 | * it returns the paths uri converted defaulting to file:///. |
---|
318 | * So an input of /home/user/file1,/home/user/file2 would return |
---|
319 | * file:///home/user/file1,file:///home/user/file2 |
---|
320 | * @param files |
---|
321 | * @return |
---|
322 | */ |
---|
323 | private String validateFiles(String files, Configuration conf) throws IOException { |
---|
324 | if (files == null) |
---|
325 | return null; |
---|
326 | String[] fileArr = files.split(","); |
---|
327 | String[] finalArr = new String[fileArr.length]; |
---|
328 | for (int i =0; i < fileArr.length; i++) { |
---|
329 | String tmp = fileArr[i]; |
---|
330 | String finalPath; |
---|
331 | Path path = new Path(tmp); |
---|
332 | URI pathURI = path.toUri(); |
---|
333 | FileSystem localFs = FileSystem.getLocal(conf); |
---|
334 | if (pathURI.getScheme() == null) { |
---|
335 | //default to the local file system |
---|
336 | //check if the file exists or not first |
---|
337 | if (!localFs.exists(path)) { |
---|
338 | throw new FileNotFoundException("File " + tmp + " does not exist."); |
---|
339 | } |
---|
340 | finalPath = path.makeQualified(localFs).toString(); |
---|
341 | } |
---|
342 | else { |
---|
343 | // check if the file exists in this file system |
---|
344 | // we need to recreate this filesystem object to copy |
---|
345 | // these files to the file system jobtracker is running |
---|
346 | // on. |
---|
347 | FileSystem fs = path.getFileSystem(conf); |
---|
348 | if (!fs.exists(path)) { |
---|
349 | throw new FileNotFoundException("File " + tmp + " does not exist."); |
---|
350 | } |
---|
351 | finalPath = path.makeQualified(fs).toString(); |
---|
352 | try { |
---|
353 | fs.close(); |
---|
354 | } catch(IOException e){}; |
---|
355 | } |
---|
356 | finalArr[i] = finalPath; |
---|
357 | } |
---|
358 | return StringUtils.arrayToString(finalArr); |
---|
359 | } |
---|
360 | |
---|
361 | |
---|
362 | /** |
---|
363 | * Parse the user-specified options, get the generic options, and modify |
---|
364 | * configuration accordingly |
---|
365 | * @param conf Configuration to be modified |
---|
366 | * @param args User-specified arguments |
---|
367 | * @return Command-specific arguments |
---|
368 | */ |
---|
369 | private String[] parseGeneralOptions(Options opts, Configuration conf, |
---|
370 | String[] args) { |
---|
371 | opts = buildGeneralOptions(opts); |
---|
372 | CommandLineParser parser = new GnuParser(); |
---|
373 | try { |
---|
374 | commandLine = parser.parse(opts, args, true); |
---|
375 | processGeneralOptions(conf, commandLine); |
---|
376 | return commandLine.getArgs(); |
---|
377 | } catch(ParseException e) { |
---|
378 | LOG.warn("options parsing failed: "+e.getMessage()); |
---|
379 | |
---|
380 | HelpFormatter formatter = new HelpFormatter(); |
---|
381 | formatter.printHelp("general options are: ", opts); |
---|
382 | } |
---|
383 | return args; |
---|
384 | } |
---|
385 | |
---|
386 | /** |
---|
387 | * Print the usage message for generic command-line options supported. |
---|
388 | * |
---|
389 | * @param out stream to print the usage message to. |
---|
390 | */ |
---|
391 | public static void printGenericCommandUsage(PrintStream out) { |
---|
392 | |
---|
393 | out.println("Generic options supported are"); |
---|
394 | out.println("-conf <configuration file> specify an application configuration file"); |
---|
395 | out.println("-D <property=value> use value for given property"); |
---|
396 | out.println("-fs <local|namenode:port> specify a namenode"); |
---|
397 | out.println("-jt <local|jobtracker:port> specify a job tracker"); |
---|
398 | out.println("-files <comma separated list of files> " + |
---|
399 | "specify comma separated files to be copied to the map reduce cluster"); |
---|
400 | out.println("-libjars <comma separated list of jars> " + |
---|
401 | "specify comma separated jar files to include in the classpath."); |
---|
402 | out.println("-archives <comma separated list of archives> " + |
---|
403 | "specify comma separated archives to be unarchived" + |
---|
404 | " on the compute machines.\n"); |
---|
405 | out.println("The general command line syntax is"); |
---|
406 | out.println("bin/hadoop command [genericOptions] [commandOptions]\n"); |
---|
407 | } |
---|
408 | |
---|
409 | } |
---|