[120] | 1 | /** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
| 3 | * or more contributor license agreements. See the NOTICE file |
---|
| 4 | * distributed with this work for additional information |
---|
| 5 | * regarding copyright ownership. The ASF licenses this file |
---|
| 6 | * to you under the Apache License, Version 2.0 (the |
---|
| 7 | * "License"); you may not use this file except in compliance |
---|
| 8 | * with the License. You may obtain a copy of the License at |
---|
| 9 | * |
---|
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 11 | * |
---|
| 12 | * Unless required by applicable law or agreed to in writing, software |
---|
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 15 | * See the License for the specific language governing permissions and |
---|
| 16 | * limitations under the License. |
---|
| 17 | */ |
---|
| 18 | package org.apache.hadoop.util; |
---|
| 19 | |
---|
| 20 | import java.io.FileNotFoundException; |
---|
| 21 | import java.io.IOException; |
---|
| 22 | import java.io.PrintStream; |
---|
| 23 | import java.net.URI; |
---|
| 24 | import java.net.URL; |
---|
| 25 | import java.net.URLClassLoader; |
---|
| 26 | |
---|
| 27 | import org.apache.commons.cli.CommandLine; |
---|
| 28 | import org.apache.commons.cli.CommandLineParser; |
---|
| 29 | import org.apache.commons.cli.GnuParser; |
---|
| 30 | import org.apache.commons.cli.HelpFormatter; |
---|
| 31 | import org.apache.commons.cli.Option; |
---|
| 32 | import org.apache.commons.cli.OptionBuilder; |
---|
| 33 | import org.apache.commons.cli.Options; |
---|
| 34 | import org.apache.commons.cli.ParseException; |
---|
| 35 | import org.apache.commons.logging.Log; |
---|
| 36 | import org.apache.commons.logging.LogFactory; |
---|
| 37 | import org.apache.hadoop.conf.Configuration; |
---|
| 38 | import org.apache.hadoop.fs.FileSystem; |
---|
| 39 | import org.apache.hadoop.fs.Path; |
---|
| 40 | |
---|
| 41 | /** |
---|
| 42 | * <code>GenericOptionsParser</code> is a utility to parse command line |
---|
| 43 | * arguments generic to the Hadoop framework. |
---|
| 44 | * |
---|
| 45 | * <code>GenericOptionsParser</code> recognizes several standard command |
---|
| 46 | * line arguments, enabling applications to easily specify a namenode, a |
---|
| 47 | * jobtracker, additional configuration resources etc. |
---|
| 48 | * |
---|
| 49 | * <h4 id="GenericOptions">Generic Options</h4> |
---|
| 50 | * |
---|
| 51 | * <p>The supported generic options are:</p> |
---|
| 52 | * <p><blockquote><pre> |
---|
| 53 | * -conf <configuration file> specify a configuration file |
---|
| 54 | * -D <property=value> use value for given property |
---|
| 55 | * -fs <local|namenode:port> specify a namenode |
---|
| 56 | * -jt <local|jobtracker:port> specify a job tracker |
---|
| 57 | * -files <comma separated list of files> specify comma separated |
---|
| 58 | * files to be copied to the map reduce cluster |
---|
| 59 | * -libjars <comma separated list of jars> specify comma separated |
---|
| 60 | * jar files to include in the classpath. |
---|
| 61 | * -archives <comma separated list of archives> specify comma |
---|
| 62 | * separated archives to be unarchived on the compute machines. |
---|
| 63 | |
---|
| 64 | * </pre></blockquote></p> |
---|
| 65 | * |
---|
| 66 | * <p>The general command line syntax is:</p> |
---|
| 67 | * <p><tt><pre> |
---|
| 68 | * bin/hadoop command [genericOptions] [commandOptions] |
---|
| 69 | * </pre></tt></p> |
---|
| 70 | * |
---|
| 71 | * <p>Generic command line arguments <strong>might</strong> modify |
---|
| 72 | * <code>Configuration </code> objects, given to constructors.</p> |
---|
| 73 | * |
---|
| 74 | * <p>The functionality is implemented using Commons CLI.</p> |
---|
| 75 | * |
---|
| 76 | * <p>Examples:</p> |
---|
| 77 | * <p><blockquote><pre> |
---|
| 78 | * $ bin/hadoop dfs -fs darwin:8020 -ls /data |
---|
| 79 | * list /data directory in dfs with namenode darwin:8020 |
---|
| 80 | * |
---|
| 81 | * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data |
---|
| 82 | * list /data directory in dfs with namenode darwin:8020 |
---|
| 83 | * |
---|
| 84 | * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data |
---|
| 85 | * list /data directory in dfs with conf specified in hadoop-site.xml |
---|
| 86 | * |
---|
| 87 | * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml |
---|
| 88 | * submit a job to job tracker darwin:50020 |
---|
| 89 | * |
---|
| 90 | * $ bin/hadoop job -jt darwin:50020 -submit job.xml |
---|
| 91 | * submit a job to job tracker darwin:50020 |
---|
| 92 | * |
---|
| 93 | * $ bin/hadoop job -jt local -submit job.xml |
---|
| 94 | * submit a job to local runner |
---|
| 95 | * |
---|
| 96 | * $ bin/hadoop jar -libjars testlib.jar |
---|
| 97 | * -archives test.tgz -files file.txt inputjar args |
---|
| 98 | * job submission with libjars, files and archives |
---|
| 99 | * </pre></blockquote></p> |
---|
| 100 | * |
---|
| 101 | * @see Tool |
---|
| 102 | * @see ToolRunner |
---|
| 103 | */ |
---|
| 104 | public class GenericOptionsParser { |
---|
| 105 | |
---|
| 106 | private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class); |
---|
| 107 | private Configuration conf; |
---|
| 108 | private CommandLine commandLine; |
---|
| 109 | |
---|
| 110 | /** |
---|
| 111 | * Create an options parser with the given options to parse the args. |
---|
| 112 | * @param opts the options |
---|
| 113 | * @param args the command line arguments |
---|
| 114 | */ |
---|
| 115 | public GenericOptionsParser(Options opts, String[] args) { |
---|
| 116 | this(new Configuration(), new Options(), args); |
---|
| 117 | } |
---|
| 118 | |
---|
| 119 | /** |
---|
| 120 | * Create an options parser to parse the args. |
---|
| 121 | * @param args the command line arguments |
---|
| 122 | */ |
---|
| 123 | public GenericOptionsParser(String[] args) { |
---|
| 124 | this(new Configuration(), new Options(), args); |
---|
| 125 | } |
---|
| 126 | |
---|
| 127 | /** |
---|
| 128 | * Create a <code>GenericOptionsParser<code> to parse only the generic Hadoop |
---|
| 129 | * arguments. |
---|
| 130 | * |
---|
| 131 | * The array of string arguments other than the generic arguments can be |
---|
| 132 | * obtained by {@link #getRemainingArgs()}. |
---|
| 133 | * |
---|
| 134 | * @param conf the <code>Configuration</code> to modify. |
---|
| 135 | * @param args command-line arguments. |
---|
| 136 | */ |
---|
| 137 | public GenericOptionsParser(Configuration conf, String[] args) { |
---|
| 138 | this(conf, new Options(), args); |
---|
| 139 | } |
---|
| 140 | |
---|
| 141 | /** |
---|
| 142 | * Create a <code>GenericOptionsParser</code> to parse given options as well |
---|
| 143 | * as generic Hadoop options. |
---|
| 144 | * |
---|
| 145 | * The resulting <code>CommandLine</code> object can be obtained by |
---|
| 146 | * {@link #getCommandLine()}. |
---|
| 147 | * |
---|
| 148 | * @param conf the configuration to modify |
---|
| 149 | * @param options options built by the caller |
---|
| 150 | * @param args User-specified arguments |
---|
| 151 | */ |
---|
| 152 | public GenericOptionsParser(Configuration conf, Options options, String[] args) { |
---|
| 153 | parseGeneralOptions(options, conf, args); |
---|
| 154 | this.conf = conf; |
---|
| 155 | } |
---|
| 156 | |
---|
| 157 | /** |
---|
| 158 | * Returns an array of Strings containing only application-specific arguments. |
---|
| 159 | * |
---|
| 160 | * @return array of <code>String</code>s containing the un-parsed arguments |
---|
| 161 | * or <strong>empty array</strong> if commandLine was not defined. |
---|
| 162 | */ |
---|
| 163 | public String[] getRemainingArgs() { |
---|
| 164 | return (commandLine == null) ? new String[]{} : commandLine.getArgs(); |
---|
| 165 | } |
---|
| 166 | |
---|
| 167 | /** |
---|
| 168 | * Get the modified configuration |
---|
| 169 | * @return the configuration that has the modified parameters. |
---|
| 170 | */ |
---|
| 171 | public Configuration getConfiguration() { |
---|
| 172 | return conf; |
---|
| 173 | } |
---|
| 174 | |
---|
| 175 | /** |
---|
| 176 | * Returns the commons-cli <code>CommandLine</code> object |
---|
| 177 | * to process the parsed arguments. |
---|
| 178 | * |
---|
| 179 | * Note: If the object is created with |
---|
| 180 | * {@link #GenericOptionsParser(Configuration, String[])}, then returned |
---|
| 181 | * object will only contain parsed generic options. |
---|
| 182 | * |
---|
| 183 | * @return <code>CommandLine</code> representing list of arguments |
---|
| 184 | * parsed against Options descriptor. |
---|
| 185 | */ |
---|
| 186 | public CommandLine getCommandLine() { |
---|
| 187 | return commandLine; |
---|
| 188 | } |
---|
| 189 | |
---|
| 190 | /** |
---|
| 191 | * Specify properties of each generic option |
---|
| 192 | */ |
---|
| 193 | @SuppressWarnings("static-access") |
---|
| 194 | private static Options buildGeneralOptions(Options opts) { |
---|
| 195 | Option fs = OptionBuilder.withArgName("local|namenode:port") |
---|
| 196 | .hasArg() |
---|
| 197 | .withDescription("specify a namenode") |
---|
| 198 | .create("fs"); |
---|
| 199 | Option jt = OptionBuilder.withArgName("local|jobtracker:port") |
---|
| 200 | .hasArg() |
---|
| 201 | .withDescription("specify a job tracker") |
---|
| 202 | .create("jt"); |
---|
| 203 | Option oconf = OptionBuilder.withArgName("configuration file") |
---|
| 204 | .hasArg() |
---|
| 205 | .withDescription("specify an application configuration file") |
---|
| 206 | .create("conf"); |
---|
| 207 | Option property = OptionBuilder.withArgName("property=value") |
---|
| 208 | .hasArg() |
---|
| 209 | .withDescription("use value for given property") |
---|
| 210 | .create('D'); |
---|
| 211 | Option libjars = OptionBuilder.withArgName("paths") |
---|
| 212 | .hasArg() |
---|
| 213 | .withDescription("comma separated jar files to include in the classpath.") |
---|
| 214 | .create("libjars"); |
---|
| 215 | Option files = OptionBuilder.withArgName("paths") |
---|
| 216 | .hasArg() |
---|
| 217 | .withDescription("comma separated files to be copied to the " + |
---|
| 218 | "map reduce cluster") |
---|
| 219 | .create("files"); |
---|
| 220 | Option archives = OptionBuilder.withArgName("paths") |
---|
| 221 | .hasArg() |
---|
| 222 | .withDescription("comma separated archives to be unarchived" + |
---|
| 223 | " on the compute machines.") |
---|
| 224 | .create("archives"); |
---|
| 225 | |
---|
| 226 | opts.addOption(fs); |
---|
| 227 | opts.addOption(jt); |
---|
| 228 | opts.addOption(oconf); |
---|
| 229 | opts.addOption(property); |
---|
| 230 | opts.addOption(libjars); |
---|
| 231 | opts.addOption(files); |
---|
| 232 | opts.addOption(archives); |
---|
| 233 | |
---|
| 234 | return opts; |
---|
| 235 | } |
---|
| 236 | |
---|
| 237 | /** |
---|
| 238 | * Modify configuration according user-specified generic options |
---|
| 239 | * @param conf Configuration to be modified |
---|
| 240 | * @param line User-specified generic options |
---|
| 241 | */ |
---|
| 242 | private void processGeneralOptions(Configuration conf, |
---|
| 243 | CommandLine line) { |
---|
| 244 | if (line.hasOption("fs")) { |
---|
| 245 | FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); |
---|
| 246 | } |
---|
| 247 | |
---|
| 248 | if (line.hasOption("jt")) { |
---|
| 249 | conf.set("mapred.job.tracker", line.getOptionValue("jt")); |
---|
| 250 | } |
---|
| 251 | if (line.hasOption("conf")) { |
---|
| 252 | String[] values = line.getOptionValues("conf"); |
---|
| 253 | for(String value : values) { |
---|
| 254 | conf.addResource(new Path(value)); |
---|
| 255 | } |
---|
| 256 | } |
---|
| 257 | try { |
---|
| 258 | if (line.hasOption("libjars")) { |
---|
| 259 | conf.set("tmpjars", |
---|
| 260 | validateFiles(line.getOptionValue("libjars"), conf)); |
---|
| 261 | //setting libjars in client classpath |
---|
| 262 | URL[] libjars = getLibJars(conf); |
---|
| 263 | if(libjars!=null && libjars.length>0) { |
---|
| 264 | conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); |
---|
| 265 | Thread.currentThread().setContextClassLoader( |
---|
| 266 | new URLClassLoader(libjars, |
---|
| 267 | Thread.currentThread().getContextClassLoader())); |
---|
| 268 | } |
---|
| 269 | } |
---|
| 270 | if (line.hasOption("files")) { |
---|
| 271 | conf.set("tmpfiles", |
---|
| 272 | validateFiles(line.getOptionValue("files"), conf)); |
---|
| 273 | } |
---|
| 274 | if (line.hasOption("archives")) { |
---|
| 275 | conf.set("tmparchives", |
---|
| 276 | validateFiles(line.getOptionValue("archives"), conf)); |
---|
| 277 | } |
---|
| 278 | } catch (IOException ioe) { |
---|
| 279 | System.err.println(StringUtils.stringifyException(ioe)); |
---|
| 280 | } |
---|
| 281 | if (line.hasOption('D')) { |
---|
| 282 | String[] property = line.getOptionValues('D'); |
---|
| 283 | for(String prop : property) { |
---|
| 284 | String[] keyval = prop.split("=", 2); |
---|
| 285 | if (keyval.length == 2) { |
---|
| 286 | conf.set(keyval[0], keyval[1]); |
---|
| 287 | } |
---|
| 288 | } |
---|
| 289 | } |
---|
| 290 | conf.setBoolean("mapred.used.genericoptionsparser", true); |
---|
| 291 | } |
---|
| 292 | |
---|
| 293 | /** |
---|
| 294 | * If libjars are set in the conf, parse the libjars. |
---|
| 295 | * @param conf |
---|
| 296 | * @return libjar urls |
---|
| 297 | * @throws IOException |
---|
| 298 | */ |
---|
| 299 | public static URL[] getLibJars(Configuration conf) throws IOException { |
---|
| 300 | String jars = conf.get("tmpjars"); |
---|
| 301 | if(jars==null) { |
---|
| 302 | return null; |
---|
| 303 | } |
---|
| 304 | String[] files = jars.split(","); |
---|
| 305 | URL[] cp = new URL[files.length]; |
---|
| 306 | for (int i=0;i<cp.length;i++) { |
---|
| 307 | Path tmp = new Path(files[i]); |
---|
| 308 | cp[i] = FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL(); |
---|
| 309 | } |
---|
| 310 | return cp; |
---|
| 311 | } |
---|
| 312 | |
---|
| 313 | /** |
---|
| 314 | * takes input as a comma separated list of files |
---|
| 315 | * and verifies if they exist. It defaults for file:/// |
---|
| 316 | * if the files specified do not have a scheme. |
---|
| 317 | * it returns the paths uri converted defaulting to file:///. |
---|
| 318 | * So an input of /home/user/file1,/home/user/file2 would return |
---|
| 319 | * file:///home/user/file1,file:///home/user/file2 |
---|
| 320 | * @param files |
---|
| 321 | * @return |
---|
| 322 | */ |
---|
| 323 | private String validateFiles(String files, Configuration conf) throws IOException { |
---|
| 324 | if (files == null) |
---|
| 325 | return null; |
---|
| 326 | String[] fileArr = files.split(","); |
---|
| 327 | String[] finalArr = new String[fileArr.length]; |
---|
| 328 | for (int i =0; i < fileArr.length; i++) { |
---|
| 329 | String tmp = fileArr[i]; |
---|
| 330 | String finalPath; |
---|
| 331 | Path path = new Path(tmp); |
---|
| 332 | URI pathURI = path.toUri(); |
---|
| 333 | FileSystem localFs = FileSystem.getLocal(conf); |
---|
| 334 | if (pathURI.getScheme() == null) { |
---|
| 335 | //default to the local file system |
---|
| 336 | //check if the file exists or not first |
---|
| 337 | if (!localFs.exists(path)) { |
---|
| 338 | throw new FileNotFoundException("File " + tmp + " does not exist."); |
---|
| 339 | } |
---|
| 340 | finalPath = path.makeQualified(localFs).toString(); |
---|
| 341 | } |
---|
| 342 | else { |
---|
| 343 | // check if the file exists in this file system |
---|
| 344 | // we need to recreate this filesystem object to copy |
---|
| 345 | // these files to the file system jobtracker is running |
---|
| 346 | // on. |
---|
| 347 | FileSystem fs = path.getFileSystem(conf); |
---|
| 348 | if (!fs.exists(path)) { |
---|
| 349 | throw new FileNotFoundException("File " + tmp + " does not exist."); |
---|
| 350 | } |
---|
| 351 | finalPath = path.makeQualified(fs).toString(); |
---|
| 352 | try { |
---|
| 353 | fs.close(); |
---|
| 354 | } catch(IOException e){}; |
---|
| 355 | } |
---|
| 356 | finalArr[i] = finalPath; |
---|
| 357 | } |
---|
| 358 | return StringUtils.arrayToString(finalArr); |
---|
| 359 | } |
---|
| 360 | |
---|
| 361 | |
---|
| 362 | /** |
---|
| 363 | * Parse the user-specified options, get the generic options, and modify |
---|
| 364 | * configuration accordingly |
---|
| 365 | * @param conf Configuration to be modified |
---|
| 366 | * @param args User-specified arguments |
---|
| 367 | * @return Command-specific arguments |
---|
| 368 | */ |
---|
| 369 | private String[] parseGeneralOptions(Options opts, Configuration conf, |
---|
| 370 | String[] args) { |
---|
| 371 | opts = buildGeneralOptions(opts); |
---|
| 372 | CommandLineParser parser = new GnuParser(); |
---|
| 373 | try { |
---|
| 374 | commandLine = parser.parse(opts, args, true); |
---|
| 375 | processGeneralOptions(conf, commandLine); |
---|
| 376 | return commandLine.getArgs(); |
---|
| 377 | } catch(ParseException e) { |
---|
| 378 | LOG.warn("options parsing failed: "+e.getMessage()); |
---|
| 379 | |
---|
| 380 | HelpFormatter formatter = new HelpFormatter(); |
---|
| 381 | formatter.printHelp("general options are: ", opts); |
---|
| 382 | } |
---|
| 383 | return args; |
---|
| 384 | } |
---|
| 385 | |
---|
| 386 | /** |
---|
| 387 | * Print the usage message for generic command-line options supported. |
---|
| 388 | * |
---|
| 389 | * @param out stream to print the usage message to. |
---|
| 390 | */ |
---|
| 391 | public static void printGenericCommandUsage(PrintStream out) { |
---|
| 392 | |
---|
| 393 | out.println("Generic options supported are"); |
---|
| 394 | out.println("-conf <configuration file> specify an application configuration file"); |
---|
| 395 | out.println("-D <property=value> use value for given property"); |
---|
| 396 | out.println("-fs <local|namenode:port> specify a namenode"); |
---|
| 397 | out.println("-jt <local|jobtracker:port> specify a job tracker"); |
---|
| 398 | out.println("-files <comma separated list of files> " + |
---|
| 399 | "specify comma separated files to be copied to the map reduce cluster"); |
---|
| 400 | out.println("-libjars <comma separated list of jars> " + |
---|
| 401 | "specify comma separated jar files to include in the classpath."); |
---|
| 402 | out.println("-archives <comma separated list of archives> " + |
---|
| 403 | "specify comma separated archives to be unarchived" + |
---|
| 404 | " on the compute machines.\n"); |
---|
| 405 | out.println("The general command line syntax is"); |
---|
| 406 | out.println("bin/hadoop command [genericOptions] [commandOptions]\n"); |
---|
| 407 | } |
---|
| 408 | |
---|
| 409 | } |
---|