source: proiecte/HadoopJUnit/hadoop-0.20.1/src/benchmarks/gridmix2/src/java/org/apache/hadoop/mapred/GenericMRLoadJobCreator.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 3.5 KB
Line 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18
19package org.apache.hadoop.mapred;
20
21import java.util.Random;
22import java.util.Stack;
23
24import org.apache.hadoop.fs.FileStatus;
25import org.apache.hadoop.fs.FileSystem;
26import org.apache.hadoop.fs.Path;
27import org.apache.hadoop.io.LongWritable;
28import org.apache.hadoop.io.SequenceFile;
29import org.apache.hadoop.io.Text;
30import org.apache.hadoop.mapred.GenericMRLoadGenerator;
31import org.apache.hadoop.mapred.lib.NullOutputFormat;
32import org.apache.hadoop.mapred.JobConf;
33
34public class GenericMRLoadJobCreator extends GenericMRLoadGenerator {
35
36  public static JobConf createJob(String[] argv, boolean mapoutputCompressed,
37      boolean outputCompressed) throws Exception {
38
39    JobConf job = new JobConf();
40    job.setJarByClass(GenericMRLoadGenerator.class);
41    job.setMapperClass(SampleMapper.class);
42    job.setReducerClass(SampleReducer.class);
43    if (!parseArgs(argv, job)) {
44      return null;
45    }
46
47    if (null == FileOutputFormat.getOutputPath(job)) {
48      // No output dir? No writes
49      job.setOutputFormat(NullOutputFormat.class);
50    }
51
52    if (0 == FileInputFormat.getInputPaths(job).length) {
53      // No input dir? Generate random data
54      System.err.println("No input path; ignoring InputFormat");
55      confRandom(job);
56    } else if (null != job.getClass("mapred.indirect.input.format", null)) {
57      // specified IndirectInputFormat? Build src list
58      JobClient jClient = new JobClient(job);
59      Path sysdir = jClient.getSystemDir();
60      Random r = new Random();
61      Path indirInputFile = new Path(sysdir, Integer.toString(r
62          .nextInt(Integer.MAX_VALUE), 36)
63          + "_files");
64      job.set("mapred.indirect.input.file", indirInputFile.toString());
65      SequenceFile.Writer writer = SequenceFile.createWriter(sysdir
66          .getFileSystem(job), job, indirInputFile, LongWritable.class,
67          Text.class, SequenceFile.CompressionType.NONE);
68      try {
69        for (Path p : FileInputFormat.getInputPaths(job)) {
70          FileSystem fs = p.getFileSystem(job);
71          Stack<Path> pathstack = new Stack<Path>();
72          pathstack.push(p);
73          while (!pathstack.empty()) {
74            for (FileStatus stat : fs.listStatus(pathstack.pop())) {
75              if (stat.isDir()) {
76                if (!stat.getPath().getName().startsWith("_")) {
77                  pathstack.push(stat.getPath());
78                }
79              } else {
80                writer.sync();
81                writer.append(new LongWritable(stat.getLen()), new Text(stat
82                    .getPath().toUri().toString()));
83              }
84            }
85          }
86        }
87      } finally {
88        writer.close();
89      }
90    }
91
92    job.setCompressMapOutput(mapoutputCompressed);
93    job.setBoolean("mapred.output.compress", outputCompressed);
94    return job;
95
96  }
97
98}
Note: See TracBrowser for help on using the repository browser.