source: proiecte/HadoopJUnit/hadoop-0.20.1/src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 10.4 KB
Line 
1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements.  See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership.  The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License.  You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19package org.apache.hadoop.fs.loadGenerator;
20
21import java.io.File;
22import java.io.FileNotFoundException;
23import java.io.PrintStream;
24import java.util.ArrayList;
25import java.util.List;
26import java.util.Random;
27
28import org.apache.hadoop.util.ToolRunner;
29
30/**
31 * This program generates a random namespace structure with the following
32 * constraints:
33 * 1. The number of subdirectories is a random number in [minWidth, maxWidth].
34 * 2. The maximum depth of each subdirectory is a random number
35 *    [2*maxDepth/3, maxDepth].
36 * 3. Files are randomly placed in the empty directories. The size of each
37 *    file follows Gaussian distribution.
38 * The generated namespace structure is described by two files in the output
39 * directory. Each line of the first file
40 * contains the full name of a leaf directory. 
41 * Each line of the second file contains
42 * the full name of a file and its size, separated by a blank.
43 *
44 * The synopsis of the command is
45 * java StructureGenerator
46    -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5.
47    -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1
48    -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5
49    -numOfFiles <#OfFiles> : the total number of files; default is 10.
50    -avgFileSize <avgFileSizeInBlocks>: average size of blocks; default is 1.
51    -outDir <outDir>: output directory; default is the current directory.
52    -seed <seed>: random number generator seed; default is the current time.
53 */
54public class StructureGenerator {
55  private int maxDepth = 5;
56  private int minWidth = 1;
57  private int maxWidth = 5;
58  private int numOfFiles = 10;
59  private double avgFileSize = 1;
60  private File outDir = DEFAULT_STRUCTURE_DIRECTORY;
61  final static private String USAGE = "java StructureGenerator\n" +
62        "-maxDepth <maxDepth>\n" +
63    "-minWidth <minWidth>\n" +
64    "-maxWidth <maxWidth>\n" +
65    "-numOfFiles <#OfFiles>\n" +
66    "-avgFileSize <avgFileSizeInBlocks>\n" +
67    "-outDir <outDir>\n" +
68    "-seed <seed>";
69 
70  private Random r = null; 
71 
72  /** Default directory for storing file/directory structure */
73  final static File DEFAULT_STRUCTURE_DIRECTORY = new File(".");
74  /** The name of the file for storing directory structure */
75  final static String DIR_STRUCTURE_FILE_NAME = "dirStructure";
76  /** The name of the file for storing file structure */
77  final static String FILE_STRUCTURE_FILE_NAME = "fileStructure";
78  /** The name prefix for the files created by this program */
79  final static String FILE_NAME_PREFIX = "_file_";
80 
81  /**
82   * The main function first parses the command line arguments,
83   * then generates in-memory directory structure and outputs to a file,
84   * last generates in-memory files and outputs them to a file.
85   */
86  public int run(String[] args) throws Exception {
87    int exitCode = 0;
88    exitCode = init(args);
89    if (exitCode != 0) {
90      return exitCode;
91    }
92    genDirStructure();
93    output(new File(outDir, DIR_STRUCTURE_FILE_NAME));
94    genFileStructure();
95    outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME));
96    return exitCode;
97  }
98
99  /** Parse the command line arguments and initialize the data */
100  private int init(String[] args) {
101    try {
102      for (int i = 0; i < args.length; i++) { // parse command line
103        if (args[i].equals("-maxDepth")) {
104          maxDepth = Integer.parseInt(args[++i]);
105          if (maxDepth<1) {
106            System.err.println("maxDepth must be positive: " + maxDepth);
107            return -1;
108          }
109        } else if (args[i].equals("-minWidth")) {
110          minWidth = Integer.parseInt(args[++i]);
111          if (minWidth<0) {
112            System.err.println("minWidth must be positive: " + minWidth);
113            return -1;
114          }
115        } else if (args[i].equals("-maxWidth")) {
116          maxWidth = Integer.parseInt(args[++i]);
117        } else if (args[i].equals("-numOfFiles")) {
118          numOfFiles = Integer.parseInt(args[++i]);
119          if (numOfFiles<1) {
120            System.err.println("NumOfFiles must be positive: " + numOfFiles);
121            return -1;
122          }
123        } else if (args[i].equals("-avgFileSize")) {
124          avgFileSize = Double.parseDouble(args[++i]);
125          if (avgFileSize<=0) {
126            System.err.println("AvgFileSize must be positive: " + avgFileSize);
127            return -1;
128          }
129        } else if (args[i].equals("-outDir")) {
130          outDir = new File(args[++i]);
131        } else if (args[i].equals("-seed")) {
132          r = new Random(Long.parseLong(args[++i]));
133        } else {
134          System.err.println(USAGE);
135          ToolRunner.printGenericCommandUsage(System.err);
136          return -1;
137        }
138      }
139    } catch (NumberFormatException e) {
140      System.err.println("Illegal parameter: " + e.getLocalizedMessage());
141      System.err.println(USAGE);
142      return -1;
143    }
144   
145    if (maxWidth < minWidth) {
146      System.err.println(
147          "maxWidth must be bigger than minWidth: " + maxWidth);
148      return -1;
149    }
150   
151    if (r==null) {
152      r = new Random();
153    }
154    return 0;
155  }
156 
157  /** In memory representation of a directory */
158  private static class INode {
159    private String name;
160    private List<INode> children = new ArrayList<INode>();
161   
162    /** Constructor */
163    private INode(String name) {
164      this.name = name;
165    }
166   
167    /** Add a child (subdir/file) */
168    private void addChild(INode child) {
169      children.add(child);
170    }
171   
172    /** Output the subtree rooted at the current node.
173     * Only the leaves are printed.
174     */
175    private void output(PrintStream out, String prefix) {
176      prefix = prefix==null?name:prefix+"/"+name;
177      if (children.isEmpty()) {
178        out.println(prefix);
179      } else {
180        for (INode child : children) {
181          child.output(out, prefix);
182        }
183      }
184    }
185   
186    /** Output the files in the subtree rooted at this node */
187    protected void outputFiles(PrintStream out, String prefix) {
188      prefix = prefix==null?name:prefix+"/"+name;
189      for (INode child : children) {
190        child.outputFiles(out, prefix);
191      }
192    }
193   
194    /** Add all the leaves in the subtree to the input list */
195    private void getLeaves(List<INode> leaves) {
196      if (children.isEmpty()) {
197        leaves.add(this);
198      } else {
199        for (INode child : children) {
200          child.getLeaves(leaves);
201        }
202      }
203    }
204  }
205 
206  /** In memory representation of a file */
207  private static class FileINode extends INode {
208    private double numOfBlocks;
209
210    /** constructor */
211    private FileINode(String name, double numOfBlocks) {
212      super(name);
213      this.numOfBlocks = numOfBlocks;
214    }
215   
216    /** Output a file attribute */
217    protected void outputFiles(PrintStream out, String prefix) {
218      prefix = (prefix == null)?super.name: prefix + "/"+super.name;
219      out.println(prefix + " " + numOfBlocks);
220    }
221  }
222
223  private INode root;
224 
225  /** Generates a directory tree with a max depth of <code>maxDepth</code> */
226  private void genDirStructure() {
227    root = genDirStructure("", maxDepth);
228  }
229 
230  /** Generate a directory tree rooted at <code>rootName</code>
231   * The number of subtree is in the range of [minWidth, maxWidth].
232   * The maximum depth of each subtree is in the range of
233   * [2*maxDepth/3, maxDepth].
234   */
235  private INode genDirStructure(String rootName, int maxDepth) {
236    INode root = new INode(rootName);
237   
238    if (maxDepth>0) {
239      maxDepth--;
240      int minDepth = maxDepth*2/3;
241      // Figure out the number of subdirectories to generate
242      int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1);
243      // Expand the tree
244      for (int i=0; i<numOfSubDirs; i++) {
245        int childDepth = (maxDepth == 0)?0:
246          (r.nextInt(maxDepth-minDepth+1)+minDepth);
247        INode child = genDirStructure("dir"+i, childDepth);
248        root.addChild(child);
249      }
250    }
251    return root;
252  }
253 
254  /** Collects leaf nodes in the tree */
255  private List<INode> getLeaves() {
256    List<INode> leaveDirs = new ArrayList<INode>();
257    root.getLeaves(leaveDirs);
258    return leaveDirs;
259  }
260 
261  /** Decides where to place all the files and its length.
262   * It first collects all empty directories in the tree.
263   * For each file, it randomly chooses an empty directory to place the file.
264   * The file's length is generated using Gaussian distribution.
265   */
266  private void genFileStructure() {
267    List<INode> leaves = getLeaves();
268    int totalLeaves = leaves.size();
269    for (int i=0; i<numOfFiles; i++) {
270      int leaveNum = r.nextInt(totalLeaves);
271      double fileSize;
272      do {
273        fileSize = r.nextGaussian()+avgFileSize;
274      } while (fileSize<0);
275      leaves.get(leaveNum).addChild(
276          new FileINode(FILE_NAME_PREFIX+i, fileSize));
277    }
278  }
279 
280  /** Output directory structure to a file, each line of the file
281   * contains the directory name. Only empty directory names are printed. */
282  private void output(File outFile) throws FileNotFoundException {
283    System.out.println("Printing to " + outFile.toString());
284    PrintStream out = new PrintStream(outFile);
285    root.output(out, null);
286    out.close();
287  }
288 
289  /** Output all files' attributes to a file, each line of the output file
290   * contains a file name and its length. */
291  private void outputFiles(File outFile) throws FileNotFoundException {
292    System.out.println("Printing to " + outFile.toString());
293    PrintStream out = new PrintStream(outFile);
294    root.outputFiles(out, null);
295    out.close();
296  }
297 
298  /**
299   * Main program
300   * @param args Command line arguments
301   * @throws Exception
302   */
303  public static void main(String[] args) throws Exception {
304    StructureGenerator sg = new StructureGenerator();
305    System.exit(sg.run(args));
306  }
307}
Note: See TracBrowser for help on using the repository browser.