[120] | 1 | /** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one |
---|
| 3 | * or more contributor license agreements. See the NOTICE file |
---|
| 4 | * distributed with this work for additional information |
---|
| 5 | * regarding copyright ownership. The ASF licenses this file |
---|
| 6 | * to you under the Apache License, Version 2.0 (the |
---|
| 7 | * "License"); you may not use this file except in compliance |
---|
| 8 | * with the License. You may obtain a copy of the License at |
---|
| 9 | * |
---|
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 11 | * |
---|
| 12 | * Unless required by applicable law or agreed to in writing, software |
---|
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 15 | * See the License for the specific language governing permissions and |
---|
| 16 | * limitations under the License. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | package org.apache.hadoop.fs.loadGenerator; |
---|
| 20 | |
---|
| 21 | import java.io.File; |
---|
| 22 | import java.io.FileNotFoundException; |
---|
| 23 | import java.io.PrintStream; |
---|
| 24 | import java.util.ArrayList; |
---|
| 25 | import java.util.List; |
---|
| 26 | import java.util.Random; |
---|
| 27 | |
---|
| 28 | import org.apache.hadoop.util.ToolRunner; |
---|
| 29 | |
---|
| 30 | /** |
---|
| 31 | * This program generates a random namespace structure with the following |
---|
| 32 | * constraints: |
---|
| 33 | * 1. The number of subdirectories is a random number in [minWidth, maxWidth]. |
---|
| 34 | * 2. The maximum depth of each subdirectory is a random number in |
---|
| 35 | * [2*maxDepth/3, maxDepth]. |
---|
| 36 | * 3. Files are randomly placed in the empty directories. The size of each |
---|
| 37 | * file follows Gaussian distribution. |
---|
| 38 | * The generated namespace structure is described by two files in the output |
---|
| 39 | * directory. Each line of the first file |
---|
| 40 | * contains the full name of a leaf directory. |
---|
| 41 | * Each line of the second file contains |
---|
| 42 | * the full name of a file and its size, separated by a blank. |
---|
| 43 | * |
---|
| 44 | * The synopsis of the command is |
---|
| 45 | * java StructureGenerator |
---|
| 46 | -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5. |
---|
| 47 | -minWidth <minWidth> : minimum number of subdirectories per directory; default is 1. |
---|
| 48 | -maxWidth <maxWidth> : maximum number of subdirectories per directory; default is 5. |
---|
| 49 | -numOfFiles <#OfFiles> : the total number of files; default is 10. |
---|
| 50 | -avgFileSize <avgFileSizeInBlocks>: average file size, in blocks; default is 1. |
---|
| 51 | -outDir <outDir>: output directory; default is the current directory. |
---|
| 52 | -seed <seed>: random number generator seed; default is the current time. |
---|
| 53 | */ |
---|
| 54 | public class StructureGenerator { |
---|
| 55 | private int maxDepth = 5; |
---|
| 56 | private int minWidth = 1; |
---|
| 57 | private int maxWidth = 5; |
---|
| 58 | private int numOfFiles = 10; |
---|
| 59 | private double avgFileSize = 1; |
---|
| 60 | private File outDir = DEFAULT_STRUCTURE_DIRECTORY; |
---|
| 61 | final static private String USAGE = "java StructureGenerator\n" + |
---|
| 62 | "-maxDepth <maxDepth>\n" + |
---|
| 63 | "-minWidth <minWidth>\n" + |
---|
| 64 | "-maxWidth <maxWidth>\n" + |
---|
| 65 | "-numOfFiles <#OfFiles>\n" + |
---|
| 66 | "-avgFileSize <avgFileSizeInBlocks>\n" + |
---|
| 67 | "-outDir <outDir>\n" + |
---|
| 68 | "-seed <seed>"; |
---|
| 69 | |
---|
| 70 | private Random r = null; |
---|
| 71 | |
---|
| 72 | /** Default directory for storing file/directory structure */ |
---|
| 73 | final static File DEFAULT_STRUCTURE_DIRECTORY = new File("."); |
---|
| 74 | /** The name of the file for storing directory structure */ |
---|
| 75 | final static String DIR_STRUCTURE_FILE_NAME = "dirStructure"; |
---|
| 76 | /** The name of the file for storing file structure */ |
---|
| 77 | final static String FILE_STRUCTURE_FILE_NAME = "fileStructure"; |
---|
| 78 | /** The name prefix for the files created by this program */ |
---|
| 79 | final static String FILE_NAME_PREFIX = "_file_"; |
---|
| 80 | |
---|
| 81 | /** |
---|
| 82 | * The main function first parses the command line arguments, |
---|
| 83 | * then generates in-memory directory structure and outputs to a file, |
---|
| 84 | * last generates in-memory files and outputs them to a file. |
---|
| 85 | */ |
---|
| 86 | public int run(String[] args) throws Exception { |
---|
| 87 | int exitCode = 0; |
---|
| 88 | exitCode = init(args); |
---|
| 89 | if (exitCode != 0) { |
---|
| 90 | return exitCode; |
---|
| 91 | } |
---|
| 92 | genDirStructure(); |
---|
| 93 | output(new File(outDir, DIR_STRUCTURE_FILE_NAME)); |
---|
| 94 | genFileStructure(); |
---|
| 95 | outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME)); |
---|
| 96 | return exitCode; |
---|
| 97 | } |
---|
| 98 | |
---|
| 99 | /** Parse the command line arguments and initialize the data */ |
---|
| 100 | private int init(String[] args) { |
---|
| 101 | try { |
---|
| 102 | for (int i = 0; i < args.length; i++) { // parse command line |
---|
| 103 | if (args[i].equals("-maxDepth")) { |
---|
| 104 | maxDepth = Integer.parseInt(args[++i]); |
---|
| 105 | if (maxDepth<1) { |
---|
| 106 | System.err.println("maxDepth must be positive: " + maxDepth); |
---|
| 107 | return -1; |
---|
| 108 | } |
---|
| 109 | } else if (args[i].equals("-minWidth")) { |
---|
| 110 | minWidth = Integer.parseInt(args[++i]); |
---|
| 111 | if (minWidth<0) { |
---|
| 112 | System.err.println("minWidth must be positive: " + minWidth); |
---|
| 113 | return -1; |
---|
| 114 | } |
---|
| 115 | } else if (args[i].equals("-maxWidth")) { |
---|
| 116 | maxWidth = Integer.parseInt(args[++i]); |
---|
| 117 | } else if (args[i].equals("-numOfFiles")) { |
---|
| 118 | numOfFiles = Integer.parseInt(args[++i]); |
---|
| 119 | if (numOfFiles<1) { |
---|
| 120 | System.err.println("NumOfFiles must be positive: " + numOfFiles); |
---|
| 121 | return -1; |
---|
| 122 | } |
---|
| 123 | } else if (args[i].equals("-avgFileSize")) { |
---|
| 124 | avgFileSize = Double.parseDouble(args[++i]); |
---|
| 125 | if (avgFileSize<=0) { |
---|
| 126 | System.err.println("AvgFileSize must be positive: " + avgFileSize); |
---|
| 127 | return -1; |
---|
| 128 | } |
---|
| 129 | } else if (args[i].equals("-outDir")) { |
---|
| 130 | outDir = new File(args[++i]); |
---|
| 131 | } else if (args[i].equals("-seed")) { |
---|
| 132 | r = new Random(Long.parseLong(args[++i])); |
---|
| 133 | } else { |
---|
| 134 | System.err.println(USAGE); |
---|
| 135 | ToolRunner.printGenericCommandUsage(System.err); |
---|
| 136 | return -1; |
---|
| 137 | } |
---|
| 138 | } |
---|
| 139 | } catch (NumberFormatException e) { |
---|
| 140 | System.err.println("Illegal parameter: " + e.getLocalizedMessage()); |
---|
| 141 | System.err.println(USAGE); |
---|
| 142 | return -1; |
---|
| 143 | } |
---|
| 144 | |
---|
| 145 | if (maxWidth < minWidth) { |
---|
| 146 | System.err.println( |
---|
| 147 | "maxWidth must be bigger than minWidth: " + maxWidth); |
---|
| 148 | return -1; |
---|
| 149 | } |
---|
| 150 | |
---|
| 151 | if (r==null) { |
---|
| 152 | r = new Random(); |
---|
| 153 | } |
---|
| 154 | return 0; |
---|
| 155 | } |
---|
| 156 | |
---|
| 157 | /** In memory representation of a directory */ |
---|
| 158 | private static class INode { |
---|
| 159 | private String name; |
---|
| 160 | private List<INode> children = new ArrayList<INode>(); |
---|
| 161 | |
---|
| 162 | /** Constructor */ |
---|
| 163 | private INode(String name) { |
---|
| 164 | this.name = name; |
---|
| 165 | } |
---|
| 166 | |
---|
| 167 | /** Add a child (subdir/file) */ |
---|
| 168 | private void addChild(INode child) { |
---|
| 169 | children.add(child); |
---|
| 170 | } |
---|
| 171 | |
---|
| 172 | /** Output the subtree rooted at the current node. |
---|
| 173 | * Only the leaves are printed. |
---|
| 174 | */ |
---|
| 175 | private void output(PrintStream out, String prefix) { |
---|
| 176 | prefix = prefix==null?name:prefix+"/"+name; |
---|
| 177 | if (children.isEmpty()) { |
---|
| 178 | out.println(prefix); |
---|
| 179 | } else { |
---|
| 180 | for (INode child : children) { |
---|
| 181 | child.output(out, prefix); |
---|
| 182 | } |
---|
| 183 | } |
---|
| 184 | } |
---|
| 185 | |
---|
| 186 | /** Output the files in the subtree rooted at this node */ |
---|
| 187 | protected void outputFiles(PrintStream out, String prefix) { |
---|
| 188 | prefix = prefix==null?name:prefix+"/"+name; |
---|
| 189 | for (INode child : children) { |
---|
| 190 | child.outputFiles(out, prefix); |
---|
| 191 | } |
---|
| 192 | } |
---|
| 193 | |
---|
| 194 | /** Add all the leaves in the subtree to the input list */ |
---|
| 195 | private void getLeaves(List<INode> leaves) { |
---|
| 196 | if (children.isEmpty()) { |
---|
| 197 | leaves.add(this); |
---|
| 198 | } else { |
---|
| 199 | for (INode child : children) { |
---|
| 200 | child.getLeaves(leaves); |
---|
| 201 | } |
---|
| 202 | } |
---|
| 203 | } |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | /** In memory representation of a file */ |
---|
| 207 | private static class FileINode extends INode { |
---|
| 208 | private double numOfBlocks; |
---|
| 209 | |
---|
| 210 | /** constructor */ |
---|
| 211 | private FileINode(String name, double numOfBlocks) { |
---|
| 212 | super(name); |
---|
| 213 | this.numOfBlocks = numOfBlocks; |
---|
| 214 | } |
---|
| 215 | |
---|
| 216 | /** Output a file attribute */ |
---|
| 217 | protected void outputFiles(PrintStream out, String prefix) { |
---|
| 218 | prefix = (prefix == null)?super.name: prefix + "/"+super.name; |
---|
| 219 | out.println(prefix + " " + numOfBlocks); |
---|
| 220 | } |
---|
| 221 | } |
---|
| 222 | |
---|
| 223 | private INode root; |
---|
| 224 | |
---|
| 225 | /** Generates a directory tree with a max depth of <code>maxDepth</code> */ |
---|
| 226 | private void genDirStructure() { |
---|
| 227 | root = genDirStructure("", maxDepth); |
---|
| 228 | } |
---|
| 229 | |
---|
| 230 | /** Generate a directory tree rooted at <code>rootName</code> |
---|
| 231 | * The number of subtree is in the range of [minWidth, maxWidth]. |
---|
| 232 | * The maximum depth of each subtree is in the range of |
---|
| 233 | * [2*maxDepth/3, maxDepth]. |
---|
| 234 | */ |
---|
| 235 | private INode genDirStructure(String rootName, int maxDepth) { |
---|
| 236 | INode root = new INode(rootName); |
---|
| 237 | |
---|
| 238 | if (maxDepth>0) { |
---|
| 239 | maxDepth--; |
---|
| 240 | int minDepth = maxDepth*2/3; |
---|
| 241 | // Figure out the number of subdirectories to generate |
---|
| 242 | int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1); |
---|
| 243 | // Expand the tree |
---|
| 244 | for (int i=0; i<numOfSubDirs; i++) { |
---|
| 245 | int childDepth = (maxDepth == 0)?0: |
---|
| 246 | (r.nextInt(maxDepth-minDepth+1)+minDepth); |
---|
| 247 | INode child = genDirStructure("dir"+i, childDepth); |
---|
| 248 | root.addChild(child); |
---|
| 249 | } |
---|
| 250 | } |
---|
| 251 | return root; |
---|
| 252 | } |
---|
| 253 | |
---|
| 254 | /** Collects leaf nodes in the tree */ |
---|
| 255 | private List<INode> getLeaves() { |
---|
| 256 | List<INode> leaveDirs = new ArrayList<INode>(); |
---|
| 257 | root.getLeaves(leaveDirs); |
---|
| 258 | return leaveDirs; |
---|
| 259 | } |
---|
| 260 | |
---|
| 261 | /** Decides where to place all the files and its length. |
---|
| 262 | * It first collects all empty directories in the tree. |
---|
| 263 | * For each file, it randomly chooses an empty directory to place the file. |
---|
| 264 | * The file's length is generated using Gaussian distribution. |
---|
| 265 | */ |
---|
| 266 | private void genFileStructure() { |
---|
| 267 | List<INode> leaves = getLeaves(); |
---|
| 268 | int totalLeaves = leaves.size(); |
---|
| 269 | for (int i=0; i<numOfFiles; i++) { |
---|
| 270 | int leaveNum = r.nextInt(totalLeaves); |
---|
| 271 | double fileSize; |
---|
| 272 | do { |
---|
| 273 | fileSize = r.nextGaussian()+avgFileSize; |
---|
| 274 | } while (fileSize<0); |
---|
| 275 | leaves.get(leaveNum).addChild( |
---|
| 276 | new FileINode(FILE_NAME_PREFIX+i, fileSize)); |
---|
| 277 | } |
---|
| 278 | } |
---|
| 279 | |
---|
| 280 | /** Output directory structure to a file, each line of the file |
---|
| 281 | * contains the directory name. Only empty directory names are printed. */ |
---|
| 282 | private void output(File outFile) throws FileNotFoundException { |
---|
| 283 | System.out.println("Printing to " + outFile.toString()); |
---|
| 284 | PrintStream out = new PrintStream(outFile); |
---|
| 285 | root.output(out, null); |
---|
| 286 | out.close(); |
---|
| 287 | } |
---|
| 288 | |
---|
| 289 | /** Output all files' attributes to a file, each line of the output file |
---|
| 290 | * contains a file name and its length. */ |
---|
| 291 | private void outputFiles(File outFile) throws FileNotFoundException { |
---|
| 292 | System.out.println("Printing to " + outFile.toString()); |
---|
| 293 | PrintStream out = new PrintStream(outFile); |
---|
| 294 | root.outputFiles(out, null); |
---|
| 295 | out.close(); |
---|
| 296 | } |
---|
| 297 | |
---|
| 298 | /** |
---|
| 299 | * Main program |
---|
| 300 | * @param args Command line arguments |
---|
| 301 | * @throws Exception |
---|
| 302 | */ |
---|
| 303 | public static void main(String[] args) throws Exception { |
---|
| 304 | StructureGenerator sg = new StructureGenerator(); |
---|
| 305 | System.exit(sg.run(args)); |
---|
| 306 | } |
---|
| 307 | } |
---|