/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 | |
---|
19 | package org.apache.hadoop.fs.loadGenerator; |
---|
20 | |
---|
21 | import java.io.File; |
---|
22 | import java.io.FileNotFoundException; |
---|
23 | import java.io.PrintStream; |
---|
24 | import java.util.ArrayList; |
---|
25 | import java.util.List; |
---|
26 | import java.util.Random; |
---|
27 | |
---|
28 | import org.apache.hadoop.util.ToolRunner; |
---|
29 | |
---|
30 | /** |
---|
31 | * This program generates a random namespace structure with the following |
---|
32 | * constraints: |
---|
33 | * 1. The number of subdirectories is a random number in [minWidth, maxWidth]. |
---|
34 | * 2. The maximum depth of each subdirectory is a random number |
---|
35 | * [2*maxDepth/3, maxDepth]. |
---|
36 | * 3. Files are randomly placed in the empty directories. The size of each |
---|
37 | * file follows Gaussian distribution. |
---|
38 | * The generated namespace structure is described by two files in the output |
---|
39 | * directory. Each line of the first file |
---|
40 | * contains the full name of a leaf directory. |
---|
41 | * Each line of the second file contains |
---|
42 | * the full name of a file and its size, separated by a blank. |
---|
43 | * |
---|
44 | * The synopsis of the command is |
---|
45 | * java StructureGenerator |
---|
46 | -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5. |
---|
47 | -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1 |
---|
48 | -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5 |
---|
49 | -numOfFiles <#OfFiles> : the total number of files; default is 10. |
---|
50 | -avgFileSize <avgFileSizeInBlocks>: average size of blocks; default is 1. |
---|
51 | -outDir <outDir>: output directory; default is the current directory. |
---|
52 | -seed <seed>: random number generator seed; default is the current time. |
---|
53 | */ |
---|
54 | public class StructureGenerator { |
---|
55 | private int maxDepth = 5; |
---|
56 | private int minWidth = 1; |
---|
57 | private int maxWidth = 5; |
---|
58 | private int numOfFiles = 10; |
---|
59 | private double avgFileSize = 1; |
---|
60 | private File outDir = DEFAULT_STRUCTURE_DIRECTORY; |
---|
61 | final static private String USAGE = "java StructureGenerator\n" + |
---|
62 | "-maxDepth <maxDepth>\n" + |
---|
63 | "-minWidth <minWidth>\n" + |
---|
64 | "-maxWidth <maxWidth>\n" + |
---|
65 | "-numOfFiles <#OfFiles>\n" + |
---|
66 | "-avgFileSize <avgFileSizeInBlocks>\n" + |
---|
67 | "-outDir <outDir>\n" + |
---|
68 | "-seed <seed>"; |
---|
69 | |
---|
70 | private Random r = null; |
---|
71 | |
---|
72 | /** Default directory for storing file/directory structure */ |
---|
73 | final static File DEFAULT_STRUCTURE_DIRECTORY = new File("."); |
---|
74 | /** The name of the file for storing directory structure */ |
---|
75 | final static String DIR_STRUCTURE_FILE_NAME = "dirStructure"; |
---|
76 | /** The name of the file for storing file structure */ |
---|
77 | final static String FILE_STRUCTURE_FILE_NAME = "fileStructure"; |
---|
78 | /** The name prefix for the files created by this program */ |
---|
79 | final static String FILE_NAME_PREFIX = "_file_"; |
---|
80 | |
---|
81 | /** |
---|
82 | * The main function first parses the command line arguments, |
---|
83 | * then generates in-memory directory structure and outputs to a file, |
---|
84 | * last generates in-memory files and outputs them to a file. |
---|
85 | */ |
---|
86 | public int run(String[] args) throws Exception { |
---|
87 | int exitCode = 0; |
---|
88 | exitCode = init(args); |
---|
89 | if (exitCode != 0) { |
---|
90 | return exitCode; |
---|
91 | } |
---|
92 | genDirStructure(); |
---|
93 | output(new File(outDir, DIR_STRUCTURE_FILE_NAME)); |
---|
94 | genFileStructure(); |
---|
95 | outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME)); |
---|
96 | return exitCode; |
---|
97 | } |
---|
98 | |
---|
99 | /** Parse the command line arguments and initialize the data */ |
---|
100 | private int init(String[] args) { |
---|
101 | try { |
---|
102 | for (int i = 0; i < args.length; i++) { // parse command line |
---|
103 | if (args[i].equals("-maxDepth")) { |
---|
104 | maxDepth = Integer.parseInt(args[++i]); |
---|
105 | if (maxDepth<1) { |
---|
106 | System.err.println("maxDepth must be positive: " + maxDepth); |
---|
107 | return -1; |
---|
108 | } |
---|
109 | } else if (args[i].equals("-minWidth")) { |
---|
110 | minWidth = Integer.parseInt(args[++i]); |
---|
111 | if (minWidth<0) { |
---|
112 | System.err.println("minWidth must be positive: " + minWidth); |
---|
113 | return -1; |
---|
114 | } |
---|
115 | } else if (args[i].equals("-maxWidth")) { |
---|
116 | maxWidth = Integer.parseInt(args[++i]); |
---|
117 | } else if (args[i].equals("-numOfFiles")) { |
---|
118 | numOfFiles = Integer.parseInt(args[++i]); |
---|
119 | if (numOfFiles<1) { |
---|
120 | System.err.println("NumOfFiles must be positive: " + numOfFiles); |
---|
121 | return -1; |
---|
122 | } |
---|
123 | } else if (args[i].equals("-avgFileSize")) { |
---|
124 | avgFileSize = Double.parseDouble(args[++i]); |
---|
125 | if (avgFileSize<=0) { |
---|
126 | System.err.println("AvgFileSize must be positive: " + avgFileSize); |
---|
127 | return -1; |
---|
128 | } |
---|
129 | } else if (args[i].equals("-outDir")) { |
---|
130 | outDir = new File(args[++i]); |
---|
131 | } else if (args[i].equals("-seed")) { |
---|
132 | r = new Random(Long.parseLong(args[++i])); |
---|
133 | } else { |
---|
134 | System.err.println(USAGE); |
---|
135 | ToolRunner.printGenericCommandUsage(System.err); |
---|
136 | return -1; |
---|
137 | } |
---|
138 | } |
---|
139 | } catch (NumberFormatException e) { |
---|
140 | System.err.println("Illegal parameter: " + e.getLocalizedMessage()); |
---|
141 | System.err.println(USAGE); |
---|
142 | return -1; |
---|
143 | } |
---|
144 | |
---|
145 | if (maxWidth < minWidth) { |
---|
146 | System.err.println( |
---|
147 | "maxWidth must be bigger than minWidth: " + maxWidth); |
---|
148 | return -1; |
---|
149 | } |
---|
150 | |
---|
151 | if (r==null) { |
---|
152 | r = new Random(); |
---|
153 | } |
---|
154 | return 0; |
---|
155 | } |
---|
156 | |
---|
157 | /** In memory representation of a directory */ |
---|
158 | private static class INode { |
---|
159 | private String name; |
---|
160 | private List<INode> children = new ArrayList<INode>(); |
---|
161 | |
---|
162 | /** Constructor */ |
---|
163 | private INode(String name) { |
---|
164 | this.name = name; |
---|
165 | } |
---|
166 | |
---|
167 | /** Add a child (subdir/file) */ |
---|
168 | private void addChild(INode child) { |
---|
169 | children.add(child); |
---|
170 | } |
---|
171 | |
---|
172 | /** Output the subtree rooted at the current node. |
---|
173 | * Only the leaves are printed. |
---|
174 | */ |
---|
175 | private void output(PrintStream out, String prefix) { |
---|
176 | prefix = prefix==null?name:prefix+"/"+name; |
---|
177 | if (children.isEmpty()) { |
---|
178 | out.println(prefix); |
---|
179 | } else { |
---|
180 | for (INode child : children) { |
---|
181 | child.output(out, prefix); |
---|
182 | } |
---|
183 | } |
---|
184 | } |
---|
185 | |
---|
186 | /** Output the files in the subtree rooted at this node */ |
---|
187 | protected void outputFiles(PrintStream out, String prefix) { |
---|
188 | prefix = prefix==null?name:prefix+"/"+name; |
---|
189 | for (INode child : children) { |
---|
190 | child.outputFiles(out, prefix); |
---|
191 | } |
---|
192 | } |
---|
193 | |
---|
194 | /** Add all the leaves in the subtree to the input list */ |
---|
195 | private void getLeaves(List<INode> leaves) { |
---|
196 | if (children.isEmpty()) { |
---|
197 | leaves.add(this); |
---|
198 | } else { |
---|
199 | for (INode child : children) { |
---|
200 | child.getLeaves(leaves); |
---|
201 | } |
---|
202 | } |
---|
203 | } |
---|
204 | } |
---|
205 | |
---|
206 | /** In memory representation of a file */ |
---|
207 | private static class FileINode extends INode { |
---|
208 | private double numOfBlocks; |
---|
209 | |
---|
210 | /** constructor */ |
---|
211 | private FileINode(String name, double numOfBlocks) { |
---|
212 | super(name); |
---|
213 | this.numOfBlocks = numOfBlocks; |
---|
214 | } |
---|
215 | |
---|
216 | /** Output a file attribute */ |
---|
217 | protected void outputFiles(PrintStream out, String prefix) { |
---|
218 | prefix = (prefix == null)?super.name: prefix + "/"+super.name; |
---|
219 | out.println(prefix + " " + numOfBlocks); |
---|
220 | } |
---|
221 | } |
---|
222 | |
---|
223 | private INode root; |
---|
224 | |
---|
225 | /** Generates a directory tree with a max depth of <code>maxDepth</code> */ |
---|
226 | private void genDirStructure() { |
---|
227 | root = genDirStructure("", maxDepth); |
---|
228 | } |
---|
229 | |
---|
230 | /** Generate a directory tree rooted at <code>rootName</code> |
---|
231 | * The number of subtree is in the range of [minWidth, maxWidth]. |
---|
232 | * The maximum depth of each subtree is in the range of |
---|
233 | * [2*maxDepth/3, maxDepth]. |
---|
234 | */ |
---|
235 | private INode genDirStructure(String rootName, int maxDepth) { |
---|
236 | INode root = new INode(rootName); |
---|
237 | |
---|
238 | if (maxDepth>0) { |
---|
239 | maxDepth--; |
---|
240 | int minDepth = maxDepth*2/3; |
---|
241 | // Figure out the number of subdirectories to generate |
---|
242 | int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1); |
---|
243 | // Expand the tree |
---|
244 | for (int i=0; i<numOfSubDirs; i++) { |
---|
245 | int childDepth = (maxDepth == 0)?0: |
---|
246 | (r.nextInt(maxDepth-minDepth+1)+minDepth); |
---|
247 | INode child = genDirStructure("dir"+i, childDepth); |
---|
248 | root.addChild(child); |
---|
249 | } |
---|
250 | } |
---|
251 | return root; |
---|
252 | } |
---|
253 | |
---|
254 | /** Collects leaf nodes in the tree */ |
---|
255 | private List<INode> getLeaves() { |
---|
256 | List<INode> leaveDirs = new ArrayList<INode>(); |
---|
257 | root.getLeaves(leaveDirs); |
---|
258 | return leaveDirs; |
---|
259 | } |
---|
260 | |
---|
261 | /** Decides where to place all the files and its length. |
---|
262 | * It first collects all empty directories in the tree. |
---|
263 | * For each file, it randomly chooses an empty directory to place the file. |
---|
264 | * The file's length is generated using Gaussian distribution. |
---|
265 | */ |
---|
266 | private void genFileStructure() { |
---|
267 | List<INode> leaves = getLeaves(); |
---|
268 | int totalLeaves = leaves.size(); |
---|
269 | for (int i=0; i<numOfFiles; i++) { |
---|
270 | int leaveNum = r.nextInt(totalLeaves); |
---|
271 | double fileSize; |
---|
272 | do { |
---|
273 | fileSize = r.nextGaussian()+avgFileSize; |
---|
274 | } while (fileSize<0); |
---|
275 | leaves.get(leaveNum).addChild( |
---|
276 | new FileINode(FILE_NAME_PREFIX+i, fileSize)); |
---|
277 | } |
---|
278 | } |
---|
279 | |
---|
280 | /** Output directory structure to a file, each line of the file |
---|
281 | * contains the directory name. Only empty directory names are printed. */ |
---|
282 | private void output(File outFile) throws FileNotFoundException { |
---|
283 | System.out.println("Printing to " + outFile.toString()); |
---|
284 | PrintStream out = new PrintStream(outFile); |
---|
285 | root.output(out, null); |
---|
286 | out.close(); |
---|
287 | } |
---|
288 | |
---|
289 | /** Output all files' attributes to a file, each line of the output file |
---|
290 | * contains a file name and its length. */ |
---|
291 | private void outputFiles(File outFile) throws FileNotFoundException { |
---|
292 | System.out.println("Printing to " + outFile.toString()); |
---|
293 | PrintStream out = new PrintStream(outFile); |
---|
294 | root.outputFiles(out, null); |
---|
295 | out.close(); |
---|
296 | } |
---|
297 | |
---|
298 | /** |
---|
299 | * Main program |
---|
300 | * @param args Command line arguments |
---|
301 | * @throws Exception |
---|
302 | */ |
---|
303 | public static void main(String[] args) throws Exception { |
---|
304 | StructureGenerator sg = new StructureGenerator(); |
---|
305 | System.exit(sg.run(args)); |
---|
306 | } |
---|
307 | } |
---|