source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/SLG_user_guide.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 17.2 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html>
3<head>
4<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5<meta content="Apache Forrest" name="Generator">
6<meta name="Forrest-version" content="0.8">
7<meta name="Forrest-skin-name" content="pelt">
8<title> HDFS Synthetic Load Generator Guide </title>
9<link type="text/css" href="skin/basic.css" rel="stylesheet">
10<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12<link type="text/css" href="skin/profile.css" rel="stylesheet">
13<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14<link rel="shortcut icon" href="images/favicon.ico">
15</head>
16<body onload="init()">
17<script type="text/javascript">ndeSetTextSize();</script>
18<div id="top">
19<!--+
20    |breadtrail
21    +-->
22<div class="breadtrail">
23<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24</div>
25<!--+
26    |header
27    +-->
28<div class="header">
29<!--+
30    |start group logo
31    +-->
32<div class="grouplogo">
33<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34</div>
35<!--+
36    |end group logo
37    +-->
38<!--+
39    |start Project Logo
40    +-->
41<div class="projectlogo">
42<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
43</div>
44<!--+
45    |end Project Logo
46    +-->
47<!--+
48    |start Search
49    +-->
50<div class="searchbox">
51<form action="http://www.google.com/search" method="get" class="roundtopsmall">
52<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
53                    <input name="Search" value="Search" type="submit">
54</form>
55</div>
56<!--+
57    |end search
58    +-->
59<!--+
60    |start Tabs
61    +-->
62<ul id="tabs">
63<li>
64<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
65</li>
66<li>
67<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
68</li>
69<li class="current">
70<a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
71</li>
72</ul>
73<!--+
74    |end Tabs
75    +-->
76</div>
77</div>
78<div id="main">
79<div id="publishedStrip">
80<!--+
81    |start Subtabs
82    +-->
83<div id="level2tabs"></div>
84<!--+
85    |end Subtabs
86    +-->
87<script type="text/javascript"><!--
88document.write("Last Published: " + document.lastModified);
89//  --></script>
90</div>
91<!--+
92    |breadtrail
93    +-->
94<div class="breadtrail">
95
96             &nbsp;
97           </div>
98<!--+
99    |start Menu, mainarea
100    +-->
101<!--+
102    |start Menu
103    +-->
104<div id="menu">
105<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Getting Started</div>
106<div id="menu_1.1" class="menuitemgroup">
107<div class="menuitem">
108<a href="index.html">Overview</a>
109</div>
110<div class="menuitem">
111<a href="quickstart.html">Quick Start</a>
112</div>
113<div class="menuitem">
114<a href="cluster_setup.html">Cluster Setup</a>
115</div>
116<div class="menuitem">
117<a href="mapred_tutorial.html">Map/Reduce Tutorial</a>
118</div>
119</div>
120<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Programming Guides</div>
121<div id="menu_1.2" class="menuitemgroup">
122<div class="menuitem">
123<a href="commands_manual.html">Commands</a>
124</div>
125<div class="menuitem">
126<a href="distcp.html">DistCp</a>
127</div>
128<div class="menuitem">
129<a href="native_libraries.html">Native Libraries</a>
130</div>
131<div class="menuitem">
132<a href="streaming.html">Streaming</a>
133</div>
134<div class="menuitem">
135<a href="fair_scheduler.html">Fair Scheduler</a>
136</div>
137<div class="menuitem">
138<a href="capacity_scheduler.html">Capacity Scheduler</a>
139</div>
140<div class="menuitem">
141<a href="service_level_auth.html">Service Level Authorization</a>
142</div>
143<div class="menuitem">
144<a href="vaidya.html">Vaidya</a>
145</div>
146<div class="menuitem">
147<a href="hadoop_archives.html">Archives</a>
148</div>
149</div>
150<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HDFS</div>
151<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;">
152<div class="menuitem">
153<a href="hdfs_user_guide.html">User Guide</a>
154</div>
155<div class="menuitem">
156<a href="hdfs_design.html">Architecture</a>
157</div>
158<div class="menuitem">
159<a href="hdfs_shell.html">File System Shell Guide</a>
160</div>
161<div class="menuitem">
162<a href="hdfs_permissions_guide.html">Permissions Guide</a>
163</div>
164<div class="menuitem">
165<a href="hdfs_quota_admin_guide.html">Quotas Guide</a>
166</div>
167<div class="menupage">
168<div class="menupagetitle">Synthetic Load Generator Guide</div>
169</div>
170<div class="menuitem">
171<a href="libhdfs.html">C API libhdfs</a>
172</div>
173</div>
174<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div>
175<div id="menu_1.4" class="menuitemgroup">
176<div class="menuitem">
177<a href="hod_user_guide.html">User Guide</a>
178</div>
179<div class="menuitem">
180<a href="hod_admin_guide.html">Admin Guide</a>
181</div>
182<div class="menuitem">
183<a href="hod_config_guide.html">Config Guide</a>
184</div>
185</div>
186<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
187<div id="menu_1.5" class="menuitemgroup">
188<div class="menuitem">
189<a href="api/index.html">API Docs</a>
190</div>
191<div class="menuitem">
192<a href="jdiff/changes.html">API Changes</a>
193</div>
194<div class="menuitem">
195<a href="http://wiki.apache.org/hadoop/">Wiki</a>
196</div>
197<div class="menuitem">
198<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
199</div>
200<div class="menuitem">
201<a href="releasenotes.html">Release Notes</a>
202</div>
203<div class="menuitem">
204<a href="changes.html">Change Log</a>
205</div>
206</div>
207<div id="credit"></div>
208<div id="roundbottom">
209<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
210<!--+
211  |alternative credits
212  +-->
213<div id="credit2"></div>
214</div>
215<!--+
216    |end Menu
217    +-->
218<!--+
219    |start content
220    +-->
221<div id="content">
222<div title="Portable Document Format" class="pdflink">
223<a class="dida" href="SLG_user_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
224        PDF</a>
225</div>
226<h1> HDFS Synthetic Load Generator Guide </h1>
227<div id="minitoc-area">
228<ul class="minitoc">
229<li>
230<a href="#Description"> Description </a>
231</li>
232<li>
233<a href="#Synopsis"> Synopsis </a>
234</li>
235<li>
236<a href="#Test+Space+Population"> Test Space Population </a>
237<ul class="minitoc">
238<li>
239<a href="#Structure+Generator"> Structure Generator </a>
240</li>
241<li>
242<a href="#Test+Space+Generator"> Test Space Generator </a>
243</li>
244</ul>
245</li>
246</ul>
247</div>
248               
249<a name="N1000D"></a><a name="Description"></a>
250<h2 class="h3"> Description </h2>
251<div class="section">
252<p>
253        The synthetic load generator (SLG) is a tool for testing NameNode behavior
254        under different client loads. The user can generate different mixes
255        of read, write, and list requests by specifying the probabilities of
256        read and write. The user controls the intensity of the load by adjusting
257        parameters for the number of worker threads and the delay between
258        operations. While load generators are running, the user can profile and
259        monitor the running of the NameNode. When a load generator exits, it
260        prints some NameNode statistics like the average execution time of each
261        kind of operation and the NameNode throughput.
262                       </p>
263</div>
264               
265<a name="N10017"></a><a name="Synopsis"></a>
266<h2 class="h3"> Synopsis </h2>
267<div class="section">
268<p>
269       
270<span class="codefrag">java LoadGenerator [options]</span>
271<br>
272                       
273</p>
274<p>
275        Options include:<br>
276       
277<span class="codefrag">&nbsp;&nbsp;-readProbability &lt;read probability&gt;</span>
278<br>
279       
280<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the probability of the read operation;
281                default is 0.3333. </span>
282<br>
283       
284<span class="codefrag">&nbsp;&nbsp;-writeProbability &lt;write probability&gt;</span>
285<br>
286       
287<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the probability of the write
288                operation; default is 0.3333.</span>
289<br>
290       
291<span class="codefrag">&nbsp;&nbsp;-root &lt;test space root&gt;</span>
292<br>
293       
294<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the root of the test space;
295                default is /testLoadSpace.</span>
296<br>
297       
298<span class="codefrag">&nbsp;&nbsp;-maxDelayBetweenOps
299                &lt;maxDelayBetweenOpsInMillis&gt;</span>
300<br> 
301       
302<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the maximum delay between two consecutive
303                operations in a thread; default is 0 indicating no delay.
304                </span>
305<br>
306       
307<span class="codefrag">&nbsp;&nbsp;-numOfThreads &lt;numOfThreads&gt;</span>
308<br> 
309       
310<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the number of threads to spawn;
311                default is 200.</span>
312<br>
313       
314<span class="codefrag">&nbsp;&nbsp;-elapsedTime &lt;elapsedTimeInSecs&gt;</span>
315<br>
316       
317<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the number of seconds that the program
318                will run; A value of zero indicates that the program runs
319                forever. The default value is 0.</span>
320<br>
321       
322<span class="codefrag">&nbsp;&nbsp;-startTime &lt;startTimeInMillis&gt;</span>
323<br> 
324       
325<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the time that all worker threads
326                start to run. By default it is 10 seconds after the main
327                program starts running. This creates a barrier if more than
328                one load generator is running.
329        </span>
330<br>
331       
332<span class="codefrag">&nbsp;&nbsp;-seed &lt;seed&gt;</span>
333<br>
334       
335<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the random generator seed for repeating
336                requests to NameNode when running with a single thread;
337                default is the current time.</span>
338<br>
339                       
340</p>
341<p>
342        After command line argument parsing, the load generator traverses
343        the test space and builds a table of all directories and another table
344        of all files in the test space. It then waits until the start time to
345        spawn the number of worker threads as specified by the user. Each
346        thread sends a stream of requests to NameNode. At each iteration,
347        it first decides if it is going to read a file, create a file, or
348        list a directory following the read and write probabilities specified
349        by the user. The listing probability is equal to
350        <em>1-read probability-write probability</em>. When reading,
351        it randomly picks a file in the test space and reads the entire file.
352        When writing, it randomly picks a directory in the test space and
353        creates a file there. To prevent two threads from the same load
354        generator or from two different load generators from creating the same
355        file, the file name consists of the current machine's host name
356        and the thread id. The length of the file follows a Gaussian
357        distribution with an average size of 2 blocks and a standard
358        deviation of 1. The new file is filled with byte 'a'. To prevent
359        the test space from growing indefinitely, the file is deleted immediately
360        after the file creation completes. While listing, it randomly
361        picks a directory in the test space and lists its content.
362        After an operation completes, the thread pauses for a random
363        amount of time in the range of [0, maxDelayBetweenOps] if the
364        specified maximum delay is not zero. All threads are stopped when
365        the specified elapsed time has passed. Before exiting, the program
366        prints the average execution time for each kind of NameNode operation,
367        and the number of requests served by the NameNode per second.
368                        </p>
369</div>
370               
371<a name="N10070"></a><a name="Test+Space+Population"></a>
372<h2 class="h3"> Test Space Population </h2>
373<div class="section">
374<p>
375        The user needs to populate a test space before she runs a
376        load generator. The structure generator generates a random
377        test space structure and the data generator creates the files
378        and directories of the test space in the Hadoop Distributed File System.
379                        </p>
380<a name="N10079"></a><a name="Structure+Generator"></a>
381<h3 class="h4"> Structure Generator </h3>
382<p>
383        This tool generates a random namespace structure with the
384        following constraints:
385                                </p>
386<ol>
387       
388<li>The number of subdirectories that a directory can have is
389            a random number in [minWidth, maxWidth].</li>
390       
391<li>The maximum depth of each subdirectory is a random number
392            in [2*maxDepth/3, maxDepth].</li>
393       
394<li>Files are randomly placed in leaf directories. The size of
395            each file follows a Gaussian distribution with an average size
396            of 1 block and a standard deviation of 1.</li>
397                                       
398</ol>
399<p>
400        The generated namespace structure is described by two files in
401        the output directory. Each line of the first file contains the
402        full name of a leaf directory. Each line of the second file
403        contains the full name of a file and its size, separated by a space.
404                                </p>
405<p>
406        The synopsis of the command is
407                                </p>
408<p>
409       
410<span class="codefrag">java StructureGenerator [options]</span>
411                               
412</p>
413<p>
414        Options include:<br>
415       
416<span class="codefrag">&nbsp;&nbsp;-maxDepth &lt;maxDepth&gt;</span>
417<br>
418       
419<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;maximum depth of the directory tree;
420                default is 5.</span>
421<br>
422       
423<span class="codefrag">&nbsp;&nbsp;-minWidth &lt;minWidth&gt;</span>
424<br> 
425       
426<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;minimum number of subdirectories per
427                directory; default is 1.</span>
428<br>
429       
430<span class="codefrag">&nbsp;&nbsp;-maxWidth &lt;maxWidth&gt;</span>
431<br> 
432       
433<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;maximum number of subdirectories per
434                directory; default is 5.</span>
435<br>
436       
437<span class="codefrag">&nbsp;&nbsp;-numOfFiles &lt;#OfFiles&gt;</span>
438<br> 
439       
440<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the total number of files in the test
441                space; default is 10.</span>
442<br>
443       
444<span class="codefrag">&nbsp;&nbsp;-avgFileSize &lt;avgFileSizeInBlocks&gt;</span>
445<br>
446       
447<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;average file size in blocks; default is 1.
448                </span>
449<br>
450       
451<span class="codefrag">&nbsp;&nbsp;-outDir &lt;outDir&gt;</span>
452<br>
453       
454<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;output directory; default is the
455                current directory. </span>
456<br>
457       
458<span class="codefrag">&nbsp;&nbsp;-seed &lt;seed&gt;</span>
459<br>
460       
461<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;random number generator seed;
462                default is the current time.</span>
463<br>
464                               
465</p>
466<a name="N100D8"></a><a name="Test+Space+Generator"></a>
467<h3 class="h4"> Test Space Generator </h3>
468<p>
469        This tool reads the directory structure and file structure from
470        the input directory and creates the namespace in the Hadoop Distributed
471        File System. All files are filled with byte 'a'.
472                                </p>
473<p>
474        The synopsis of the command is
475                                </p>
476<p>
477       
478<span class="codefrag">java DataGenerator [options]</span>
479                               
480</p>
481<p>
482        Options include:<br>
483       
484<span class="codefrag">&nbsp;&nbsp;-inDir &lt;inDir&gt;</span>
485<br>
486       
487<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;input directory name where directory/file
488                structures are stored; default is the current directory.
489        </span>
490<br>
491       
492<span class="codefrag">&nbsp;&nbsp;-root &lt;test space root&gt;</span>
493<br>
494       
495<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the name of the root directory which the
496                new namespace is going to be placed under;
497                default is "/testLoadSpace".</span>
498<br>
499                               
500</p>
501</div>
502       
503</div>
504<!--+
505    |end content
506    +-->
507<div class="clearboth">&nbsp;</div>
508</div>
509<div id="footer">
510<!--+
511    |start bottomstrip
512    +-->
513<div class="lastmodified">
514<script type="text/javascript"><!--
515document.write("Last Published: " + document.lastModified);
516//  --></script>
517</div>
518<div class="copyright">
519        Copyright &copy;
520         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
521</div>
522<!--+
523    |end bottomstrip
524    +-->
525</div>
526</body>
527</html>
Note: See TracBrowser for help on using the repository browser.