source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/vaidya.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 17.8 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html>
3<head>
4<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5<meta content="Apache Forrest" name="Generator">
6<meta name="Forrest-version" content="0.8">
7<meta name="Forrest-skin-name" content="pelt">
8<title>Vaidya Guide</title>
9<link type="text/css" href="skin/basic.css" rel="stylesheet">
10<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12<link type="text/css" href="skin/profile.css" rel="stylesheet">
13<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14<link rel="shortcut icon" href="images/favicon.ico">
15</head>
16<body onload="init()">
17<script type="text/javascript">ndeSetTextSize();</script>
18<div id="top">
19<!--+
20    |breadtrail
21    +-->
22<div class="breadtrail">
23<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24</div>
25<!--+
26    |header
27    +-->
28<div class="header">
29<!--+
30    |start group logo
31    +-->
32<div class="grouplogo">
33<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34</div>
35<!--+
36    |end group logo
37    +-->
38<!--+
39    |start Project Logo
40    +-->
41<div class="projectlogo">
42<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
43</div>
44<!--+
45    |end Project Logo
46    +-->
47<!--+
48    |start Search
49    +-->
50<div class="searchbox">
51<form action="http://www.google.com/search" method="get" class="roundtopsmall">
52<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
53                    <input name="Search" value="Search" type="submit">
54</form>
55</div>
56<!--+
57    |end search
58    +-->
59<!--+
60    |start Tabs
61    +-->
62<ul id="tabs">
63<li>
64<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
65</li>
66<li>
67<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
68</li>
69<li class="current">
70<a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
71</li>
72</ul>
73<!--+
74    |end Tabs
75    +-->
76</div>
77</div>
78<div id="main">
79<div id="publishedStrip">
80<!--+
81    |start Subtabs
82    +-->
83<div id="level2tabs"></div>
84<!--+
85    |end Endtabs
86    +-->
87<script type="text/javascript"><!--
88document.write("Last Published: " + document.lastModified);
89//  --></script>
90</div>
91<!--+
92    |breadtrail
93    +-->
94<div class="breadtrail">
95
96             &nbsp;
97           </div>
98<!--+
99    |start Menu, mainarea
100    +-->
101<!--+
102    |start Menu
103    +-->
104<div id="menu">
105<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Getting Started</div>
106<div id="menu_1.1" class="menuitemgroup">
107<div class="menuitem">
108<a href="index.html">Overview</a>
109</div>
110<div class="menuitem">
111<a href="quickstart.html">Quick Start</a>
112</div>
113<div class="menuitem">
114<a href="cluster_setup.html">Cluster Setup</a>
115</div>
116<div class="menuitem">
117<a href="mapred_tutorial.html">Map/Reduce Tutorial</a>
118</div>
119</div>
120<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Programming Guides</div>
121<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
122<div class="menuitem">
123<a href="commands_manual.html">Commands</a>
124</div>
125<div class="menuitem">
126<a href="distcp.html">DistCp</a>
127</div>
128<div class="menuitem">
129<a href="native_libraries.html">Native Libraries</a>
130</div>
131<div class="menuitem">
132<a href="streaming.html">Streaming</a>
133</div>
134<div class="menuitem">
135<a href="fair_scheduler.html">Fair Scheduler</a>
136</div>
137<div class="menuitem">
138<a href="capacity_scheduler.html">Capacity Scheduler</a>
139</div>
140<div class="menuitem">
141<a href="service_level_auth.html">Service Level Authorization</a>
142</div>
143<div class="menupage">
144<div class="menupagetitle">Vaidya</div>
145</div>
146<div class="menuitem">
147<a href="hadoop_archives.html">Archives</a>
148</div>
149</div>
150<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">HDFS</div>
151<div id="menu_1.3" class="menuitemgroup">
152<div class="menuitem">
153<a href="hdfs_user_guide.html">User Guide</a>
154</div>
155<div class="menuitem">
156<a href="hdfs_design.html">Architecture</a>
157</div>
158<div class="menuitem">
159<a href="hdfs_shell.html">File System Shell Guide</a>
160</div>
161<div class="menuitem">
162<a href="hdfs_permissions_guide.html">Permissions Guide</a>
163</div>
164<div class="menuitem">
165<a href="hdfs_quota_admin_guide.html">Quotas Guide</a>
166</div>
167<div class="menuitem">
168<a href="SLG_user_guide.html">Synthetic Load Generator Guide</a>
169</div>
170<div class="menuitem">
171<a href="libhdfs.html">C API libhdfs</a>
172</div>
173</div>
174<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div>
175<div id="menu_1.4" class="menuitemgroup">
176<div class="menuitem">
177<a href="hod_user_guide.html">User Guide</a>
178</div>
179<div class="menuitem">
180<a href="hod_admin_guide.html">Admin Guide</a>
181</div>
182<div class="menuitem">
183<a href="hod_config_guide.html">Config Guide</a>
184</div>
185</div>
186<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
187<div id="menu_1.5" class="menuitemgroup">
188<div class="menuitem">
189<a href="api/index.html">API Docs</a>
190</div>
191<div class="menuitem">
192<a href="jdiff/changes.html">API Changes</a>
193</div>
194<div class="menuitem">
195<a href="http://wiki.apache.org/hadoop/">Wiki</a>
196</div>
197<div class="menuitem">
198<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
199</div>
200<div class="menuitem">
201<a href="releasenotes.html">Release Notes</a>
202</div>
203<div class="menuitem">
204<a href="changes.html">Change Log</a>
205</div>
206</div>
207<div id="credit"></div>
208<div id="roundbottom">
209<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
210<!--+
211  |alternative credits
212  +-->
213<div id="credit2"></div>
214</div>
215<!--+
216    |end Menu
217    +-->
218<!--+
219    |start content
220    +-->
221<div id="content">
222<div title="Portable Document Format" class="pdflink">
223<a class="dida" href="vaidya.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
224        PDF</a>
225</div>
226<h1>Vaidya Guide</h1>
227<div id="minitoc-area">
228<ul class="minitoc">
229<li>
230<a href="#Purpose">Purpose</a>
231</li>
232<li>
233<a href="#Pre-requisites">Pre-requisites</a>
234</li>
235<li>
236<a href="#Overview">Overview</a>
237</li>
238<li>
239<a href="#Terminology">Terminology</a>
240</li>
241<li>
242<a href="#How+to+Execute+the+Hadoop+Vaidya+Tool">How to Execute the Hadoop Vaidya Tool</a>
243</li>
244<li>
245<a href="#How+to+Write+and+Execute+your+own+Tests">How to Write and Execute your own Tests</a>
246</li>
247</ul>
248</div>
249 
250   
251<a name="N1000D"></a><a name="Purpose"></a>
252<h2 class="h3">Purpose</h2>
253<div class="section">
254<p>This document describes various user-facing facets of Hadoop Vaidya, a performance diagnostic tool for map/reduce jobs. It
255         describes how to execute a default set of rules against your map/reduce job counters and
256         how to write and execute new rules to detect specific performance problems.
257      </p>
258<p>A few sample test rules are provided with the tool with the objective of growing the rules database over time.
259         You are welcome to contribute new rules for everyone's benefit; to do so, follow the
260         <a href="http://wiki.apache.org/hadoop/HowToContribute">How to Contribute</a> procedure
261         specified on the Apache Hadoop website.
262      </p>
263</div>
264   
265   
266<a name="N1001E"></a><a name="Pre-requisites"></a>
267<h2 class="h3">Pre-requisites</h2>
268<div class="section">
269<p>Ensure that Hadoop is installed and configured. More details:</p>
270<ul>
271       
272<li>
273          Make sure HADOOP_HOME environment variable is set.
274        </li>
275       
276<li>
277          Make sure Java is installed and configured as a part of the Hadoop installation.
278        </li>
279     
280</ul>
281</div>
282   
283   
284<a name="N10031"></a><a name="Overview"></a>
285<h2 class="h3">Overview</h2>
286<div class="section">
287<p>Hadoop Vaidya (Vaidya in Sanskrit language means "one who knows", or "a physician")
288            is a rule based performance diagnostic tool for
289        Map/Reduce jobs. It performs a post execution analysis of map/reduce
290        job by parsing and collecting execution statistics through job history
291        and job configuration files. It runs a set of predefined tests/rules
292        against job execution statistics to diagnose various performance problems.
293        Each test rule detects a specific performance problem with the Map/Reduce job and provides
294        targeted advice to the user. This tool generates an XML report based on
295        the evaluation results of individual test rules.
296      </p>
297</div>
298 
299   
300<a name="N1003B"></a><a name="Terminology"></a>
301<h2 class="h3">Terminology</h2>
302<div class="section">
303<p> This section describes the main concepts and terminology involved with Hadoop Vaidya:</p>
304<ul>
305                       
306<li> 
307<em>PostExPerformanceDiagnoser</em>: This class extends the base Diagnoser class and acts as a driver for post execution performance analysis of Map/Reduce Jobs.
308                       It detects performance inefficiencies by executing a set of performance diagnosis rules against the job execution statistics.</li>
309                       
310<li> 
311<em>Job Statistics</em>: This includes the job configuration information (job.xml) and various counters logged by Map/Reduce job as a part of the job history log
312                           file. The counters are parsed and collected into the Job Statistics data structures, which contains global job level aggregate counters and
313                             a set of counters for each Map and Reduce task.</li>
314                       
315<li> 
316<em>Diagnostic Test/Rule</em>: This is a program logic that detects the inefficiency of M/R job based on the job statistics. The
317                                 description of the Test is specified as an XML element (DiagnosticTest) in a test description file e.g.
318                                 default tests description file, <em>$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</em>. The actual logic is coded as
319                                 a java class and referenced in the DiagnosticTest XML element. </li>
320               
321</ul>
322<p></p>
323<p>The following section describes the <em>DiagnosticTest</em> XML element in a diagnostic test description file:</p>
324<ul>
325                       
326<li> 
327<em>DiagnosticTest{Title}</em>: Specifies a short name/description of the test.</li>
328                       
329<li> 
330<em>DiagnosticTest{ClassName}</em>: Specifies fully qualified class name that implements the test logic.</li>
331                       
332<li> 
333<em>DiagnosticTest{Description}</em>: Specifies a full description of the test rule.</li>
334                       
335<li> 
336<em>DiagnosticTest{Importance}</em>: Specifies a declarative value for overall importance of the test rule. (Values: High, Medium, Low)</li>
337                       
338<li> 
339<em>DiagnosticTest{SuccessThreshold}</em>: This is a threshold value specified by the test case writer such that if the impact level of the test case
340                                 is lower than the threshold, the test is declared as PASSED (or NEGATIVE). The impact level is calculated and returned
341                                 by individual test's evaluate function, specifying the degree of problem job has with respect to the condition being evaluated.</li>
342                       
343<li> 
344<em>DiagnosticTest{Prescription}</em>: This is a targeted advice written by the test case adviser for the user to follow when test is not PASSED. </li>
345                       
346<li> 
347<em>DiagnosticTest{InputElement}</em>: This is a test-specific input that the test writer can optionally provide. This will be supplied to individual test case
348                       class so that test writer can use it within test case. This is typically a test configuration information such that test writer need not change the
349                       Java code for test case but rather can configure the test case using these input values. </li>
350               
351</ul>
352<p></p>
353<p>The following section describes the performance analysis report generated by the tool in XML format:</p>
354<ul>
355                       
356<li> 
357<em>PostExPerformanceDiagnosticReport</em>: This is a document (root) element from the XML report generated by the tool. </li>
358                       
359<li> 
360<em>TestReportElement</em>: This is an XML report element from the test report document, one for each individual test specified in test description
361                                 file </li> 
362                       
363<li> 
364<em>TestReportElement{TestTitle}</em>: Will be included from DiagnosticTest{Title} </li>
365                       
366<li> 
367<em>TestReportElement{TestDescription}</em>: Will be included from DiagnosticTest{Description} </li>
368                       
369<li> 
370<em>TestReportElement{TestImportance}</em>: Will be included from DiagnosticTest{Importance} </li>
371                       
372<li> 
373<em>TestReportElement{TestSeverity}</em>: This is a product of Test Impact level and Test Importance. It indicates overall severity of the test.</li>
374                       
375<li> 
376<em>TestReportElement{ReferenceDetails}</em>: This is a test specific runtime information provided by test case to support the test result and severity. Typically
377                                 Test writer should print the test impact level in this section. </li>
378                       
379<li> 
380<em>TestReportElement{TestResults}</em>: This is the boolean outcome of the test based on the SuccessThreshold specified by the test writer in the DiagnosticTest description. The
381                                 test PASSED(NEGATIVE) indicates no problem vs. FAILED (POSITIVE) indicates a potential problem with the job for given test case. </li>
382                       
383<li> 
384<em>TestReportElement{TestPrescription}</em>: This will be included from DiagnosticTest{Prescription}, unless test case writer overrides it in the test case class through getPrescription()
385                                 method </li>
386               
387</ul>
388</div>
389       
390       
391<a name="N100D0"></a><a name="How+to+Execute+the+Hadoop+Vaidya+Tool"></a>
392<h2 class="h3">How to Execute the Hadoop Vaidya Tool</h2>
393<div class="section">
394<p>Script to execute Hadoop Vaidya is in <span class="codefrag">$HADOOP_HOME/contrib/vaidya/bin/</span> directory.
395                   It comes with a default set of rules defined in file:
396           <span class="codefrag">$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</span> 
397</p>
398<ul>
399                       
400<li>Make sure HADOOP_HOME environment variable is set and Java is installed and configured.</li>
401                       
402<li>Execute the Hadoop Vaidya script with -help (or without any arguments) to get the command line help. e.g.
403                       <span class="codefrag">=&gt;sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -help</span>
404</li>
405                       
406<li>User needs to
407                                 supply job's configuration file (<span class="codefrag">-jobconf job_conf.xml</span>), job history log file (<span class="codefrag">-joblog job_history_log_file</span>), and optionally the test description
408                                 file (<span class="codefrag">-testconf postex_diagnosis_tests.xml</span>). If the test description file is not specified then the default one is picked up from the Hadoop Vaidya Jar (<span class="codefrag">$HADOOP_HOME/contrib/vaidya/hadoop-{version}-vaidya.jar</span>).
409                                 This default test description file is also available at the following location for users to make a local copy, modify and add new test rules:
410                             <span class="codefrag">$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</span>
411</li>
412                       
413<li> Use <span class="codefrag">-report report_file</span> option to store the xml report into specified report_file. </li> 
414                 
415</ul>
416</div>
417       
418   
419<a name="N10102"></a><a name="How+to+Write+and+Execute+your+own+Tests"></a>
420<h2 class="h3">How to Write and Execute your own Tests</h2>
421<div class="section">
422<p>Writing and executing your own test rules is not very hard. You can take a look at Hadoop Vaidya source code for existing set of tests.
423                   The source code is at this <a href="http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/">hadoop svn repository location</a>
424                   . The default set of tests are under <span class="codefrag">"postexdiagnosis/tests/"</span> folder.</p>
425<ul>
426                 
427<li>The test class for your new test case should extend the <span class="codefrag">org.apache.hadoop.vaidya.DiagnosticTest</span> class and
428                       override the following three methods from the base class:
429              <ul> 
430                               
431<li> evaluate() </li>
432                               
433<li> getPrescription() </li> 
434                               
435<li> getReferenceDetails() </li> 
436             
437</ul>
438         
439</li>
440                 
441<li>Make a local copy of the <span class="codefrag">$HADOOP_HOME/contrib/vaidya/conf/postex_diagnosis_tests.xml</span> file or create a new test description XML file.</li>
442                 
443<li>Add the test description element for your new test case to this test description file.</li>
444                 
445<li>Compile your new test class (or multiple classes), archive them into a Jar file and add it to the CLASSPATH e.g. (<span class="codefrag">export CLASSPATH=$CLASSPATH:newtests.jar</span>)</li>
446                 
447<li>Execute the Hadoop Vaidya script with the job configuration, job history log and a reference to the newly created test description file using the <em>-testconf</em> option.
448                  <span class="codefrag">=&gt;sh $HADOOP_HOME/contrib/vaidya/bin/vaidya.sh -joblog job_history_log_file -jobconf job.xml -testconf new_test_description_file -report report.xml</span>
449</li>
450               
451</ul>
452</div>
453       
454   
455<p> 
456</p>
457   
458<p> 
459</p>
460   
461<p>
462     
463<em>Java and JNI are trademarks or registered trademarks of
464      Sun Microsystems, Inc. in the United States and other countries.</em>
465   
466</p>
467   
468 
469</div>
470<!--+
471    |end content
472    +-->
473<div class="clearboth">&nbsp;</div>
474</div>
475<div id="footer">
476<!--+
477    |start bottomstrip
478    +-->
479<div class="lastmodified">
480<script type="text/javascript"><!--
481document.write("Last Published: " + document.lastModified);
482//  --></script>
483</div>
484<div class="copyright">
485        Copyright &copy;
486         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
487</div>
488<!--+
489    |end bottomstrip
490    +-->
491</div>
492</body>
493</html>
Note: See TracBrowser for help on using the repository browser.