source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/cluster_setup.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 42.0 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html>
3<head>
4<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5<meta content="Apache Forrest" name="Generator">
6<meta name="Forrest-version" content="0.8">
7<meta name="Forrest-skin-name" content="pelt">
8<title>Cluster Setup</title>
9<link type="text/css" href="skin/basic.css" rel="stylesheet">
10<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12<link type="text/css" href="skin/profile.css" rel="stylesheet">
13<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14<link rel="shortcut icon" href="images/favicon.ico">
15</head>
16<body onload="init()">
17<script type="text/javascript">ndeSetTextSize();</script>
18<div id="top">
19<!--+
20    |breadtrail
21    +-->
22<div class="breadtrail">
23<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24</div>
25<!--+
26    |header
27    +-->
28<div class="header">
29<!--+
30    |start group logo
31    +-->
32<div class="grouplogo">
33<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34</div>
35<!--+
36    |end group logo
37    +-->
38<!--+
39    |start Project Logo
40    +-->
41<div class="projectlogo">
42<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
43</div>
44<!--+
45    |end Project Logo
46    +-->
47<!--+
48    |start Search
49    +-->
50<div class="searchbox">
51<form action="http://www.google.com/search" method="get" class="roundtopsmall">
52<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
53                    <input name="Search" value="Search" type="submit">
54</form>
55</div>
56<!--+
57    |end search
58    +-->
59<!--+
60    |start Tabs
61    +-->
62<ul id="tabs">
63<li>
64<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
65</li>
66<li>
67<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
68</li>
69<li class="current">
70<a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
71</li>
72</ul>
73<!--+
74    |end Tabs
75    +-->
76</div>
77</div>
78<div id="main">
79<div id="publishedStrip">
80<!--+
81    |start Subtabs
82    +-->
83<div id="level2tabs"></div>
84<!--+
85    |end Endtabs
86    +-->
87<script type="text/javascript"><!--
88document.write("Last Published: " + document.lastModified);
89//  --></script>
90</div>
91<!--+
92    |breadtrail
93    +-->
94<div class="breadtrail">
95
96             &nbsp;
97           </div>
98<!--+
99    |start Menu, mainarea
100    +-->
101<!--+
102    |start Menu
103    +-->
104<div id="menu">
105<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Getting Started</div>
106<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
107<div class="menuitem">
108<a href="index.html">Overview</a>
109</div>
110<div class="menuitem">
111<a href="quickstart.html">Quick Start</a>
112</div>
113<div class="menupage">
114<div class="menupagetitle">Cluster Setup</div>
115</div>
116<div class="menuitem">
117<a href="mapred_tutorial.html">Map/Reduce Tutorial</a>
118</div>
119</div>
120<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Programming Guides</div>
121<div id="menu_1.2" class="menuitemgroup">
122<div class="menuitem">
123<a href="commands_manual.html">Commands</a>
124</div>
125<div class="menuitem">
126<a href="distcp.html">DistCp</a>
127</div>
128<div class="menuitem">
129<a href="native_libraries.html">Native Libraries</a>
130</div>
131<div class="menuitem">
132<a href="streaming.html">Streaming</a>
133</div>
134<div class="menuitem">
135<a href="fair_scheduler.html">Fair Scheduler</a>
136</div>
137<div class="menuitem">
138<a href="capacity_scheduler.html">Capacity Scheduler</a>
139</div>
140<div class="menuitem">
141<a href="service_level_auth.html">Service Level Authorization</a>
142</div>
143<div class="menuitem">
144<a href="vaidya.html">Vaidya</a>
145</div>
146<div class="menuitem">
147<a href="hadoop_archives.html">Archives</a>
148</div>
149</div>
150<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">HDFS</div>
151<div id="menu_1.3" class="menuitemgroup">
152<div class="menuitem">
153<a href="hdfs_user_guide.html">User Guide</a>
154</div>
155<div class="menuitem">
156<a href="hdfs_design.html">Architecture</a>
157</div>
158<div class="menuitem">
159<a href="hdfs_shell.html">File System Shell Guide</a>
160</div>
161<div class="menuitem">
162<a href="hdfs_permissions_guide.html">Permissions Guide</a>
163</div>
164<div class="menuitem">
165<a href="hdfs_quota_admin_guide.html">Quotas Guide</a>
166</div>
167<div class="menuitem">
168<a href="SLG_user_guide.html">Synthetic Load Generator Guide</a>
169</div>
170<div class="menuitem">
171<a href="libhdfs.html">C API libhdfs</a>
172</div>
173</div>
174<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div>
175<div id="menu_1.4" class="menuitemgroup">
176<div class="menuitem">
177<a href="hod_user_guide.html">User Guide</a>
178</div>
179<div class="menuitem">
180<a href="hod_admin_guide.html">Admin Guide</a>
181</div>
182<div class="menuitem">
183<a href="hod_config_guide.html">Config Guide</a>
184</div>
185</div>
186<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
187<div id="menu_1.5" class="menuitemgroup">
188<div class="menuitem">
189<a href="api/index.html">API Docs</a>
190</div>
191<div class="menuitem">
192<a href="jdiff/changes.html">API Changes</a>
193</div>
194<div class="menuitem">
195<a href="http://wiki.apache.org/hadoop/">Wiki</a>
196</div>
197<div class="menuitem">
198<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
199</div>
200<div class="menuitem">
201<a href="releasenotes.html">Release Notes</a>
202</div>
203<div class="menuitem">
204<a href="changes.html">Change Log</a>
205</div>
206</div>
207<div id="credit"></div>
208<div id="roundbottom">
209<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
210<!--+
211  |alternative credits
212  +-->
213<div id="credit2"></div>
214</div>
215<!--+
216    |end Menu
217    +-->
218<!--+
219    |start content
220    +-->
221<div id="content">
222<div title="Portable Document Format" class="pdflink">
223<a class="dida" href="cluster_setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
224        PDF</a>
225</div>
226<h1>Cluster Setup</h1>
227<div id="minitoc-area">
228<ul class="minitoc">
229<li>
230<a href="#Purpose">Purpose</a>
231</li>
232<li>
233<a href="#Pre-requisites">Pre-requisites</a>
234</li>
235<li>
236<a href="#Installation">Installation</a>
237</li>
238<li>
239<a href="#Configuration">Configuration</a>
240<ul class="minitoc">
241<li>
242<a href="#Configuration+Files">Configuration Files</a>
243</li>
244<li>
245<a href="#Site+Configuration">Site Configuration</a>
246<ul class="minitoc">
247<li>
248<a href="#Configuring+the+Environment+of+the+Hadoop+Daemons">Configuring the Environment of the Hadoop Daemons</a>
249</li>
250<li>
251<a href="#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a>
252</li>
253<li>
254<a href="#Memory+monitoring"> Memory monitoring</a>
255</li>
256<li>
257<a href="#Slaves">Slaves</a>
258</li>
259<li>
260<a href="#Logging">Logging</a>
261</li>
262</ul>
263</li>
264</ul>
265</li>
266<li>
267<a href="#Cluster+Restartability">Cluster Restartability</a>
268<ul class="minitoc">
269<li>
270<a href="#Map%2FReduce">Map/Reduce</a>
271</li>
272</ul>
273</li>
274<li>
275<a href="#Hadoop+Rack+Awareness">Hadoop Rack Awareness</a>
276</li>
277<li>
278<a href="#Hadoop+Startup">Hadoop Startup</a>
279</li>
280<li>
281<a href="#Hadoop+Shutdown">Hadoop Shutdown</a>
282</li>
283</ul>
284</div>
285 
286   
287<a name="N1000D"></a><a name="Purpose"></a>
288<h2 class="h3">Purpose</h2>
289<div class="section">
290<p>This document describes how to install, configure and manage non-trivial
291      Hadoop clusters ranging from a few nodes to extremely large clusters with
292      thousands of nodes.</p>
293<p>
294      To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>).
295      </p>
296</div>
297   
298   
299<a name="N1001E"></a><a name="Pre-requisites"></a>
300<h2 class="h3">Pre-requisites</h2>
301<div class="section">
302<ol>
303       
304<li>
305          Make sure all <a href="quickstart.html#PreReqs">requisite</a> software
306          is installed on all nodes in your cluster.
307        </li>
308       
309<li>
310         
311<a href="quickstart.html#Download">Get</a> the Hadoop software.
312        </li>
313     
314</ol>
315</div>
316   
317   
318<a name="N10036"></a><a name="Installation"></a>
319<h2 class="h3">Installation</h2>
320<div class="section">
321<p>Installing a Hadoop cluster typically involves unpacking the software
322      on all the machines in the cluster.</p>
323<p>Typically one machine in the cluster is designated as the
324      <span class="codefrag">NameNode</span> and another machine as the <span class="codefrag">JobTracker</span>,
325      exclusively. These are the <em>masters</em>. The rest of the machines in
326      the cluster act as both <span class="codefrag">DataNode</span> <em>and</em> 
327      <span class="codefrag">TaskTracker</span>. These are the <em>slaves</em>.</p>
328<p>The root of the distribution is referred to as
329      <span class="codefrag">HADOOP_HOME</span>. All machines in the cluster usually have the same
330      <span class="codefrag">HADOOP_HOME</span> path.</p>
331</div>
332   
333   
334<a name="N10061"></a><a name="Configuration"></a>
335<h2 class="h3">Configuration</h2>
336<div class="section">
337<p>The following sections describe how to configure a Hadoop cluster.</p>
338<a name="N1006A"></a><a name="Configuration+Files"></a>
339<h3 class="h4">Configuration Files</h3>
340<p>Hadoop configuration is driven by two types of important
341        configuration files:</p>
342<ol>
343         
344<li>
345            Read-only default configuration -
346            <a href="http://hadoop.apache.org/core/docs/current/core-default.html">src/core/core-default.xml</a>,
347            <a href="http://hadoop.apache.org/core/docs/current/hdfs-default.html">src/hdfs/hdfs-default.xml</a> and
348            <a href="http://hadoop.apache.org/core/docs/current/mapred-default.html">src/mapred/mapred-default.xml</a>.
349          </li>
350         
351<li>
352            Site-specific configuration -
353            <em>conf/core-site.xml</em>,
354            <em>conf/hdfs-site.xml</em> and
355            <em>conf/mapred-site.xml</em>.
356          </li>
357       
358</ol>
359<p>To learn more about how the Hadoop framework is controlled by these
360        configuration files, look
361        <a href="api/org/apache/hadoop/conf/Configuration.html">here</a>.</p>
362<p>Additionally, you can control the Hadoop scripts found in the
363        <span class="codefrag">bin/</span> directory of the distribution, by setting site-specific
364        values via the <span class="codefrag">conf/hadoop-env.sh</span>.</p>
365<a name="N100A2"></a><a name="Site+Configuration"></a>
366<h3 class="h4">Site Configuration</h3>
367<p>To configure the Hadoop cluster you will need to configure the
368        <em>environment</em> in which the Hadoop daemons execute as well as
369        the <em>configuration parameters</em> for the Hadoop daemons.</p>
370<p>The Hadoop daemons are <span class="codefrag">NameNode</span>/<span class="codefrag">DataNode</span> 
371        and <span class="codefrag">JobTracker</span>/<span class="codefrag">TaskTracker</span>.</p>
372<a name="N100C0"></a><a name="Configuring+the+Environment+of+the+Hadoop+Daemons"></a>
373<h4>Configuring the Environment of the Hadoop Daemons</h4>
374<p>Administrators should use the <span class="codefrag">conf/hadoop-env.sh</span> script
375          to do site-specific customization of the Hadoop daemons' process
376          environment.</p>
377<p>At the very least you should specify the
378          <span class="codefrag">JAVA_HOME</span> so that it is correctly defined on each
379          remote node.</p>
380<p>Administrators can configure individual daemons using the
381          configuration options <span class="codefrag">HADOOP_*_OPTS</span>. Various options
382          available are shown below in the table. </p>
383<table class="ForrestTable" cellspacing="1" cellpadding="4">
384         
385<tr>
386<th colspan="1" rowspan="1">Daemon</th><th colspan="1" rowspan="1">Configure Options</th>
387</tr>
388         
389<tr>
390<td colspan="1" rowspan="1">NameNode</td><td colspan="1" rowspan="1">HADOOP_NAMENODE_OPTS</td>
391</tr>
392         
393<tr>
394<td colspan="1" rowspan="1">DataNode</td><td colspan="1" rowspan="1">HADOOP_DATANODE_OPTS</td>
395</tr>
396         
397<tr>
398<td colspan="1" rowspan="1">SecondaryNamenode</td>
399              <td colspan="1" rowspan="1">HADOOP_SECONDARYNAMENODE_OPTS</td>
400</tr>
401         
402<tr>
403<td colspan="1" rowspan="1">JobTracker</td><td colspan="1" rowspan="1">HADOOP_JOBTRACKER_OPTS</td>
404</tr>
405         
406<tr>
407<td colspan="1" rowspan="1">TaskTracker</td><td colspan="1" rowspan="1">HADOOP_TASKTRACKER_OPTS</td>
408</tr>
409         
410</table>
411<p> For example, to configure the Namenode to use parallelGC, the
412          following statement should be added in <span class="codefrag">hadoop-env.sh</span> :
413          <br>
414<span class="codefrag">
415          export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
416          </span>
417<br>
418</p>
419<p>Other useful configuration parameters that you can customize
420          include:</p>
421<ul>
422           
423<li>
424             
425<span class="codefrag">HADOOP_LOG_DIR</span> - The directory where the daemons'
426              log files are stored. They are automatically created if they don't
427              exist.
428            </li>
429           
430<li>
431             
432<span class="codefrag">HADOOP_HEAPSIZE</span> - The maximum amount of heapsize
433              to use, in MB e.g. <span class="codefrag">1000MB</span>. This is used to
434              configure the heap size for the hadoop daemon. By default,
435              the value is <span class="codefrag">1000MB</span>.
436            </li>
437         
438</ul>
439<a name="N1013B"></a><a name="Configuring+the+Hadoop+Daemons"></a>
440<h4>Configuring the Hadoop Daemons</h4>
441<p>This section deals with important parameters to be specified in the
442          following:
443          <br>
444         
445<span class="codefrag">conf/core-site.xml</span>:</p>
446<table class="ForrestTable" cellspacing="1" cellpadding="4">
447                   
448<tr>
449                     
450<th colspan="1" rowspan="1">Parameter</th>
451                      <th colspan="1" rowspan="1">Value</th> 
452                      <th colspan="1" rowspan="1">Notes</th>
453                   
454</tr>
455                   
456<tr>
457             
458<td colspan="1" rowspan="1">fs.default.name</td>
459              <td colspan="1" rowspan="1">URI of <span class="codefrag">NameNode</span>.</td>
460              <td colspan="1" rowspan="1"><em>hdfs://hostname/</em></td>
461           
462</tr>
463         
464</table>
465<p>
466<br>
467<span class="codefrag">conf/hdfs-site.xml</span>:</p>
468<table class="ForrestTable" cellspacing="1" cellpadding="4">   
469       
470<tr>
471         
472<th colspan="1" rowspan="1">Parameter</th>
473          <th colspan="1" rowspan="1">Value</th> 
474          <th colspan="1" rowspan="1">Notes</th>
475       
476</tr>
477                   
478<tr>
479                     
480<td colspan="1" rowspan="1">dfs.name.dir</td>
481                      <td colspan="1" rowspan="1">
482                        Path on the local filesystem where the <span class="codefrag">NameNode</span> 
483                        stores the namespace and transactions logs persistently.</td>
484                      <td colspan="1" rowspan="1">
485                        If this is a comma-delimited list of directories then the name
486                        table is replicated in all of the directories, for redundancy.
487                      </td>
488                   
489</tr>
490                   
491<tr>
492                     
493<td colspan="1" rowspan="1">dfs.data.dir</td>
494                      <td colspan="1" rowspan="1">
495                        Comma separated list of paths on the local filesystem of a
496                        <span class="codefrag">DataNode</span> where it should store its blocks.
497                      </td>
498                      <td colspan="1" rowspan="1">
499                        If this is a comma-delimited list of directories, then data will
500                        be stored in all named directories, typically on different
501                        devices.
502                      </td>
503                   
504</tr>
505     
506</table>
507<p>
508<br>
509<span class="codefrag">conf/mapred-site.xml</span>:</p>
510<table class="ForrestTable" cellspacing="1" cellpadding="4">
511         
512<tr>
513         
514<th colspan="1" rowspan="1">Parameter</th>
515          <th colspan="1" rowspan="1">Value</th> 
516          <th colspan="1" rowspan="1">Notes</th>
517       
518</tr>
519       
520<tr>
521         
522<td colspan="1" rowspan="1">mapred.job.tracker</td>
523          <td colspan="1" rowspan="1">Host or IP and port of <span class="codefrag">JobTracker</span>.</td>
524          <td colspan="1" rowspan="1"><em>host:port</em> pair.</td>
525       
526</tr>
527                   
528<tr>
529                     
530<td colspan="1" rowspan="1">mapred.system.dir</td>
531                      <td colspan="1" rowspan="1">
532                        Path on the HDFS where the Map/Reduce framework stores
533                        system files e.g. <span class="codefrag">/hadoop/mapred/system/</span>.
534                      </td>
535                      <td colspan="1" rowspan="1">
536                        This is in the default filesystem (HDFS) and must be accessible
537                        from both the server and client machines.
538                      </td>
539                   
540</tr>
541                   
542<tr>
543                     
544<td colspan="1" rowspan="1">mapred.local.dir</td>
545                      <td colspan="1" rowspan="1">
546                        Comma-separated list of paths on the local filesystem where
547                        temporary Map/Reduce data is written.
548                      </td>
549                      <td colspan="1" rowspan="1">Multiple paths help spread disk i/o.</td>
550                   
551</tr>
552                   
553<tr>
554                     
555<td colspan="1" rowspan="1">mapred.tasktracker.{map|reduce}.tasks.maximum</td>
556                      <td colspan="1" rowspan="1">
557                        The maximum number of Map/Reduce tasks, which are run
558                        simultaneously on a given <span class="codefrag">TaskTracker</span>, individually.
559                      </td>
560                      <td colspan="1" rowspan="1">
561                        Defaults to 2 (2 maps and 2 reduces), but vary it depending on
562                        your hardware.
563                      </td>
564                   
565</tr>
566                   
567<tr>
568                     
569<td colspan="1" rowspan="1">dfs.hosts/dfs.hosts.exclude</td>
570                      <td colspan="1" rowspan="1">List of permitted/excluded DataNodes.</td>
571                      <td colspan="1" rowspan="1">
572                        If necessary, use these files to control the list of allowable
573                        datanodes.
574                      </td>
575                   
576</tr>
577                   
578<tr>
579                     
580<td colspan="1" rowspan="1">mapred.hosts/mapred.hosts.exclude</td>
581                      <td colspan="1" rowspan="1">List of permitted/excluded TaskTrackers.</td>
582                      <td colspan="1" rowspan="1">
583                        If necessary, use these files to control the list of allowable
584                        TaskTrackers.
585                      </td>
586                   
587</tr>
588       
589<tr>
590         
591<td colspan="1" rowspan="1">mapred.queue.names</td>
592          <td colspan="1" rowspan="1">Comma separated list of queues to which jobs can be submitted.</td>
593          <td colspan="1" rowspan="1">
594            The Map/Reduce system always supports at least one queue
595            with the name as <em>default</em>. Hence, this parameter's
596            value should always contain the string <em>default</em>.
597            Some job schedulers supported in Hadoop, like the
598            <a href="capacity_scheduler.html">Capacity
599            Scheduler</a>, support multiple queues. If such a scheduler is
600            being used, the list of configured queue names must be
601            specified here. Once queues are defined, users can submit
602            jobs to a queue using the property name
603            <em>mapred.job.queue.name</em> in the job configuration.
604            There could be a separate
605            configuration file for configuring properties of these
606            queues that is managed by the scheduler.
607            Refer to the documentation of the scheduler for information on
608            the same.
609          </td>
610       
611</tr>
612       
613<tr>
614         
615<td colspan="1" rowspan="1">mapred.acls.enabled</td>
616          <td colspan="1" rowspan="1">Specifies whether ACLs are supported for controlling job
617              submission and administration</td>
618          <td colspan="1" rowspan="1">
619            If <em>true</em>, ACLs would be checked while submitting
620            and administering jobs. ACLs can be specified using the
621            configuration parameters of the form
622            <em>mapred.queue.queue-name.acl-name</em>, defined below.
623          </td>
624       
625</tr>
626       
627<tr>
628         
629<td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-submit-job</td>
630          <td colspan="1" rowspan="1">List of users and groups that can submit jobs to the
631              specified <em>queue-name</em>.</td>
632          <td colspan="1" rowspan="1">
633            The list of users and groups are both comma separated
634            list of names. The two lists are separated by a blank.
635            Example: <em>user1,user2 group1,group2</em>.
636            If you wish to define only a list of groups, provide
637            a blank at the beginning of the value.
638          </td>
639       
640</tr>
641       
642<tr>
643         
644<td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-administer-job</td>
645          <td colspan="1" rowspan="1">List of users and groups that can change the priority
646              or kill jobs that have been submitted to the
647              specified <em>queue-name</em>.</td>
648          <td colspan="1" rowspan="1">
649            The list of users and groups are both comma separated
650            list of names. The two lists are separated by a blank.
651            Example: <em>user1,user2 group1,group2</em>.
652            If you wish to define only a list of groups, provide
653            a blank at the beginning of the value. Note that an
654            owner of a job can always change the priority or kill
655            his/her own job, irrespective of the ACLs.
656          </td>
657       
658</tr>
659                 
660</table>
661<p>Typically all the above parameters are marked as
662          <a href="api/org/apache/hadoop/conf/Configuration.html#FinalParams">
663          final</a> to ensure that they cannot be overridden by user-applications.
664          </p>
665<a name="N102BF"></a><a name="Real-World+Cluster+Configurations"></a>
666<h5>Real-World Cluster Configurations</h5>
667<p>This section lists some non-default configuration parameters which
668            have been used to run the <em>sort</em> benchmark on very large
669            clusters.</p>
670<ul>
671             
672<li>
673               
674<p>Some non-default configuration values used to run sort900,
675                that is 9TB of data sorted on a cluster with 900 nodes:</p>
676               
677<table class="ForrestTable" cellspacing="1" cellpadding="4">
678                         
679<tr>
680               
681<th colspan="1" rowspan="1">Configuration File</th>
682                            <th colspan="1" rowspan="1">Parameter</th>
683                            <th colspan="1" rowspan="1">Value</th> 
684                            <th colspan="1" rowspan="1">Notes</th>
685                         
686</tr>
687                 
688<tr>
689                   
690<td colspan="1" rowspan="1">conf/hdfs-site.xml</td>
691                    <td colspan="1" rowspan="1">dfs.block.size</td>
692                    <td colspan="1" rowspan="1">134217728</td>
693                    <td colspan="1" rowspan="1">HDFS blocksize of 128MB for large file-systems.</td>
694                 
695</tr>
696                 
697<tr>
698                   
699<td colspan="1" rowspan="1">conf/hdfs-site.xml</td>
700                    <td colspan="1" rowspan="1">dfs.namenode.handler.count</td>
701                    <td colspan="1" rowspan="1">40</td>
702                    <td colspan="1" rowspan="1">
703                      More NameNode server threads to handle RPCs from large
704                      number of DataNodes.
705                    </td>
706                 
707</tr>
708                 
709<tr>
710                   
711<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
712                    <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
713                    <td colspan="1" rowspan="1">20</td>
714                    <td colspan="1" rowspan="1">
715                      Higher number of parallel copies run by reduces to fetch
716                      outputs from very large number of maps.
717                    </td>
718                 
719</tr>
720                 
721<tr>
722                   
723<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
724                    <td colspan="1" rowspan="1">mapred.child.java.opts</td>
725                    <td colspan="1" rowspan="1">-Xmx512M</td>
726                    <td colspan="1" rowspan="1">
727                      Larger heap-size for child jvms of maps/reduces.
728                    </td>
729                 
730</tr>
731                 
732<tr>
733                   
734<td colspan="1" rowspan="1">conf/core-site.xml</td>
735                    <td colspan="1" rowspan="1">fs.inmemory.size.mb</td>
736                    <td colspan="1" rowspan="1">200</td>
737                    <td colspan="1" rowspan="1">
738                      Larger amount of memory allocated for the in-memory
739                      file-system used to merge map-outputs at the reduces.
740                    </td>
741                 
742</tr>
743                 
744<tr>
745                   
746<td colspan="1" rowspan="1">conf/core-site.xml</td>
747                    <td colspan="1" rowspan="1">io.sort.factor</td>
748                    <td colspan="1" rowspan="1">100</td>
749                    <td colspan="1" rowspan="1">More streams merged at once while sorting files.</td>
750                 
751</tr>
752                 
753<tr>
754                   
755<td colspan="1" rowspan="1">conf/core-site.xml</td>
756                    <td colspan="1" rowspan="1">io.sort.mb</td>
757                    <td colspan="1" rowspan="1">200</td>
758                    <td colspan="1" rowspan="1">Higher memory-limit while sorting data.</td>
759                 
760</tr>
761                 
762<tr>
763                   
764<td colspan="1" rowspan="1">conf/core-site.xml</td>
765                    <td colspan="1" rowspan="1">io.file.buffer.size</td>
766                    <td colspan="1" rowspan="1">131072</td>
767                    <td colspan="1" rowspan="1">Size of read/write buffer used in SequenceFiles.</td>
768                 
769</tr>
770               
771</table>
772             
773</li>
774             
775<li>
776               
777<p>Updates to some configuration values to run sort1400 and
778                sort2000, that is 14TB of data sorted on 1400 nodes and 20TB of
779                data sorted on 2000 nodes:</p>
780               
781<table class="ForrestTable" cellspacing="1" cellpadding="4">
782                         
783<tr>
784               
785<th colspan="1" rowspan="1">Configuration File</th>
786                            <th colspan="1" rowspan="1">Parameter</th>
787                            <th colspan="1" rowspan="1">Value</th> 
788                            <th colspan="1" rowspan="1">Notes</th>
789                         
790</tr>
791                 
792<tr>
793                   
794<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
795                    <td colspan="1" rowspan="1">mapred.job.tracker.handler.count</td>
796                    <td colspan="1" rowspan="1">60</td>
797                    <td colspan="1" rowspan="1">
798                      More JobTracker server threads to handle RPCs from large
799                      number of TaskTrackers.
800                    </td>
801                 
802</tr>
803                 
804<tr>
805                   
806<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
807                    <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
808                    <td colspan="1" rowspan="1">50</td>
809                    <td colspan="1" rowspan="1"></td>
810                 
811</tr>
812                 
813<tr>
814                   
815<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
816                    <td colspan="1" rowspan="1">tasktracker.http.threads</td>
817                    <td colspan="1" rowspan="1">50</td>
818                    <td colspan="1" rowspan="1">
819                      More worker threads for the TaskTracker's http server. The
820                      http server is used by reduces to fetch intermediate
821                      map-outputs.
822                    </td>
823                 
824</tr>
825                 
826<tr>
827                   
828<td colspan="1" rowspan="1">conf/mapred-site.xml</td>
829                    <td colspan="1" rowspan="1">mapred.child.java.opts</td>
830                    <td colspan="1" rowspan="1">-Xmx1024M</td>
831                    <td colspan="1" rowspan="1">Larger heap-size for child jvms of maps/reduces.</td>
832                 
833</tr>
834               
835</table>
836             
837</li>
838           
839</ul>
840<a name="N10423"></a><a name="Memory+monitoring"></a>
841<h4> Memory monitoring</h4>
842<p>A <span class="codefrag">TaskTracker</span>(TT) can be configured to monitor memory
843        usage of tasks it spawns, so that badly-behaved jobs do not bring
844        down a machine due to excess memory consumption. With monitoring
845        enabled, every task is assigned a task-limit for virtual memory (VMEM).
846        In addition, every node is assigned a node-limit for VMEM usage.
847        A TT ensures that a task is killed if it, and
848        its descendants, use VMEM over the task's per-task limit. It also
        ensures that one or more tasks are killed if the sum total of VMEM
        usage by all tasks, and their descendants, crosses the node-limit.</p>
851<p>Users can, optionally, specify the VMEM task-limit per job. If no
852        such limit is provided, a default limit is used. A node-limit can be
853        set per node.</p>
854<p>Currently the memory monitoring and management is only supported
        on the Linux platform.</p>
856<p>To enable monitoring for a TT, the
857        following parameters all need to be set:</p>
858<table class="ForrestTable" cellspacing="1" cellpadding="4">
859         
860<tr>
861<th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th>
862</tr>
863         
864<tr>
865<td colspan="1" rowspan="1">mapred.tasktracker.vmem.reserved</td><td colspan="1" rowspan="1">long</td>
866            <td colspan="1" rowspan="1">A number, in bytes, that represents an offset. The total VMEM on
867            the machine, minus this offset, is the VMEM node-limit for all
868            tasks, and their descendants, spawned by the TT.
869          </td>
870</tr>
871         
872<tr>
873<td colspan="1" rowspan="1">mapred.task.default.maxvmem</td><td colspan="1" rowspan="1">long</td>
874            <td colspan="1" rowspan="1">A number, in bytes, that represents the default VMEM task-limit
875            associated with a task. Unless overridden by a job's setting,
876            this number defines the VMEM task-limit.   
877          </td>
878</tr>
879         
880<tr>
881<td colspan="1" rowspan="1">mapred.task.limit.maxvmem</td><td colspan="1" rowspan="1">long</td>
882            <td colspan="1" rowspan="1">A number, in bytes, that represents the upper VMEM task-limit
883            associated with a task. Users, when specifying a VMEM task-limit
884            for their tasks, should not specify a limit which exceeds this amount.
885          </td>
886</tr>
887       
888</table>
889<p>In addition, the following parameters can also be configured.</p>
890<table class="ForrestTable" cellspacing="1" cellpadding="4">
891         
892<tr>
893<th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th>
894</tr>
895         
896<tr>
897<td colspan="1" rowspan="1">mapred.tasktracker.taskmemorymanager.monitoring-interval</td>
898            <td colspan="1" rowspan="1">long</td>
899            <td colspan="1" rowspan="1">The time interval, in milliseconds, between which the TT
900            checks for any memory violation. The default value is 5000 msec
901            (5 seconds).
902          </td>
903</tr>
904       
905</table>
906<p>Here's how the memory monitoring works for a TT.</p>
907<ol>
908         
909<li>If one or more of the configuration parameters described
          above are missing or -1 is specified, memory monitoring is
911          disabled for the TT.
912          </li>
913         
914<li>In addition, monitoring is disabled if
915          <span class="codefrag">mapred.task.default.maxvmem</span> is greater than
916          <span class="codefrag">mapred.task.limit.maxvmem</span>.
917          </li>
918         
919<li>If a TT receives a task whose task-limit is set by the user
920          to a value larger than <span class="codefrag">mapred.task.limit.maxvmem</span>, it
921          logs a warning but executes the task.
922          </li> 
923         
924<li>Periodically, the TT checks the following:
925          <ul>
926           
927<li>If any task's current VMEM usage is greater than that task's
928            VMEM task-limit, the task is killed and reason for killing
            the task is logged in task diagnostics. Such a task is considered
930            failed, i.e., the killing counts towards the task's failure count.
931            </li> 
932           
933<li>If the sum total of VMEM used by all tasks and descendants is
934            greater than the node-limit, the TT kills enough tasks, in the
935            order of least progress made, till the overall VMEM usage falls
            below the node-limit. Such killed tasks are not considered failed
937            and their killing does not count towards the tasks' failure counts.
938            </li>
939         
940</ul>
941         
942</li>
943       
944</ol>
945<p>Schedulers can choose to ease the monitoring pressure on the TT by
946        preventing too many tasks from running on a node and by scheduling
947        tasks only if the TT has enough VMEM free. In addition, Schedulers may
948        choose to consider the physical memory (RAM) available on the node
949        as well. To enable Scheduler support, TTs report their memory settings
950        to the JobTracker in every heartbeat. Before getting into details,
        consider the following additional memory-related parameters that can be
952        configured to enable better scheduling:</p>
953<table class="ForrestTable" cellspacing="1" cellpadding="4">
954         
955<tr>
956<th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th>
957</tr>
958         
959<tr>
960<td colspan="1" rowspan="1">mapred.tasktracker.pmem.reserved</td><td colspan="1" rowspan="1">int</td>
961            <td colspan="1" rowspan="1">A number, in bytes, that represents an offset. The total
962            physical memory (RAM) on the machine, minus this offset, is the
963            recommended RAM node-limit. The RAM node-limit is a hint to a
            Scheduler to schedule only so many tasks such that the sum
965            total of their RAM requirements does not exceed this limit.
966            RAM usage is not monitored by a TT.   
967          </td>
968</tr>
969       
970</table>
971<p>A TT reports the following memory-related numbers in every
972        heartbeat:</p>
973<ul>
974         
975<li>The total VMEM available on the node.</li>
976         
977<li>The value of <span class="codefrag">mapred.tasktracker.vmem.reserved</span>,
978           if set.</li>
979         
980<li>The total RAM available on the node.</li> 
981         
982<li>The value of <span class="codefrag">mapred.tasktracker.pmem.reserved</span>,
983           if set.</li>
984         
985</ul>
986<a name="N104FA"></a><a name="Slaves"></a>
987<h4>Slaves</h4>
988<p>Typically you choose one machine in the cluster to act as the
          <span class="codefrag">NameNode</span> and one machine to act as the
990          <span class="codefrag">JobTracker</span>, exclusively. The rest of the machines act as
991          both a <span class="codefrag">DataNode</span> and <span class="codefrag">TaskTracker</span> and are
992          referred to as <em>slaves</em>.</p>
993<p>List all slave hostnames or IP addresses in your
994          <span class="codefrag">conf/slaves</span> file, one per line.</p>
995<a name="N10519"></a><a name="Logging"></a>
996<h4>Logging</h4>
997<p>Hadoop uses the <a href="http://logging.apache.org/log4j/">Apache
998          log4j</a> via the <a href="http://commons.apache.org/logging/">Apache
999          Commons Logging</a> framework for logging. Edit the
1000          <span class="codefrag">conf/log4j.properties</span> file to customize the Hadoop
1001          daemons' logging configuration (log-formats and so on).</p>
1002<a name="N1052D"></a><a name="History+Logging"></a>
1003<h5>History Logging</h5>
1004<p> The job history files are stored in central location
1005            <span class="codefrag"> hadoop.job.history.location </span> which can be on DFS also,
1006            whose default value is <span class="codefrag">${HADOOP_LOG_DIR}/history</span>.
1007            The history web UI is accessible from job tracker web UI.</p>
1008<p> The history files are also logged to user specified directory
1009            <span class="codefrag">hadoop.job.history.user.location</span> 
1010            which defaults to job output directory. The files are stored in
1011            "_logs/history/" in the specified directory. Hence, by default
1012            they will be in "mapred.output.dir/_logs/history/". User can stop
1013            logging by giving the value <span class="codefrag">none</span> for
1014            <span class="codefrag">hadoop.job.history.user.location</span> 
1015</p>
1016<p> User can view the history logs summary in specified directory
1017            using the following command <br>
1018           
1019<span class="codefrag">$ bin/hadoop job -history output-dir</span>
1020<br> 
1021            This command will print job details, failed and killed tip
1022            details. <br>
1023            More details about the job such as successful tasks and
1024            task attempts made for each task can be viewed using the 
1025            following command <br>
1026           
1027<span class="codefrag">$ bin/hadoop job -history all output-dir</span>
1028<br>
1029</p>
1030<p>Once all the necessary configuration is complete, distribute the files
1031      to the <span class="codefrag">HADOOP_CONF_DIR</span> directory on all the machines,
1032      typically <span class="codefrag">${HADOOP_HOME}/conf</span>.</p>
1033</div>
1034   
1035<a name="N10565"></a><a name="Cluster+Restartability"></a>
1036<h2 class="h3">Cluster Restartability</h2>
1037<div class="section">
1038<a name="N1056B"></a><a name="Map%2FReduce"></a>
1039<h3 class="h4">Map/Reduce</h3>
1040<p>The job tracker restart can recover running jobs if
1041        <span class="codefrag">mapred.jobtracker.restart.recover</span> is set true and
1042        <a href="#Logging">JobHistory logging</a> is enabled. Also
1043        <span class="codefrag">mapred.jobtracker.job.history.block.size</span> value should be
1044        set to an optimal value to dump job history to disk as soon as
1045        possible, the typical value is 3145728(3MB).</p>
1046</div>
1047   
1048   
1049<a name="N10580"></a><a name="Hadoop+Rack+Awareness"></a>
1050<h2 class="h3">Hadoop Rack Awareness</h2>
1051<div class="section">
1052<p>The HDFS and the Map/Reduce components are rack-aware.</p>
<p>The <span class="codefrag">NameNode</span> and the <span class="codefrag">JobTracker</span> obtain the
1054      <span class="codefrag">rack id</span> of the slaves in the cluster by invoking an API
1055      <a href="api/org/apache/hadoop/net/DNSToSwitchMapping.html#resolve(java.util.List)">resolve</a> in an administrator configured
1056      module. The API resolves the slave's DNS name (also IP address) to a
1057      rack id. What module to use can be configured using the configuration
1058      item <span class="codefrag">topology.node.switch.mapping.impl</span>. The default
1059      implementation of the same runs a script/command configured using
1060      <span class="codefrag">topology.script.file.name</span>. If topology.script.file.name is
1061      not set, the rack id <span class="codefrag">/default-rack</span> is returned for any
1062      passed IP address. The additional configuration in the Map/Reduce
1063      part is <span class="codefrag">mapred.cache.task.levels</span> which determines the number
1064      of levels (in the network topology) of caches. So, for example, if it is
1065      the default value of 2, two levels of caches will be constructed -
1066      one for hosts (host -&gt; task mapping) and another for racks
1067      (rack -&gt; task mapping).
1068      </p>
1069</div>
1070   
1071   
1072<a name="N105A6"></a><a name="Hadoop+Startup"></a>
1073<h2 class="h3">Hadoop Startup</h2>
1074<div class="section">
1075<p>To start a Hadoop cluster you will need to start both the HDFS and
1076      Map/Reduce cluster.</p>
1077<p>
1078        Format a new distributed filesystem:<br>
1079       
1080<span class="codefrag">$ bin/hadoop namenode -format</span>
1081     
1082</p>
1083<p>
1084        Start the HDFS with the following command, run on the designated
1085        <span class="codefrag">NameNode</span>:<br>
1086       
1087<span class="codefrag">$ bin/start-dfs.sh</span>
1088     
1089</p>
1090<p>The <span class="codefrag">bin/start-dfs.sh</span> script also consults the
1091      <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span> 
1092      and starts the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p>
1093<p>
1094        Start Map-Reduce with the following command, run on the designated
1095        <span class="codefrag">JobTracker</span>:<br>
1096       
1097<span class="codefrag">$ bin/start-mapred.sh</span>
1098     
1099</p>
1100<p>The <span class="codefrag">bin/start-mapred.sh</span> script also consults the
1101      <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span> 
1102      and starts the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves.
1103      </p>
1104</div>
1105   
1106   
1107<a name="N105EC"></a><a name="Hadoop+Shutdown"></a>
1108<h2 class="h3">Hadoop Shutdown</h2>
1109<div class="section">
1110<p>
1111        Stop HDFS with the following command, run on the designated
1112        <span class="codefrag">NameNode</span>:<br>
1113       
1114<span class="codefrag">$ bin/stop-dfs.sh</span>
1115     
1116</p>
1117<p>The <span class="codefrag">bin/stop-dfs.sh</span> script also consults the
1118      <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span> 
1119      and stops the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p>
1120<p>
        Stop Map/Reduce with the following command, run on the designated
        <span class="codefrag">JobTracker</span>:<br>
1123       
1124<span class="codefrag">$ bin/stop-mapred.sh</span>
1125<br>
1126     
1127</p>
1128<p>The <span class="codefrag">bin/stop-mapred.sh</span> script also consults the
1129      <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span> 
1130      and stops the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves.</p>
1131</div>
1132 
1133</div>
1134<!--+
1135    |end content
1136    +-->
1137<div class="clearboth">&nbsp;</div>
1138</div>
1139<div id="footer">
1140<!--+
1141    |start bottomstrip
1142    +-->
1143<div class="lastmodified">
1144<script type="text/javascript"><!--
1145document.write("Last Published: " + document.lastModified);
1146//  --></script>
1147</div>
1148<div class="copyright">
1149        Copyright &copy;
1150         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
1151</div>
1152<!--+
1153    |end bottomstrip
1154    +-->
1155</div>
1156</body>
1157</html>
Note: See TracBrowser for help on using the repository browser.