[120] | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
---|
| 2 | <html> |
---|
| 3 | <head> |
---|
| 4 | <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
---|
| 5 | <meta content="Apache Forrest" name="Generator"> |
---|
| 6 | <meta name="Forrest-version" content="0.8"> |
---|
| 7 | <meta name="Forrest-skin-name" content="pelt"> |
---|
| 8 | <title>Cluster Setup</title> |
---|
| 9 | <link type="text/css" href="skin/basic.css" rel="stylesheet"> |
---|
| 10 | <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> |
---|
| 11 | <link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> |
---|
| 12 | <link type="text/css" href="skin/profile.css" rel="stylesheet"> |
---|
| 13 | <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> |
---|
| 14 | <link rel="shortcut icon" href="images/favicon.ico"> |
---|
| 15 | </head> |
---|
| 16 | <body onload="init()"> |
---|
| 17 | <script type="text/javascript">ndeSetTextSize();</script> |
---|
| 18 | <div id="top"> |
---|
| 19 | <!--+ |
---|
| 20 | |breadtrail |
---|
| 21 | +--> |
---|
| 22 | <div class="breadtrail"> |
---|
| 23 | <a href="http://www.apache.org/">Apache</a> > <a href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> |
---|
| 24 | </div> |
---|
| 25 | <!--+ |
---|
| 26 | |header |
---|
| 27 | +--> |
---|
| 28 | <div class="header"> |
---|
| 29 | <!--+ |
---|
| 30 | |start group logo |
---|
| 31 | +--> |
---|
| 32 | <div class="grouplogo"> |
---|
| 33 | <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> |
---|
| 34 | </div> |
---|
| 35 | <!--+ |
---|
| 36 | |end group logo |
---|
| 37 | +--> |
---|
| 38 | <!--+ |
---|
| 39 | |start Project Logo |
---|
| 40 | +--> |
---|
| 41 | <div class="projectlogo"> |
---|
| 42 | <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a> |
---|
| 43 | </div> |
---|
| 44 | <!--+ |
---|
| 45 | |end Project Logo |
---|
| 46 | +--> |
---|
| 47 | <!--+ |
---|
| 48 | |start Search |
---|
| 49 | +--> |
---|
| 50 | <div class="searchbox"> |
---|
| 51 | <form action="http://www.google.com/search" method="get" class="roundtopsmall"> |
---|
| 52 | <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> |
---|
| 53 | <input name="Search" value="Search" type="submit"> |
---|
| 54 | </form> |
---|
| 55 | </div> |
---|
| 56 | <!--+ |
---|
| 57 | |end search |
---|
| 58 | +--> |
---|
| 59 | <!--+ |
---|
| 60 | |start Tabs |
---|
| 61 | +--> |
---|
| 62 | <ul id="tabs"> |
---|
| 63 | <li> |
---|
| 64 | <a class="unselected" href="http://hadoop.apache.org/core/">Project</a> |
---|
| 65 | </li> |
---|
| 66 | <li> |
---|
| 67 | <a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a> |
---|
| 68 | </li> |
---|
| 69 | <li class="current"> |
---|
| 70 | <a class="selected" href="index.html">Hadoop 0.20 Documentation</a> |
---|
| 71 | </li> |
---|
| 72 | </ul> |
---|
| 73 | <!--+ |
---|
| 74 | |end Tabs |
---|
| 75 | +--> |
---|
| 76 | </div> |
---|
| 77 | </div> |
---|
| 78 | <div id="main"> |
---|
| 79 | <div id="publishedStrip"> |
---|
| 80 | <!--+ |
---|
| 81 | |start Subtabs |
---|
| 82 | +--> |
---|
| 83 | <div id="level2tabs"></div> |
---|
| 84 | <!--+ |
---|
| 85 | |end Endtabs |
---|
| 86 | +--> |
---|
| 87 | <script type="text/javascript"><!-- |
---|
| 88 | document.write("Last Published: " + document.lastModified); |
---|
| 89 | // --></script> |
---|
| 90 | </div> |
---|
| 91 | <!--+ |
---|
| 92 | |breadtrail |
---|
| 93 | +--> |
---|
| 94 | <div class="breadtrail"> |
---|
| 95 | |
---|
| 96 | |
---|
| 97 | </div> |
---|
| 98 | <!--+ |
---|
| 99 | |start Menu, mainarea |
---|
| 100 | +--> |
---|
| 101 | <!--+ |
---|
| 102 | |start Menu |
---|
| 103 | +--> |
---|
| 104 | <div id="menu"> |
---|
| 105 | <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Getting Started</div> |
---|
| 106 | <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> |
---|
| 107 | <div class="menuitem"> |
---|
| 108 | <a href="index.html">Overview</a> |
---|
| 109 | </div> |
---|
| 110 | <div class="menuitem"> |
---|
| 111 | <a href="quickstart.html">Quick Start</a> |
---|
| 112 | </div> |
---|
| 113 | <div class="menupage"> |
---|
| 114 | <div class="menupagetitle">Cluster Setup</div> |
---|
| 115 | </div> |
---|
| 116 | <div class="menuitem"> |
---|
| 117 | <a href="mapred_tutorial.html">Map/Reduce Tutorial</a> |
---|
| 118 | </div> |
---|
| 119 | </div> |
---|
| 120 | <div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Programming Guides</div> |
---|
| 121 | <div id="menu_1.2" class="menuitemgroup"> |
---|
| 122 | <div class="menuitem"> |
---|
| 123 | <a href="commands_manual.html">Commands</a> |
---|
| 124 | </div> |
---|
| 125 | <div class="menuitem"> |
---|
| 126 | <a href="distcp.html">DistCp</a> |
---|
| 127 | </div> |
---|
| 128 | <div class="menuitem"> |
---|
| 129 | <a href="native_libraries.html">Native Libraries</a> |
---|
| 130 | </div> |
---|
| 131 | <div class="menuitem"> |
---|
| 132 | <a href="streaming.html">Streaming</a> |
---|
| 133 | </div> |
---|
| 134 | <div class="menuitem"> |
---|
| 135 | <a href="fair_scheduler.html">Fair Scheduler</a> |
---|
| 136 | </div> |
---|
| 137 | <div class="menuitem"> |
---|
| 138 | <a href="capacity_scheduler.html">Capacity Scheduler</a> |
---|
| 139 | </div> |
---|
| 140 | <div class="menuitem"> |
---|
| 141 | <a href="service_level_auth.html">Service Level Authorization</a> |
---|
| 142 | </div> |
---|
| 143 | <div class="menuitem"> |
---|
| 144 | <a href="vaidya.html">Vaidya</a> |
---|
| 145 | </div> |
---|
| 146 | <div class="menuitem"> |
---|
| 147 | <a href="hadoop_archives.html">Archives</a> |
---|
| 148 | </div> |
---|
| 149 | </div> |
---|
| 150 | <div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">HDFS</div> |
---|
| 151 | <div id="menu_1.3" class="menuitemgroup"> |
---|
| 152 | <div class="menuitem"> |
---|
| 153 | <a href="hdfs_user_guide.html">User Guide</a> |
---|
| 154 | </div> |
---|
| 155 | <div class="menuitem"> |
---|
| 156 | <a href="hdfs_design.html">Architecture</a> |
---|
| 157 | </div> |
---|
| 158 | <div class="menuitem"> |
---|
| 159 | <a href="hdfs_shell.html">File System Shell Guide</a> |
---|
| 160 | </div> |
---|
| 161 | <div class="menuitem"> |
---|
| 162 | <a href="hdfs_permissions_guide.html">Permissions Guide</a> |
---|
| 163 | </div> |
---|
| 164 | <div class="menuitem"> |
---|
| 165 | <a href="hdfs_quota_admin_guide.html">Quotas Guide</a> |
---|
| 166 | </div> |
---|
| 167 | <div class="menuitem"> |
---|
| 168 | <a href="SLG_user_guide.html">Synthetic Load Generator Guide</a> |
---|
| 169 | </div> |
---|
| 170 | <div class="menuitem"> |
---|
| 171 | <a href="libhdfs.html">C API libhdfs</a> |
---|
| 172 | </div> |
---|
| 173 | </div> |
---|
| 174 | <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div> |
---|
| 175 | <div id="menu_1.4" class="menuitemgroup"> |
---|
| 176 | <div class="menuitem"> |
---|
| 177 | <a href="hod_user_guide.html">User Guide</a> |
---|
| 178 | </div> |
---|
| 179 | <div class="menuitem"> |
---|
| 180 | <a href="hod_admin_guide.html">Admin Guide</a> |
---|
| 181 | </div> |
---|
| 182 | <div class="menuitem"> |
---|
| 183 | <a href="hod_config_guide.html">Config Guide</a> |
---|
| 184 | </div> |
---|
| 185 | </div> |
---|
| 186 | <div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div> |
---|
| 187 | <div id="menu_1.5" class="menuitemgroup"> |
---|
| 188 | <div class="menuitem"> |
---|
| 189 | <a href="api/index.html">API Docs</a> |
---|
| 190 | </div> |
---|
| 191 | <div class="menuitem"> |
---|
| 192 | <a href="jdiff/changes.html">API Changes</a> |
---|
| 193 | </div> |
---|
| 194 | <div class="menuitem"> |
---|
| 195 | <a href="http://wiki.apache.org/hadoop/">Wiki</a> |
---|
| 196 | </div> |
---|
| 197 | <div class="menuitem"> |
---|
| 198 | <a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a> |
---|
| 199 | </div> |
---|
| 200 | <div class="menuitem"> |
---|
| 201 | <a href="releasenotes.html">Release Notes</a> |
---|
| 202 | </div> |
---|
| 203 | <div class="menuitem"> |
---|
| 204 | <a href="changes.html">Change Log</a> |
---|
| 205 | </div> |
---|
| 206 | </div> |
---|
| 207 | <div id="credit"></div> |
---|
| 208 | <div id="roundbottom"> |
---|
| 209 | <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> |
---|
| 210 | <!--+ |
---|
| 211 | |alternative credits |
---|
| 212 | +--> |
---|
| 213 | <div id="credit2"></div> |
---|
| 214 | </div> |
---|
| 215 | <!--+ |
---|
| 216 | |end Menu |
---|
| 217 | +--> |
---|
| 218 | <!--+ |
---|
| 219 | |start content |
---|
| 220 | +--> |
---|
| 221 | <div id="content"> |
---|
| 222 | <div title="Portable Document Format" class="pdflink"> |
---|
| 223 | <a class="dida" href="cluster_setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> |
---|
| 224 | PDF</a> |
---|
| 225 | </div> |
---|
| 226 | <h1>Cluster Setup</h1> |
---|
| 227 | <div id="minitoc-area"> |
---|
| 228 | <ul class="minitoc"> |
---|
| 229 | <li> |
---|
| 230 | <a href="#Purpose">Purpose</a> |
---|
| 231 | </li> |
---|
| 232 | <li> |
---|
| 233 | <a href="#Pre-requisites">Pre-requisites</a> |
---|
| 234 | </li> |
---|
| 235 | <li> |
---|
| 236 | <a href="#Installation">Installation</a> |
---|
| 237 | </li> |
---|
| 238 | <li> |
---|
| 239 | <a href="#Configuration">Configuration</a> |
---|
| 240 | <ul class="minitoc"> |
---|
| 241 | <li> |
---|
| 242 | <a href="#Configuration+Files">Configuration Files</a> |
---|
| 243 | </li> |
---|
| 244 | <li> |
---|
| 245 | <a href="#Site+Configuration">Site Configuration</a> |
---|
| 246 | <ul class="minitoc"> |
---|
| 247 | <li> |
---|
| 248 | <a href="#Configuring+the+Environment+of+the+Hadoop+Daemons">Configuring the Environment of the Hadoop Daemons</a> |
---|
| 249 | </li> |
---|
| 250 | <li> |
---|
| 251 | <a href="#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a> |
---|
| 252 | </li> |
---|
| 253 | <li> |
---|
| 254 | <a href="#Memory+monitoring"> Memory monitoring</a> |
---|
| 255 | </li> |
---|
| 256 | <li> |
---|
| 257 | <a href="#Slaves">Slaves</a> |
---|
| 258 | </li> |
---|
| 259 | <li> |
---|
| 260 | <a href="#Logging">Logging</a> |
---|
| 261 | </li> |
---|
| 262 | </ul> |
---|
| 263 | </li> |
---|
| 264 | </ul> |
---|
| 265 | </li> |
---|
| 266 | <li> |
---|
| 267 | <a href="#Cluster+Restartability">Cluster Restartability</a> |
---|
| 268 | <ul class="minitoc"> |
---|
| 269 | <li> |
---|
| 270 | <a href="#Map%2FReduce">Map/Reduce</a> |
---|
| 271 | </li> |
---|
| 272 | </ul> |
---|
| 273 | </li> |
---|
| 274 | <li> |
---|
| 275 | <a href="#Hadoop+Rack+Awareness">Hadoop Rack Awareness</a> |
---|
| 276 | </li> |
---|
| 277 | <li> |
---|
| 278 | <a href="#Hadoop+Startup">Hadoop Startup</a> |
---|
| 279 | </li> |
---|
| 280 | <li> |
---|
| 281 | <a href="#Hadoop+Shutdown">Hadoop Shutdown</a> |
---|
| 282 | </li> |
---|
| 283 | </ul> |
---|
| 284 | </div> |
---|
| 285 | |
---|
| 286 | |
---|
| 287 | <a name="N1000D"></a><a name="Purpose"></a> |
---|
| 288 | <h2 class="h3">Purpose</h2> |
---|
| 289 | <div class="section"> |
---|
| 290 | <p>This document describes how to install, configure and manage non-trivial |
---|
| 291 | Hadoop clusters ranging from a few nodes to extremely large clusters with |
---|
| 292 | thousands of nodes.</p> |
---|
| 293 | <p> |
---|
| 294 | To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>). |
---|
| 295 | </p> |
---|
| 296 | </div> |
---|
| 297 | |
---|
| 298 | |
---|
| 299 | <a name="N1001E"></a><a name="Pre-requisites"></a> |
---|
| 300 | <h2 class="h3">Pre-requisites</h2> |
---|
| 301 | <div class="section"> |
---|
| 302 | <ol> |
---|
| 303 | |
---|
| 304 | <li> |
---|
| 305 | Make sure all <a href="quickstart.html#PreReqs">requisite</a> software |
---|
| 306 | is installed on all nodes in your cluster. |
---|
| 307 | </li> |
---|
| 308 | |
---|
| 309 | <li> |
---|
| 310 | |
---|
| 311 | <a href="quickstart.html#Download">Get</a> the Hadoop software. |
---|
| 312 | </li> |
---|
| 313 | |
---|
| 314 | </ol> |
---|
| 315 | </div> |
---|
| 316 | |
---|
| 317 | |
---|
| 318 | <a name="N10036"></a><a name="Installation"></a> |
---|
| 319 | <h2 class="h3">Installation</h2> |
---|
| 320 | <div class="section"> |
---|
| 321 | <p>Installing a Hadoop cluster typically involves unpacking the software |
---|
| 322 | on all the machines in the cluster.</p> |
---|
| 323 | <p>Typically one machine in the cluster is designated as the |
---|
| 324 | <span class="codefrag">NameNode</span> and another machine as the <span class="codefrag">JobTracker</span>, |
---|
| 325 | exclusively. These are the <em>masters</em>. The rest of the machines in |
---|
| 326 | the cluster act as both <span class="codefrag">DataNode</span> <em>and</em> |
---|
| 327 | <span class="codefrag">TaskTracker</span>. These are the <em>slaves</em>.</p> |
---|
| 328 | <p>The root of the distribution is referred to as |
---|
| 329 | <span class="codefrag">HADOOP_HOME</span>. All machines in the cluster usually have the same |
---|
| 330 | <span class="codefrag">HADOOP_HOME</span> path.</p> |
---|
| 331 | </div> |
---|
| 332 | |
---|
| 333 | |
---|
| 334 | <a name="N10061"></a><a name="Configuration"></a> |
---|
| 335 | <h2 class="h3">Configuration</h2> |
---|
| 336 | <div class="section"> |
---|
| 337 | <p>The following sections describe how to configure a Hadoop cluster.</p> |
---|
| 338 | <a name="N1006A"></a><a name="Configuration+Files"></a> |
---|
| 339 | <h3 class="h4">Configuration Files</h3> |
---|
| 340 | <p>Hadoop configuration is driven by two types of important |
---|
| 341 | configuration files:</p> |
---|
| 342 | <ol> |
---|
| 343 | |
---|
| 344 | <li> |
---|
| 345 | Read-only default configuration - |
---|
| 346 | <a href="http://hadoop.apache.org/core/docs/current/core-default.html">src/core/core-default.xml</a>, |
---|
| 347 | <a href="http://hadoop.apache.org/core/docs/current/hdfs-default.html">src/hdfs/hdfs-default.xml</a> and |
---|
| 348 | <a href="http://hadoop.apache.org/core/docs/current/mapred-default.html">src/mapred/mapred-default.xml</a>. |
---|
| 349 | </li> |
---|
| 350 | |
---|
| 351 | <li> |
---|
| 352 | Site-specific configuration - |
---|
| 353 | <em>conf/core-site.xml</em>, |
---|
| 354 | <em>conf/hdfs-site.xml</em> and |
---|
| 355 | <em>conf/mapred-site.xml</em>. |
---|
| 356 | </li> |
---|
| 357 | |
---|
| 358 | </ol> |
---|
| 359 | <p>To learn more about how the Hadoop framework is controlled by these |
---|
| 360 | configuration files, look |
---|
| 361 | <a href="api/org/apache/hadoop/conf/Configuration.html">here</a>.</p> |
---|
| 362 | <p>Additionally, you can control the Hadoop scripts found in the |
---|
| 363 | <span class="codefrag">bin/</span> directory of the distribution, by setting site-specific |
---|
| 364 | values via the <span class="codefrag">conf/hadoop-env.sh</span>.</p> |
---|
| 365 | <a name="N100A2"></a><a name="Site+Configuration"></a> |
---|
| 366 | <h3 class="h4">Site Configuration</h3> |
---|
| 367 | <p>To configure the Hadoop cluster you will need to configure the |
---|
| 368 | <em>environment</em> in which the Hadoop daemons execute as well as |
---|
| 369 | the <em>configuration parameters</em> for the Hadoop daemons.</p> |
---|
| 370 | <p>The Hadoop daemons are <span class="codefrag">NameNode</span>/<span class="codefrag">DataNode</span> |
---|
| 371 | and <span class="codefrag">JobTracker</span>/<span class="codefrag">TaskTracker</span>.</p> |
---|
| 372 | <a name="N100C0"></a><a name="Configuring+the+Environment+of+the+Hadoop+Daemons"></a> |
---|
| 373 | <h4>Configuring the Environment of the Hadoop Daemons</h4> |
---|
| 374 | <p>Administrators should use the <span class="codefrag">conf/hadoop-env.sh</span> script |
---|
| 375 | to do site-specific customization of the Hadoop daemons' process |
---|
| 376 | environment.</p> |
---|
| 377 | <p>At the very least you should specify the |
---|
| 378 | <span class="codefrag">JAVA_HOME</span> so that it is correctly defined on each |
---|
| 379 | remote node.</p> |
---|
| 380 | <p>Administrators can configure individual daemons using the |
---|
| 381 | configuration options <span class="codefrag">HADOOP_*_OPTS</span>. Various options |
---|
| 382 | available are shown below in the table. </p> |
---|
| 383 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 384 | |
---|
| 385 | <tr> |
---|
| 386 | <th colspan="1" rowspan="1">Daemon</th><th colspan="1" rowspan="1">Configure Options</th> |
---|
| 387 | </tr> |
---|
| 388 | |
---|
| 389 | <tr> |
---|
| 390 | <td colspan="1" rowspan="1">NameNode</td><td colspan="1" rowspan="1">HADOOP_NAMENODE_OPTS</td> |
---|
| 391 | </tr> |
---|
| 392 | |
---|
| 393 | <tr> |
---|
| 394 | <td colspan="1" rowspan="1">DataNode</td><td colspan="1" rowspan="1">HADOOP_DATANODE_OPTS</td> |
---|
| 395 | </tr> |
---|
| 396 | |
---|
| 397 | <tr> |
---|
| 398 | <td colspan="1" rowspan="1">SecondaryNamenode</td> |
---|
| 399 | <td colspan="1" rowspan="1">HADOOP_SECONDARYNAMENODE_OPTS</td> |
---|
| 400 | </tr> |
---|
| 401 | |
---|
| 402 | <tr> |
---|
| 403 | <td colspan="1" rowspan="1">JobTracker</td><td colspan="1" rowspan="1">HADOOP_JOBTRACKER_OPTS</td> |
---|
| 404 | </tr> |
---|
| 405 | |
---|
| 406 | <tr> |
---|
| 407 | <td colspan="1" rowspan="1">TaskTracker</td><td colspan="1" rowspan="1">HADOOP_TASKTRACKER_OPTS</td> |
---|
| 408 | </tr> |
---|
| 409 | |
---|
| 410 | </table> |
---|
| 411 | <p> For example, to configure the NameNode to use parallelGC, the |
---|
| 412 | following statement should be added in <span class="codefrag">hadoop-env.sh</span> : |
---|
| 413 | <br> |
---|
| 414 | <span class="codefrag"> |
---|
| 415 | export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}" |
---|
| 416 | </span> |
---|
| 417 | <br> |
---|
| 418 | </p> |
---|
| 419 | <p>Other useful configuration parameters that you can customize |
---|
| 420 | include:</p> |
---|
| 421 | <ul> |
---|
| 422 | |
---|
| 423 | <li> |
---|
| 424 | |
---|
| 425 | <span class="codefrag">HADOOP_LOG_DIR</span> - The directory where the daemons' |
---|
| 426 | log files are stored. They are automatically created if they don't |
---|
| 427 | exist. |
---|
| 428 | </li> |
---|
| 429 | |
---|
| 430 | <li> |
---|
| 431 | |
---|
| 432 | <span class="codefrag">HADOOP_HEAPSIZE</span> - The maximum heap size |
---|
| 433 | to use, in MB, e.g. <span class="codefrag">1000MB</span>. This is used to |
---|
| 434 | configure the heap size for the hadoop daemon. By default, |
---|
| 435 | the value is <span class="codefrag">1000MB</span>. |
---|
| 436 | </li> |
---|
| 437 | |
---|
| 438 | </ul> |
---|
| 439 | <a name="N1013B"></a><a name="Configuring+the+Hadoop+Daemons"></a> |
---|
| 440 | <h4>Configuring the Hadoop Daemons</h4> |
---|
| 441 | <p>This section deals with important parameters to be specified in the |
---|
| 442 | following: |
---|
| 443 | <br> |
---|
| 444 | |
---|
| 445 | <span class="codefrag">conf/core-site.xml</span>:</p> |
---|
| 446 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 447 | |
---|
| 448 | <tr> |
---|
| 449 | |
---|
| 450 | <th colspan="1" rowspan="1">Parameter</th> |
---|
| 451 | <th colspan="1" rowspan="1">Value</th> |
---|
| 452 | <th colspan="1" rowspan="1">Notes</th> |
---|
| 453 | |
---|
| 454 | </tr> |
---|
| 455 | |
---|
| 456 | <tr> |
---|
| 457 | |
---|
| 458 | <td colspan="1" rowspan="1">fs.default.name</td> |
---|
| 459 | <td colspan="1" rowspan="1">URI of <span class="codefrag">NameNode</span>.</td> |
---|
| 460 | <td colspan="1" rowspan="1"><em>hdfs://hostname/</em></td> |
---|
| 461 | |
---|
| 462 | </tr> |
---|
| 463 | |
---|
| 464 | </table> |
---|
| 465 | <p> |
---|
| 466 | <br> |
---|
| 467 | <span class="codefrag">conf/hdfs-site.xml</span>:</p> |
---|
| 468 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 469 | |
---|
| 470 | <tr> |
---|
| 471 | |
---|
| 472 | <th colspan="1" rowspan="1">Parameter</th> |
---|
| 473 | <th colspan="1" rowspan="1">Value</th> |
---|
| 474 | <th colspan="1" rowspan="1">Notes</th> |
---|
| 475 | |
---|
| 476 | </tr> |
---|
| 477 | |
---|
| 478 | <tr> |
---|
| 479 | |
---|
| 480 | <td colspan="1" rowspan="1">dfs.name.dir</td> |
---|
| 481 | <td colspan="1" rowspan="1"> |
---|
| 482 | Path on the local filesystem where the <span class="codefrag">NameNode</span> |
---|
| 483 | stores the namespace and transaction logs persistently.</td> |
---|
| 484 | <td colspan="1" rowspan="1"> |
---|
| 485 | If this is a comma-delimited list of directories then the name |
---|
| 486 | table is replicated in all of the directories, for redundancy. |
---|
| 487 | </td> |
---|
| 488 | |
---|
| 489 | </tr> |
---|
| 490 | |
---|
| 491 | <tr> |
---|
| 492 | |
---|
| 493 | <td colspan="1" rowspan="1">dfs.data.dir</td> |
---|
| 494 | <td colspan="1" rowspan="1"> |
---|
| 495 | Comma separated list of paths on the local filesystem of a |
---|
| 496 | <span class="codefrag">DataNode</span> where it should store its blocks. |
---|
| 497 | </td> |
---|
| 498 | <td colspan="1" rowspan="1"> |
---|
| 499 | If this is a comma-delimited list of directories, then data will |
---|
| 500 | be stored in all named directories, typically on different |
---|
| 501 | devices. |
---|
| 502 | </td> |
---|
| 503 | |
---|
| 504 | </tr> |
---|
| 505 | |
---|
| 506 | </table> |
---|
| 507 | <p> |
---|
| 508 | <br> |
---|
| 509 | <span class="codefrag">conf/mapred-site.xml</span>:</p> |
---|
| 510 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 511 | |
---|
| 512 | <tr> |
---|
| 513 | |
---|
| 514 | <th colspan="1" rowspan="1">Parameter</th> |
---|
| 515 | <th colspan="1" rowspan="1">Value</th> |
---|
| 516 | <th colspan="1" rowspan="1">Notes</th> |
---|
| 517 | |
---|
| 518 | </tr> |
---|
| 519 | |
---|
| 520 | <tr> |
---|
| 521 | |
---|
| 522 | <td colspan="1" rowspan="1">mapred.job.tracker</td> |
---|
| 523 | <td colspan="1" rowspan="1">Host or IP and port of <span class="codefrag">JobTracker</span>.</td> |
---|
| 524 | <td colspan="1" rowspan="1"><em>host:port</em> pair.</td> |
---|
| 525 | |
---|
| 526 | </tr> |
---|
| 527 | |
---|
| 528 | <tr> |
---|
| 529 | |
---|
| 530 | <td colspan="1" rowspan="1">mapred.system.dir</td> |
---|
| 531 | <td colspan="1" rowspan="1"> |
---|
| 532 | Path on the HDFS where the Map/Reduce framework stores |
---|
| 533 | system files e.g. <span class="codefrag">/hadoop/mapred/system/</span>. |
---|
| 534 | </td> |
---|
| 535 | <td colspan="1" rowspan="1"> |
---|
| 536 | This is in the default filesystem (HDFS) and must be accessible |
---|
| 537 | from both the server and client machines. |
---|
| 538 | </td> |
---|
| 539 | |
---|
| 540 | </tr> |
---|
| 541 | |
---|
| 542 | <tr> |
---|
| 543 | |
---|
| 544 | <td colspan="1" rowspan="1">mapred.local.dir</td> |
---|
| 545 | <td colspan="1" rowspan="1"> |
---|
| 546 | Comma-separated list of paths on the local filesystem where |
---|
| 547 | temporary Map/Reduce data is written. |
---|
| 548 | </td> |
---|
| 549 | <td colspan="1" rowspan="1">Multiple paths help spread disk i/o.</td> |
---|
| 550 | |
---|
| 551 | </tr> |
---|
| 552 | |
---|
| 553 | <tr> |
---|
| 554 | |
---|
| 555 | <td colspan="1" rowspan="1">mapred.tasktracker.{map|reduce}.tasks.maximum</td> |
---|
| 556 | <td colspan="1" rowspan="1"> |
---|
| 557 | The maximum number of Map/Reduce tasks, which are run |
---|
| 558 | simultaneously on a given <span class="codefrag">TaskTracker</span>, individually. |
---|
| 559 | </td> |
---|
| 560 | <td colspan="1" rowspan="1"> |
---|
| 561 | Defaults to 2 (2 maps and 2 reduces), but vary it depending on |
---|
| 562 | your hardware. |
---|
| 563 | </td> |
---|
| 564 | |
---|
| 565 | </tr> |
---|
| 566 | |
---|
| 567 | <tr> |
---|
| 568 | |
---|
| 569 | <td colspan="1" rowspan="1">dfs.hosts/dfs.hosts.exclude</td> |
---|
| 570 | <td colspan="1" rowspan="1">List of permitted/excluded DataNodes.</td> |
---|
| 571 | <td colspan="1" rowspan="1"> |
---|
| 572 | If necessary, use these files to control the list of allowable |
---|
| 573 | datanodes. |
---|
| 574 | </td> |
---|
| 575 | |
---|
| 576 | </tr> |
---|
| 577 | |
---|
| 578 | <tr> |
---|
| 579 | |
---|
| 580 | <td colspan="1" rowspan="1">mapred.hosts/mapred.hosts.exclude</td> |
---|
| 581 | <td colspan="1" rowspan="1">List of permitted/excluded TaskTrackers.</td> |
---|
| 582 | <td colspan="1" rowspan="1"> |
---|
| 583 | If necessary, use these files to control the list of allowable |
---|
| 584 | TaskTrackers. |
---|
| 585 | </td> |
---|
| 586 | |
---|
| 587 | </tr> |
---|
| 588 | |
---|
| 589 | <tr> |
---|
| 590 | |
---|
| 591 | <td colspan="1" rowspan="1">mapred.queue.names</td> |
---|
| 592 | <td colspan="1" rowspan="1">Comma separated list of queues to which jobs can be submitted.</td> |
---|
| 593 | <td colspan="1" rowspan="1"> |
---|
| 594 | The Map/Reduce system always supports at least one queue |
---|
| 595 | with the name as <em>default</em>. Hence, this parameter's |
---|
| 596 | value should always contain the string <em>default</em>. |
---|
| 597 | Some job schedulers supported in Hadoop, like the |
---|
| 598 | <a href="capacity_scheduler.html">Capacity |
---|
| 599 | Scheduler</a>, support multiple queues. If such a scheduler is |
---|
| 600 | being used, the list of configured queue names must be |
---|
| 601 | specified here. Once queues are defined, users can submit |
---|
| 602 | jobs to a queue using the property name |
---|
| 603 | <em>mapred.job.queue.name</em> in the job configuration. |
---|
| 604 | There could be a separate |
---|
| 605 | configuration file for configuring properties of these |
---|
| 606 | queues that is managed by the scheduler. |
---|
| 607 | Refer to the documentation of the scheduler for information on |
---|
| 608 | the same. |
---|
| 609 | </td> |
---|
| 610 | |
---|
| 611 | </tr> |
---|
| 612 | |
---|
| 613 | <tr> |
---|
| 614 | |
---|
| 615 | <td colspan="1" rowspan="1">mapred.acls.enabled</td> |
---|
| 616 | <td colspan="1" rowspan="1">Specifies whether ACLs are supported for controlling job |
---|
| 617 | submission and administration</td> |
---|
| 618 | <td colspan="1" rowspan="1"> |
---|
| 619 | If <em>true</em>, ACLs would be checked while submitting |
---|
| 620 | and administering jobs. ACLs can be specified using the |
---|
| 621 | configuration parameters of the form |
---|
| 622 | <em>mapred.queue.queue-name.acl-name</em>, defined below. |
---|
| 623 | </td> |
---|
| 624 | |
---|
| 625 | </tr> |
---|
| 626 | |
---|
| 627 | <tr> |
---|
| 628 | |
---|
| 629 | <td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-submit-job</td> |
---|
| 630 | <td colspan="1" rowspan="1">List of users and groups that can submit jobs to the |
---|
| 631 | specified <em>queue-name</em>.</td> |
---|
| 632 | <td colspan="1" rowspan="1"> |
---|
| 633 | The list of users and the list of groups are both comma-separated |
---|
| 634 | lists of names. The two lists are separated by a blank. |
---|
| 635 | Example: <em>user1,user2 group1,group2</em>. |
---|
| 636 | If you wish to define only a list of groups, provide |
---|
| 637 | a blank at the beginning of the value. |
---|
| 638 | </td> |
---|
| 639 | |
---|
| 640 | </tr> |
---|
| 641 | |
---|
| 642 | <tr> |
---|
| 643 | |
---|
| 644 | <td colspan="1" rowspan="1">mapred.queue.<em>queue-name</em>.acl-administer-job</td> |
---|
| 645 | <td colspan="1" rowspan="1">List of users and groups that can change the priority |
---|
| 646 | or kill jobs that have been submitted to the |
---|
| 647 | specified <em>queue-name</em>.</td> |
---|
| 648 | <td colspan="1" rowspan="1"> |
---|
| 649 | The list of users and the list of groups are both comma-separated |
---|
| 650 | lists of names. The two lists are separated by a blank. |
---|
| 651 | Example: <em>user1,user2 group1,group2</em>. |
---|
| 652 | If you wish to define only a list of groups, provide |
---|
| 653 | a blank at the beginning of the value. Note that an |
---|
| 654 | owner of a job can always change the priority or kill |
---|
| 655 | his/her own job, irrespective of the ACLs. |
---|
| 656 | </td> |
---|
| 657 | |
---|
| 658 | </tr> |
---|
| 659 | |
---|
| 660 | </table> |
---|
| 661 | <p>Typically all the above parameters are marked as |
---|
| 662 | <a href="api/org/apache/hadoop/conf/Configuration.html#FinalParams"> |
---|
| 663 | final</a> to ensure that they cannot be overridden by user-applications. |
---|
| 664 | </p> |
---|
| 665 | <a name="N102BF"></a><a name="Real-World+Cluster+Configurations"></a> |
---|
| 666 | <h5>Real-World Cluster Configurations</h5> |
---|
| 667 | <p>This section lists some non-default configuration parameters which |
---|
| 668 | have been used to run the <em>sort</em> benchmark on very large |
---|
| 669 | clusters.</p> |
---|
| 670 | <ul> |
---|
| 671 | |
---|
| 672 | <li> |
---|
| 673 | |
---|
| 674 | <p>Some non-default configuration values used to run sort900, |
---|
| 675 | that is 9TB of data sorted on a cluster with 900 nodes:</p> |
---|
| 676 | |
---|
| 677 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 678 | |
---|
| 679 | <tr> |
---|
| 680 | |
---|
| 681 | <th colspan="1" rowspan="1">Configuration File</th> |
---|
| 682 | <th colspan="1" rowspan="1">Parameter</th> |
---|
| 683 | <th colspan="1" rowspan="1">Value</th> |
---|
| 684 | <th colspan="1" rowspan="1">Notes</th> |
---|
| 685 | |
---|
| 686 | </tr> |
---|
| 687 | |
---|
| 688 | <tr> |
---|
| 689 | |
---|
| 690 | <td colspan="1" rowspan="1">conf/hdfs-site.xml</td> |
---|
| 691 | <td colspan="1" rowspan="1">dfs.block.size</td> |
---|
| 692 | <td colspan="1" rowspan="1">134217728</td> |
---|
| 693 | <td colspan="1" rowspan="1">HDFS blocksize of 128MB for large file-systems.</td> |
---|
| 694 | |
---|
| 695 | </tr> |
---|
| 696 | |
---|
| 697 | <tr> |
---|
| 698 | |
---|
| 699 | <td colspan="1" rowspan="1">conf/hdfs-site.xml</td> |
---|
| 700 | <td colspan="1" rowspan="1">dfs.namenode.handler.count</td> |
---|
| 701 | <td colspan="1" rowspan="1">40</td> |
---|
| 702 | <td colspan="1" rowspan="1"> |
---|
| 703 | More NameNode server threads to handle RPCs from large |
---|
| 704 | number of DataNodes. |
---|
| 705 | </td> |
---|
| 706 | |
---|
| 707 | </tr> |
---|
| 708 | |
---|
| 709 | <tr> |
---|
| 710 | |
---|
| 711 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 712 | <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td> |
---|
| 713 | <td colspan="1" rowspan="1">20</td> |
---|
| 714 | <td colspan="1" rowspan="1"> |
---|
| 715 | Higher number of parallel copies run by reduces to fetch |
---|
| 716 | outputs from very large number of maps. |
---|
| 717 | </td> |
---|
| 718 | |
---|
| 719 | </tr> |
---|
| 720 | |
---|
| 721 | <tr> |
---|
| 722 | |
---|
| 723 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 724 | <td colspan="1" rowspan="1">mapred.child.java.opts</td> |
---|
| 725 | <td colspan="1" rowspan="1">-Xmx512M</td> |
---|
| 726 | <td colspan="1" rowspan="1"> |
---|
| 727 | Larger heap-size for child jvms of maps/reduces. |
---|
| 728 | </td> |
---|
| 729 | |
---|
| 730 | </tr> |
---|
| 731 | |
---|
| 732 | <tr> |
---|
| 733 | |
---|
| 734 | <td colspan="1" rowspan="1">conf/core-site.xml</td> |
---|
| 735 | <td colspan="1" rowspan="1">fs.inmemory.size.mb</td> |
---|
| 736 | <td colspan="1" rowspan="1">200</td> |
---|
| 737 | <td colspan="1" rowspan="1"> |
---|
| 738 | Larger amount of memory allocated for the in-memory |
---|
| 739 | file-system used to merge map-outputs at the reduces. |
---|
| 740 | </td> |
---|
| 741 | |
---|
| 742 | </tr> |
---|
| 743 | |
---|
| 744 | <tr> |
---|
| 745 | |
---|
| 746 | <td colspan="1" rowspan="1">conf/core-site.xml</td> |
---|
| 747 | <td colspan="1" rowspan="1">io.sort.factor</td> |
---|
| 748 | <td colspan="1" rowspan="1">100</td> |
---|
| 749 | <td colspan="1" rowspan="1">More streams merged at once while sorting files.</td> |
---|
| 750 | |
---|
| 751 | </tr> |
---|
| 752 | |
---|
| 753 | <tr> |
---|
| 754 | |
---|
| 755 | <td colspan="1" rowspan="1">conf/core-site.xml</td> |
---|
| 756 | <td colspan="1" rowspan="1">io.sort.mb</td> |
---|
| 757 | <td colspan="1" rowspan="1">200</td> |
---|
| 758 | <td colspan="1" rowspan="1">Higher memory-limit while sorting data.</td> |
---|
| 759 | |
---|
| 760 | </tr> |
---|
| 761 | |
---|
| 762 | <tr> |
---|
| 763 | |
---|
| 764 | <td colspan="1" rowspan="1">conf/core-site.xml</td> |
---|
| 765 | <td colspan="1" rowspan="1">io.file.buffer.size</td> |
---|
| 766 | <td colspan="1" rowspan="1">131072</td> |
---|
| 767 | <td colspan="1" rowspan="1">Size of read/write buffer used in SequenceFiles.</td> |
---|
| 768 | |
---|
| 769 | </tr> |
---|
| 770 | |
---|
| 771 | </table> |
---|
| 772 | |
---|
| 773 | </li> |
---|
| 774 | |
---|
| 775 | <li> |
---|
| 776 | |
---|
| 777 | <p>Updates to some configuration values to run sort1400 and |
---|
| 778 | sort2000, that is 14TB of data sorted on 1400 nodes and 20TB of |
---|
| 779 | data sorted on 2000 nodes:</p> |
---|
| 780 | |
---|
| 781 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 782 | |
---|
| 783 | <tr> |
---|
| 784 | |
---|
| 785 | <th colspan="1" rowspan="1">Configuration File</th> |
---|
| 786 | <th colspan="1" rowspan="1">Parameter</th> |
---|
| 787 | <th colspan="1" rowspan="1">Value</th> |
---|
| 788 | <th colspan="1" rowspan="1">Notes</th> |
---|
| 789 | |
---|
| 790 | </tr> |
---|
| 791 | |
---|
| 792 | <tr> |
---|
| 793 | |
---|
| 794 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 795 | <td colspan="1" rowspan="1">mapred.job.tracker.handler.count</td> |
---|
| 796 | <td colspan="1" rowspan="1">60</td> |
---|
| 797 | <td colspan="1" rowspan="1"> |
---|
| 798 | More JobTracker server threads to handle RPCs from large |
---|
| 799 | number of TaskTrackers. |
---|
| 800 | </td> |
---|
| 801 | |
---|
| 802 | </tr> |
---|
| 803 | |
---|
| 804 | <tr> |
---|
| 805 | |
---|
| 806 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 807 | <td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td> |
---|
| 808 | <td colspan="1" rowspan="1">50</td> |
---|
| 809 | <td colspan="1" rowspan="1"></td> |
---|
| 810 | |
---|
| 811 | </tr> |
---|
| 812 | |
---|
| 813 | <tr> |
---|
| 814 | |
---|
| 815 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 816 | <td colspan="1" rowspan="1">tasktracker.http.threads</td> |
---|
| 817 | <td colspan="1" rowspan="1">50</td> |
---|
| 818 | <td colspan="1" rowspan="1"> |
---|
| 819 | More worker threads for the TaskTracker's http server. The |
---|
| 820 | http server is used by reduces to fetch intermediate |
---|
| 821 | map-outputs. |
---|
| 822 | </td> |
---|
| 823 | |
---|
| 824 | </tr> |
---|
| 825 | |
---|
| 826 | <tr> |
---|
| 827 | |
---|
| 828 | <td colspan="1" rowspan="1">conf/mapred-site.xml</td> |
---|
| 829 | <td colspan="1" rowspan="1">mapred.child.java.opts</td> |
---|
| 830 | <td colspan="1" rowspan="1">-Xmx1024M</td> |
---|
| 831 | <td colspan="1" rowspan="1">Larger heap-size for child jvms of maps/reduces.</td> |
---|
| 832 | |
---|
| 833 | </tr> |
---|
| 834 | |
---|
| 835 | </table> |
---|
| 836 | |
---|
| 837 | </li> |
---|
| 838 | |
---|
| 839 | </ul> |
---|
| 840 | <a name="N10423"></a><a name="Memory+monitoring"></a> |
---|
| 841 | <h4> Memory monitoring</h4> |
---|
| 842 | <p>A <span class="codefrag">TaskTracker</span>(TT) can be configured to monitor memory |
---|
| 843 | usage of tasks it spawns, so that badly-behaved jobs do not bring |
---|
| 844 | down a machine due to excess memory consumption. With monitoring |
---|
| 845 | enabled, every task is assigned a task-limit for virtual memory (VMEM). |
---|
| 846 | In addition, every node is assigned a node-limit for VMEM usage. |
---|
| 847 | A TT ensures that a task is killed if it, and |
---|
| 848 | its descendants, use VMEM over the task's per-task limit. It also |
---|
| 849 | ensures that one or more tasks are killed if the sum total of VMEM |
---|
| 850 | usage by all tasks, and their descendants, crosses the node-limit.</p> |
---|
| 851 | <p>Users can, optionally, specify the VMEM task-limit per job. If no |
---|
| 852 | such limit is provided, a default limit is used. A node-limit can be |
---|
| 853 | set per node.</p> |
---|
| 854 | <p>Currently the memory monitoring and management is only supported |
---|
| 855 | on the Linux platform.</p> |
---|
| 856 | <p>To enable monitoring for a TT, the |
---|
| 857 | following parameters all need to be set:</p> |
---|
| 858 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 859 | |
---|
| 860 | <tr> |
---|
| 861 | <th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th> |
---|
| 862 | </tr> |
---|
| 863 | |
---|
| 864 | <tr> |
---|
| 865 | <td colspan="1" rowspan="1">mapred.tasktracker.vmem.reserved</td><td colspan="1" rowspan="1">long</td> |
---|
| 866 | <td colspan="1" rowspan="1">A number, in bytes, that represents an offset. The total VMEM on |
---|
| 867 | the machine, minus this offset, is the VMEM node-limit for all |
---|
| 868 | tasks, and their descendants, spawned by the TT. |
---|
| 869 | </td> |
---|
| 870 | </tr> |
---|
| 871 | |
---|
| 872 | <tr> |
---|
| 873 | <td colspan="1" rowspan="1">mapred.task.default.maxvmem</td><td colspan="1" rowspan="1">long</td> |
---|
| 874 | <td colspan="1" rowspan="1">A number, in bytes, that represents the default VMEM task-limit |
---|
| 875 | associated with a task. Unless overridden by a job's setting, |
---|
| 876 | this number defines the VMEM task-limit. |
---|
| 877 | </td> |
---|
| 878 | </tr> |
---|
| 879 | |
---|
| 880 | <tr> |
---|
| 881 | <td colspan="1" rowspan="1">mapred.task.limit.maxvmem</td><td colspan="1" rowspan="1">long</td> |
---|
| 882 | <td colspan="1" rowspan="1">A number, in bytes, that represents the upper VMEM task-limit |
---|
| 883 | associated with a task. Users, when specifying a VMEM task-limit |
---|
| 884 | for their tasks, should not specify a limit which exceeds this amount. |
---|
| 885 | </td> |
---|
| 886 | </tr> |
---|
| 887 | |
---|
| 888 | </table> |
---|
| 889 | <p>In addition, the following parameters can also be configured.</p> |
---|
| 890 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 891 | |
---|
| 892 | <tr> |
---|
| 893 | <th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th> |
---|
| 894 | </tr> |
---|
| 895 | |
---|
| 896 | <tr> |
---|
| 897 | <td colspan="1" rowspan="1">mapred.tasktracker.taskmemorymanager.monitoring-interval</td> |
---|
| 898 | <td colspan="1" rowspan="1">long</td> |
---|
| 899 | <td colspan="1" rowspan="1">The time interval, in milliseconds, between which the TT |
---|
| 900 | checks for any memory violation. The default value is 5000 msec |
---|
| 901 | (5 seconds). |
---|
| 902 | </td> |
---|
| 903 | </tr> |
---|
| 904 | |
---|
| 905 | </table> |
---|
| 906 | <p>Here's how the memory monitoring works for a TT.</p> |
---|
| 907 | <ol> |
---|
| 908 | |
---|
| 909 | <li>If one or more of the configuration parameters described |
---|
| 910 | above are missing or -1 is specified, memory monitoring is |
---|
| 911 | disabled for the TT. |
---|
| 912 | </li> |
---|
| 913 | |
---|
| 914 | <li>In addition, monitoring is disabled if |
---|
| 915 | <span class="codefrag">mapred.task.default.maxvmem</span> is greater than |
---|
| 916 | <span class="codefrag">mapred.task.limit.maxvmem</span>. |
---|
| 917 | </li> |
---|
| 918 | |
---|
| 919 | <li>If a TT receives a task whose task-limit is set by the user |
---|
| 920 | to a value larger than <span class="codefrag">mapred.task.limit.maxvmem</span>, it |
---|
| 921 | logs a warning but executes the task. |
---|
| 922 | </li> |
---|
| 923 | |
---|
| 924 | <li>Periodically, the TT checks the following: |
---|
| 925 | <ul> |
---|
| 926 | |
---|
| 927 | <li>If any task's current VMEM usage is greater than that task's |
---|
| 928 | VMEM task-limit, the task is killed and reason for killing |
---|
| 929 | the task is logged in task diagnostics. Such a task is considered |
---|
| 930 | failed, i.e., the killing counts towards the task's failure count. |
---|
| 931 | </li> |
---|
| 932 | |
---|
| 933 | <li>If the sum total of VMEM used by all tasks and descendants is |
---|
| 934 | greater than the node-limit, the TT kills enough tasks, in the |
---|
| 935 | order of least progress made, till the overall VMEM usage falls |
---|
| 936 | below the node-limit. Such killed tasks are not considered failed |
---|
| 937 | and their killing does not count towards the tasks' failure counts. |
---|
| 938 | </li> |
---|
| 939 | |
---|
| 940 | </ul> |
---|
| 941 | |
---|
| 942 | </li> |
---|
| 943 | |
---|
| 944 | </ol> |
---|
| 945 | <p>Schedulers can choose to ease the monitoring pressure on the TT by |
---|
| 946 | preventing too many tasks from running on a node and by scheduling |
---|
| 947 | tasks only if the TT has enough VMEM free. In addition, Schedulers may |
---|
| 948 | choose to consider the physical memory (RAM) available on the node |
---|
| 949 | as well. To enable Scheduler support, TTs report their memory settings |
---|
| 950 | to the JobTracker in every heartbeat. Before getting into details, |
---|
| 951 | consider the following additional memory-related parameters that can be |
---|
| 952 | configured to enable better scheduling:</p> |
---|
| 953 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 954 | |
---|
| 955 | <tr> |
---|
| 956 | <th colspan="1" rowspan="1">Name</th><th colspan="1" rowspan="1">Type</th><th colspan="1" rowspan="1">Description</th> |
---|
| 957 | </tr> |
---|
| 958 | |
---|
| 959 | <tr> |
---|
| 960 | <td colspan="1" rowspan="1">mapred.tasktracker.pmem.reserved</td><td colspan="1" rowspan="1">int</td> |
---|
| 961 | <td colspan="1" rowspan="1">A number, in bytes, that represents an offset. The total |
---|
| 962 | physical memory (RAM) on the machine, minus this offset, is the |
---|
| 963 | recommended RAM node-limit. The RAM node-limit is a hint to a |
---|
| 964 | Scheduler to schedule only so many tasks such that the sum |
---|
| 965 | total of their RAM requirements does not exceed this limit. |
---|
| 966 | RAM usage is not monitored by a TT. |
---|
| 967 | </td> |
---|
| 968 | </tr> |
---|
| 969 | |
---|
| 970 | </table> |
---|
| 971 | <p>A TT reports the following memory-related numbers in every |
---|
| 972 | heartbeat:</p> |
---|
| 973 | <ul> |
---|
| 974 | |
---|
| 975 | <li>The total VMEM available on the node.</li> |
---|
| 976 | |
---|
| 977 | <li>The value of <span class="codefrag">mapred.tasktracker.vmem.reserved</span>, |
---|
| 978 | if set.</li> |
---|
| 979 | |
---|
| 980 | <li>The total RAM available on the node.</li> |
---|
| 981 | |
---|
| 982 | <li>The value of <span class="codefrag">mapred.tasktracker.pmem.reserved</span>, |
---|
| 983 | if set.</li> |
---|
| 984 | |
---|
| 985 | </ul> |
---|
| 986 | <a name="N104FA"></a><a name="Slaves"></a> |
---|
| 987 | <h4>Slaves</h4> |
---|
| 988 | <p>Typically you choose one machine in the cluster to act as the |
---|
| 989 | <span class="codefrag">NameNode</span> and one machine to act as the |
---|
| 990 | <span class="codefrag">JobTracker</span>, exclusively. The rest of the machines act as |
---|
| 991 | both a <span class="codefrag">DataNode</span> and <span class="codefrag">TaskTracker</span> and are |
---|
| 992 | referred to as <em>slaves</em>.</p> |
---|
| 993 | <p>List all slave hostnames or IP addresses in your |
---|
| 994 | <span class="codefrag">conf/slaves</span> file, one per line.</p> |
---|
| 995 | <a name="N10519"></a><a name="Logging"></a> |
---|
| 996 | <h4>Logging</h4> |
---|
| 997 | <p>Hadoop uses the <a href="http://logging.apache.org/log4j/">Apache |
---|
| 998 | log4j</a> via the <a href="http://commons.apache.org/logging/">Apache |
---|
| 999 | Commons Logging</a> framework for logging. Edit the |
---|
| 1000 | <span class="codefrag">conf/log4j.properties</span> file to customize the Hadoop |
---|
| 1001 | daemons' logging configuration (log-formats and so on).</p> |
---|
| 1002 | <a name="N1052D"></a><a name="History+Logging"></a> |
---|
| 1003 | <h5>History Logging</h5> |
---|
| 1004 | <p> The job history files are stored in a central location |
---|
| 1005 | <span class="codefrag"> hadoop.job.history.location </span> which can be on DFS also, |
---|
| 1006 | whose default value is <span class="codefrag">${HADOOP_LOG_DIR}/history</span>. |
---|
| 1007 | The history web UI is accessible from job tracker web UI.</p> |
---|
| 1008 | <p> The history files are also logged to user specified directory |
---|
| 1009 | <span class="codefrag">hadoop.job.history.user.location</span> |
---|
| 1010 | which defaults to job output directory. The files are stored in |
---|
| 1011 | "_logs/history/" in the specified directory. Hence, by default |
---|
| 1012 | they will be in "mapred.output.dir/_logs/history/". User can stop |
---|
| 1013 | logging by giving the value <span class="codefrag">none</span> for |
---|
| 1014 | <span class="codefrag">hadoop.job.history.user.location</span>. |
---|
| 1015 | </p> |
---|
| 1016 | <p> User can view the history logs summary in specified directory |
---|
| 1017 | using the following command <br> |
---|
| 1018 | |
---|
| 1019 | <span class="codefrag">$ bin/hadoop job -history output-dir</span> |
---|
| 1020 | <br> |
---|
| 1021 | This command will print job details, failed and killed tip |
---|
| 1022 | details. <br> |
---|
| 1023 | More details about the job such as successful tasks and |
---|
| 1024 | task attempts made for each task can be viewed using the |
---|
| 1025 | following command <br> |
---|
| 1026 | |
---|
| 1027 | <span class="codefrag">$ bin/hadoop job -history all output-dir</span> |
---|
| 1028 | <br> |
---|
| 1029 | </p> |
---|
| 1030 | <p>Once all the necessary configuration is complete, distribute the files |
---|
| 1031 | to the <span class="codefrag">HADOOP_CONF_DIR</span> directory on all the machines, |
---|
| 1032 | typically <span class="codefrag">${HADOOP_HOME}/conf</span>.</p> |
---|
| 1033 | </div> |
---|
| 1034 | |
---|
| 1035 | <a name="N10565"></a><a name="Cluster+Restartability"></a> |
---|
| 1036 | <h2 class="h3">Cluster Restartability</h2> |
---|
| 1037 | <div class="section"> |
---|
| 1038 | <a name="N1056B"></a><a name="Map%2FReduce"></a> |
---|
| 1039 | <h3 class="h4">Map/Reduce</h3> |
---|
| 1040 | <p>The job tracker restart can recover running jobs if |
---|
| 1041 | <span class="codefrag">mapred.jobtracker.restart.recover</span> is set to true and |
---|
| 1042 | <a href="#Logging">JobHistory logging</a> is enabled. Also |
---|
| 1043 | <span class="codefrag">mapred.jobtracker.job.history.block.size</span> value should be |
---|
| 1044 | set to an optimal value to dump job history to disk as soon as |
---|
| 1045 | possible, the typical value is 3145728(3MB).</p> |
---|
| 1046 | </div> |
---|
| 1047 | |
---|
| 1048 | |
---|
| 1049 | <a name="N10580"></a><a name="Hadoop+Rack+Awareness"></a> |
---|
| 1050 | <h2 class="h3">Hadoop Rack Awareness</h2> |
---|
| 1051 | <div class="section"> |
---|
| 1052 | <p>The HDFS and the Map/Reduce components are rack-aware.</p> |
---|
| 1053 | <p>The <span class="codefrag">NameNode</span> and the <span class="codefrag">JobTracker</span> obtain the |
---|
| 1054 | <span class="codefrag">rack id</span> of the slaves in the cluster by invoking an API |
---|
| 1055 | <a href="api/org/apache/hadoop/net/DNSToSwitchMapping.html#resolve(java.util.List)">resolve</a> in an administrator configured |
---|
| 1056 | module. The API resolves the slave's DNS name (also IP address) to a |
---|
| 1057 | rack id. What module to use can be configured using the configuration |
---|
| 1058 | item <span class="codefrag">topology.node.switch.mapping.impl</span>. The default |
---|
| 1059 | implementation of the same runs a script/command configured using |
---|
| 1060 | <span class="codefrag">topology.script.file.name</span>. If topology.script.file.name is |
---|
| 1061 | not set, the rack id <span class="codefrag">/default-rack</span> is returned for any |
---|
| 1062 | passed IP address. The additional configuration in the Map/Reduce |
---|
| 1063 | part is <span class="codefrag">mapred.cache.task.levels</span> which determines the number |
---|
| 1064 | of levels (in the network topology) of caches. So, for example, if it is |
---|
| 1065 | the default value of 2, two levels of caches will be constructed - |
---|
| 1066 | one for hosts (host -> task mapping) and another for racks |
---|
| 1067 | (rack -> task mapping). |
---|
| 1068 | </p> |
---|
| 1069 | </div> |
---|
| 1070 | |
---|
| 1071 | |
---|
| 1072 | <a name="N105A6"></a><a name="Hadoop+Startup"></a> |
---|
| 1073 | <h2 class="h3">Hadoop Startup</h2> |
---|
| 1074 | <div class="section"> |
---|
| 1075 | <p>To start a Hadoop cluster you will need to start both the HDFS and |
---|
| 1076 | Map/Reduce cluster.</p> |
---|
| 1077 | <p> |
---|
| 1078 | Format a new distributed filesystem:<br> |
---|
| 1079 | |
---|
| 1080 | <span class="codefrag">$ bin/hadoop namenode -format</span> |
---|
| 1081 | |
---|
| 1082 | </p> |
---|
| 1083 | <p> |
---|
| 1084 | Start the HDFS with the following command, run on the designated |
---|
| 1085 | <span class="codefrag">NameNode</span>:<br> |
---|
| 1086 | |
---|
| 1087 | <span class="codefrag">$ bin/start-dfs.sh</span> |
---|
| 1088 | |
---|
| 1089 | </p> |
---|
| 1090 | <p>The <span class="codefrag">bin/start-dfs.sh</span> script also consults the |
---|
| 1091 | <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span> |
---|
| 1092 | and starts the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p> |
---|
| 1093 | <p> |
---|
| 1094 | Start Map/Reduce with the following command, run on the designated |
---|
| 1095 | <span class="codefrag">JobTracker</span>:<br> |
---|
| 1096 | |
---|
| 1097 | <span class="codefrag">$ bin/start-mapred.sh</span> |
---|
| 1098 | |
---|
| 1099 | </p> |
---|
| 1100 | <p>The <span class="codefrag">bin/start-mapred.sh</span> script also consults the |
---|
| 1101 | <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span> |
---|
| 1102 | and starts the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves. |
---|
| 1103 | </p> |
---|
| 1104 | </div> |
---|
| 1105 | |
---|
| 1106 | |
---|
| 1107 | <a name="N105EC"></a><a name="Hadoop+Shutdown"></a> |
---|
| 1108 | <h2 class="h3">Hadoop Shutdown</h2> |
---|
| 1109 | <div class="section"> |
---|
| 1110 | <p> |
---|
| 1111 | Stop HDFS with the following command, run on the designated |
---|
| 1112 | <span class="codefrag">NameNode</span>:<br> |
---|
| 1113 | |
---|
| 1114 | <span class="codefrag">$ bin/stop-dfs.sh</span> |
---|
| 1115 | |
---|
| 1116 | </p> |
---|
| 1117 | <p>The <span class="codefrag">bin/stop-dfs.sh</span> script also consults the |
---|
| 1118 | <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">NameNode</span> |
---|
| 1119 | and stops the <span class="codefrag">DataNode</span> daemon on all the listed slaves.</p> |
---|
| 1120 | <p> |
---|
| 1121 | Stop Map/Reduce with the following command, run on the designated |
---|
| 1122 | <span class="codefrag">JobTracker</span>:<br> |
---|
| 1123 | |
---|
| 1124 | <span class="codefrag">$ bin/stop-mapred.sh</span> |
---|
| 1125 | <br> |
---|
| 1126 | |
---|
| 1127 | </p> |
---|
| 1128 | <p>The <span class="codefrag">bin/stop-mapred.sh</span> script also consults the |
---|
| 1129 | <span class="codefrag">${HADOOP_CONF_DIR}/slaves</span> file on the <span class="codefrag">JobTracker</span> |
---|
| 1130 | and stops the <span class="codefrag">TaskTracker</span> daemon on all the listed slaves.</p> |
---|
| 1131 | </div> |
---|
| 1132 | |
---|
| 1133 | </div> |
---|
| 1134 | <!--+ |
---|
| 1135 | |end content |
---|
| 1136 | +--> |
---|
| 1137 | <div class="clearboth"> </div> |
---|
| 1138 | </div> |
---|
| 1139 | <div id="footer"> |
---|
| 1140 | <!--+ |
---|
| 1141 | |start bottomstrip |
---|
| 1142 | +--> |
---|
| 1143 | <div class="lastmodified"> |
---|
| 1144 | <script type="text/javascript"><!-- |
---|
| 1145 | document.write("Last Published: " + document.lastModified); |
---|
| 1146 | // --></script> |
---|
| 1147 | </div> |
---|
| 1148 | <div class="copyright"> |
---|
| 1149 | Copyright © |
---|
| 1150 | 2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> |
---|
| 1151 | </div> |
---|
| 1152 | <!--+ |
---|
| 1153 | |end bottomstrip |
---|
| 1154 | +--> |
---|
| 1155 | </div> |
---|
| 1156 | </body> |
---|
| 1157 | </html> |
---|