[120] | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
---|
| 2 | <html> |
---|
| 3 | <head> |
---|
| 4 | <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
---|
| 5 | <meta content="Apache Forrest" name="Generator"> |
---|
| 6 | <meta name="Forrest-version" content="0.8"> |
---|
| 7 | <meta name="Forrest-skin-name" content="pelt"> |
---|
| 8 | <title>Hadoop Map/Reduce教程</title> |
---|
| 9 | <link type="text/css" href="skin/basic.css" rel="stylesheet"> |
---|
| 10 | <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> |
---|
| 11 | <link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> |
---|
| 12 | <link type="text/css" href="skin/profile.css" rel="stylesheet"> |
---|
| 13 | <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> |
---|
| 14 | <link rel="shortcut icon" href="images/favicon.ico"> |
---|
| 15 | </head> |
---|
| 16 | <body onload="init()"> |
---|
| 17 | <script type="text/javascript">ndeSetTextSize();</script> |
---|
| 18 | <div id="top"> |
---|
| 19 | <!--+ |
---|
| 20 | |breadtrail |
---|
| 21 | +--> |
---|
| 22 | <div class="breadtrail"> |
---|
| 23 | <a href="http://www.apache.org/">Apache</a> > <a href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> |
---|
| 24 | </div> |
---|
| 25 | <!--+ |
---|
| 26 | |header |
---|
| 27 | +--> |
---|
| 28 | <div class="header"> |
---|
| 29 | <!--+ |
---|
| 30 | |start group logo |
---|
| 31 | +--> |
---|
| 32 | <div class="grouplogo"> |
---|
| 33 | <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> |
---|
| 34 | </div> |
---|
| 35 | <!--+ |
---|
| 36 | |end group logo |
---|
| 37 | +--> |
---|
| 38 | <!--+ |
---|
| 39 | |start Project Logo |
---|
| 40 | +--> |
---|
| 41 | <div class="projectlogo"> |
---|
| 42 | <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a> |
---|
| 43 | </div> |
---|
| 44 | <!--+ |
---|
| 45 | |end Project Logo |
---|
| 46 | +--> |
---|
| 47 | <!--+ |
---|
| 48 | |start Search |
---|
| 49 | +--> |
---|
| 50 | <div class="searchbox"> |
---|
| 51 | <form action="http://www.google.com/search" method="get" class="roundtopsmall"> |
---|
| 52 | <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> |
---|
| 53 | <input name="Search" value="Search" type="submit"> |
---|
| 54 | </form> |
---|
| 55 | </div> |
---|
| 56 | <!--+ |
---|
| 57 | |end search |
---|
| 58 | +--> |
---|
| 59 | <!--+ |
---|
| 60 | |start Tabs |
---|
| 61 | +--> |
---|
| 62 | <ul id="tabs"> |
---|
| 63 | <li> |
---|
| 64 | <a class="unselected" href="http://hadoop.apache.org/core/">项目</a> |
---|
| 65 | </li> |
---|
| 66 | <li> |
---|
| 67 | <a class="unselected" href="http://wiki.apache.org/hadoop">维基</a> |
---|
| 68 | </li> |
---|
| 69 | <li class="current"> |
---|
| 70 | <a class="selected" href="index.html">Hadoop 0.18文档</a> |
---|
| 71 | </li> |
---|
| 72 | </ul> |
---|
| 73 | <!--+ |
---|
| 74 | |end Tabs |
---|
| 75 | +--> |
---|
| 76 | </div> |
---|
| 77 | </div> |
---|
| 78 | <div id="main"> |
---|
| 79 | <div id="publishedStrip"> |
---|
| 80 | <!--+ |
---|
| 81 | |start Subtabs |
---|
| 82 | +--> |
---|
| 83 | <div id="level2tabs"></div> |
---|
| 84 | <!--+ |
---|
| 85 | |end Endtabs |
---|
| 86 | +--> |
---|
| 87 | <script type="text/javascript"><!-- |
---|
| 88 | document.write("Last Published: " + document.lastModified); |
---|
| 89 | // --></script> |
---|
| 90 | </div> |
---|
| 91 | <!--+ |
---|
| 92 | |breadtrail |
---|
| 93 | +--> |
---|
| 94 | <div class="breadtrail"> |
---|
| 95 | |
---|
| 96 | |
---|
| 97 | </div> |
---|
| 98 | <!--+ |
---|
| 99 | |start Menu, mainarea |
---|
| 100 | +--> |
---|
| 101 | <!--+ |
---|
| 102 | |start Menu |
---|
| 103 | +--> |
---|
| 104 | <div id="menu"> |
---|
| 105 | <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div> |
---|
| 106 | <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> |
---|
| 107 | <div class="menuitem"> |
---|
| 108 | <a href="index.html">概述</a> |
---|
| 109 | </div> |
---|
| 110 | <div class="menuitem"> |
---|
| 111 | <a href="quickstart.html">快速入门</a> |
---|
| 112 | </div> |
---|
| 113 | <div class="menuitem"> |
---|
| 114 | <a href="cluster_setup.html">集群搭建</a> |
---|
| 115 | </div> |
---|
| 116 | <div class="menuitem"> |
---|
| 117 | <a href="hdfs_design.html">HDFS构架设计</a> |
---|
| 118 | </div> |
---|
| 119 | <div class="menuitem"> |
---|
| 120 | <a href="hdfs_user_guide.html">HDFS使用指南</a> |
---|
| 121 | </div> |
---|
| 122 | <div class="menuitem"> |
---|
| 123 | <a href="hdfs_permissions_guide.html">HDFS权限指南</a> |
---|
| 124 | </div> |
---|
| 125 | <div class="menuitem"> |
---|
| 126 | <a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a> |
---|
| 127 | </div> |
---|
| 128 | <div class="menuitem"> |
---|
| 129 | <a href="commands_manual.html">命令手册</a> |
---|
| 130 | </div> |
---|
| 131 | <div class="menuitem"> |
---|
| 132 | <a href="hdfs_shell.html">FS Shell使用指南</a> |
---|
| 133 | </div> |
---|
| 134 | <div class="menuitem"> |
---|
| 135 | <a href="distcp.html">DistCp使用指南</a> |
---|
| 136 | </div> |
---|
| 137 | <div class="menupage"> |
---|
| 138 | <div class="menupagetitle">Map-Reduce教程</div> |
---|
| 139 | </div> |
---|
| 140 | <div class="menuitem"> |
---|
| 141 | <a href="native_libraries.html">Hadoop本地库</a> |
---|
| 142 | </div> |
---|
| 143 | <div class="menuitem"> |
---|
| 144 | <a href="streaming.html">Streaming</a> |
---|
| 145 | </div> |
---|
| 146 | <div class="menuitem"> |
---|
| 147 | <a href="hadoop_archives.html">Hadoop Archives</a> |
---|
| 148 | </div> |
---|
| 149 | <div class="menuitem"> |
---|
| 150 | <a href="hod.html">Hadoop On Demand</a> |
---|
| 151 | </div> |
---|
| 152 | <div class="menuitem"> |
---|
| 153 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a> |
---|
| 154 | </div> |
---|
| 155 | <div class="menuitem"> |
---|
| 156 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a> |
---|
| 157 | </div> |
---|
| 158 | <div class="menuitem"> |
---|
| 159 | <a href="http://wiki.apache.org/hadoop/">维基</a> |
---|
| 160 | </div> |
---|
| 161 | <div class="menuitem"> |
---|
| 162 | <a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a> |
---|
| 163 | </div> |
---|
| 164 | <div class="menuitem"> |
---|
| 165 | <a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a> |
---|
| 166 | </div> |
---|
| 167 | <div class="menuitem"> |
---|
| 168 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a> |
---|
| 169 | </div> |
---|
| 170 | <div class="menuitem"> |
---|
| 171 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a> |
---|
| 172 | </div> |
---|
| 173 | </div> |
---|
| 174 | <div id="credit"></div> |
---|
| 175 | <div id="roundbottom"> |
---|
| 176 | <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> |
---|
| 177 | <!--+ |
---|
| 178 | |alternative credits |
---|
| 179 | +--> |
---|
| 180 | <div id="credit2"></div> |
---|
| 181 | </div> |
---|
| 182 | <!--+ |
---|
| 183 | |end Menu |
---|
| 184 | +--> |
---|
| 185 | <!--+ |
---|
| 186 | |start content |
---|
| 187 | +--> |
---|
| 188 | <div id="content"> |
---|
| 189 | <div title="Portable Document Format" class="pdflink"> |
---|
| 190 | <a class="dida" href="mapred_tutorial.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> |
---|
| 191 | PDF</a> |
---|
| 192 | </div> |
---|
| 193 | <h1>Hadoop Map/Reduce教程</h1> |
---|
| 194 | <div id="minitoc-area"> |
---|
| 195 | <ul class="minitoc"> |
---|
| 196 | <li> |
---|
| 197 | <a href="#%E7%9B%AE%E7%9A%84">目的</a> |
---|
| 198 | </li> |
---|
| 199 | <li> |
---|
| 200 | <a href="#%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6">先决条件</a> |
---|
| 201 | </li> |
---|
| 202 | <li> |
---|
| 203 | <a href="#%E6%A6%82%E8%BF%B0">概述</a> |
---|
| 204 | </li> |
---|
| 205 | <li> |
---|
| 206 | <a href="#%E8%BE%93%E5%85%A5%E4%B8%8E%E8%BE%93%E5%87%BA">输入与输出</a> |
---|
| 207 | </li> |
---|
| 208 | <li> |
---|
| 209 | <a href="#%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v1.0">例子：WordCount v1.0</a> |
---|
| 210 | <ul class="minitoc"> |
---|
| 211 | <li> |
---|
| 212 | <a href="#%E6%BA%90%E4%BB%A3%E7%A0%81">源代码</a> |
---|
| 213 | </li> |
---|
| 214 | <li> |
---|
| 215 | <a href="#%E7%94%A8%E6%B3%95">用法</a> |
---|
| 216 | </li> |
---|
| 217 | <li> |
---|
| 218 | <a href="#%E8%A7%A3%E9%87%8A">解释</a> |
---|
| 219 | </li> |
---|
| 220 | </ul> |
---|
| 221 | </li> |
---|
| 222 | <li> |
---|
| 223 | <a href="#Map%2FReduce+-+%E7%94%A8%E6%88%B7%E7%95%8C%E9%9D%A2">Map/Reduce - 用户界面</a> |
---|
| 224 | <ul class="minitoc"> |
---|
| 225 | <li> |
---|
| 226 | <a href="#%E6%A0%B8%E5%BF%83%E5%8A%9F%E8%83%BD%E6%8F%8F%E8%BF%B0">核心功能描述</a> |
---|
| 227 | <ul class="minitoc"> |
---|
| 228 | <li> |
---|
| 229 | <a href="#Mapper">Mapper</a> |
---|
| 230 | </li> |
---|
| 231 | <li> |
---|
| 232 | <a href="#Reducer">Reducer</a> |
---|
| 233 | </li> |
---|
| 234 | <li> |
---|
| 235 | <a href="#Partitioner">Partitioner</a> |
---|
| 236 | </li> |
---|
| 237 | <li> |
---|
| 238 | <a href="#Reporter">Reporter</a> |
---|
| 239 | </li> |
---|
| 240 | <li> |
---|
| 241 | <a href="#OutputCollector">OutputCollector</a> |
---|
| 242 | </li> |
---|
| 243 | </ul> |
---|
| 244 | </li> |
---|
| 245 | <li> |
---|
| 246 | <a href="#%E4%BD%9C%E4%B8%9A%E9%85%8D%E7%BD%AE">作业配置</a> |
---|
| 247 | </li> |
---|
| 248 | <li> |
---|
| 249 | <a href="#%E4%BB%BB%E5%8A%A1%E7%9A%84%E6%89%A7%E8%A1%8C%E5%92%8C%E7%8E%AF%E5%A2%83">任务的执行和环境</a> |
---|
| 250 | </li> |
---|
| 251 | <li> |
---|
| 252 | <a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8F%90%E4%BA%A4%E4%B8%8E%E7%9B%91%E6%8E%A7">作业的提交与监控</a> |
---|
| 253 | <ul class="minitoc"> |
---|
| 254 | <li> |
---|
| 255 | <a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8E%A7%E5%88%B6">作业的控制</a> |
---|
| 256 | </li> |
---|
| 257 | </ul> |
---|
| 258 | </li> |
---|
| 259 | <li> |
---|
| 260 | <a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%85%A5">作业的输入</a> |
---|
| 261 | <ul class="minitoc"> |
---|
| 262 | <li> |
---|
| 263 | <a href="#InputSplit">InputSplit</a> |
---|
| 264 | </li> |
---|
| 265 | <li> |
---|
| 266 | <a href="#RecordReader">RecordReader</a> |
---|
| 267 | </li> |
---|
| 268 | </ul> |
---|
| 269 | </li> |
---|
| 270 | <li> |
---|
| 271 | <a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%87%BA">作业的输出</a> |
---|
| 272 | <ul class="minitoc"> |
---|
| 273 | <li> |
---|
| 274 | <a href="#%E4%BB%BB%E5%8A%A1%E7%9A%84Side-Effect+File">任务的Side-Effect File</a> |
---|
| 275 | </li> |
---|
| 276 | <li> |
---|
| 277 | <a href="#RecordWriter">RecordWriter</a> |
---|
| 278 | </li> |
---|
| 279 | </ul> |
---|
| 280 | </li> |
---|
| 281 | <li> |
---|
| 282 | <a href="#%E5%85%B6%E4%BB%96%E6%9C%89%E7%94%A8%E7%9A%84%E7%89%B9%E6%80%A7">其他有用的特性</a> |
---|
| 283 | <ul class="minitoc"> |
---|
| 284 | <li> |
---|
| 285 | <a href="#Counters">Counters</a> |
---|
| 286 | </li> |
---|
| 287 | <li> |
---|
| 288 | <a href="#DistributedCache">DistributedCache</a> |
---|
| 289 | </li> |
---|
| 290 | <li> |
---|
| 291 | <a href="#Tool">Tool</a> |
---|
| 292 | </li> |
---|
| 293 | <li> |
---|
| 294 | <a href="#IsolationRunner">IsolationRunner</a> |
---|
| 295 | </li> |
---|
| 296 | <li> |
---|
| 297 | <a href="#Profiling">Profiling</a> |
---|
| 298 | </li> |
---|
| 299 | <li> |
---|
| 300 | <a href="#%E8%B0%83%E8%AF%95">调试</a> |
---|
| 301 | </li> |
---|
| 302 | <li> |
---|
| 303 | <a href="#JobControl">JobControl</a> |
---|
| 304 | </li> |
---|
| 305 | <li> |
---|
| 306 | <a href="#%E6%95%B0%E6%8D%AE%E5%8E%8B%E7%BC%A9">数据压缩</a> |
---|
| 307 | </li> |
---|
| 308 | </ul> |
---|
| 309 | </li> |
---|
| 310 | </ul> |
---|
| 311 | </li> |
---|
| 312 | <li> |
---|
| 313 | <a href="#%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v2.0">例子：WordCount v2.0</a> |
---|
| 314 | <ul class="minitoc"> |
---|
| 315 | <li> |
---|
| 316 | <a href="#%E6%BA%90%E4%BB%A3%E7%A0%81-N10DC0">源代码</a> |
---|
| 317 | </li> |
---|
| 318 | <li> |
---|
| 319 | <a href="#%E8%BF%90%E8%A1%8C%E6%A0%B7%E4%BE%8B">运行样例</a> |
---|
| 320 | </li> |
---|
| 321 | <li> |
---|
| 322 | <a href="#%E7%A8%8B%E5%BA%8F%E8%A6%81%E7%82%B9">程序要点</a> |
---|
| 323 | </li> |
---|
| 324 | </ul> |
---|
| 325 | </li> |
---|
| 326 | </ul> |
---|
| 327 | </div> |
---|
| 328 | |
---|
| 329 | |
---|
| 330 | <a name="N1000D"></a><a name="%E7%9B%AE%E7%9A%84"></a> |
---|
| 331 | <h2 class="h3">目的</h2> |
---|
| 332 | <div class="section"> |
---|
| 333 | <p>这篇教程从用户的角度出发，全面地介绍了Hadoop Map/Reduce框架的各个方面。</p> |
---|
| 334 | </div> |
---|
| 335 | |
---|
| 336 | |
---|
| 337 | <a name="N10017"></a><a name="%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"></a> |
---|
| 338 | <h2 class="h3">先决条件</h2> |
---|
| 339 | <div class="section"> |
---|
| 340 | <p>请先确认Hadoop被正确安装、配置和正常运行中。更多信息见：</p> |
---|
| 341 | <ul> |
---|
| 342 | |
---|
| 343 | <li> |
---|
| 344 | |
---|
| 345 | <a href="quickstart.html">Hadoop快速入门</a>对初次使用者。 |
---|
| 346 | </li> |
---|
| 347 | |
---|
| 348 | <li> |
---|
| 349 | |
---|
| 350 | <a href="cluster_setup.html">Hadoop集群搭建</a>对大规模分布式集群。 |
---|
| 351 | </li> |
---|
| 352 | |
---|
| 353 | </ul> |
---|
| 354 | </div> |
---|
| 355 | |
---|
| 356 | |
---|
| 357 | <a name="N10032"></a><a name="%E6%A6%82%E8%BF%B0"></a> |
---|
| 358 | <h2 class="h3">概述</h2> |
---|
| 359 | <div class="section"> |
---|
| 360 | <p>Hadoop Map/Reduce是一个使用简易的软件框架，基于它写出来的应用程序能够运行在由上千个商用机器组成的大型集群上，并以一种可靠容错的方式并行处理上T级别的数据集。</p> |
---|
| 361 | <p>一个Map/Reduce <em>作业（job）</em> 通常会把输入的数据集切分为若干独立的数据块，由 |
---|
| 362 | <em>map任务（task）</em>以完全并行的方式处理它们。框架会对map的输出先进行排序， |
---|
| 363 | 然后把结果输入给<em>reduce任务</em>。通常作业的输入和输出都会被存储在文件系统中。 |
---|
| 364 | 整个框架负责任务的调度和监控，以及重新执行已经失败的任务。</p> |
---|
| 365 | <p>通常，Map/Reduce框架和<a href="hdfs_design.html">分布式文件系统</a>是运行在一组相同的节点上的，也就是说，计算节点和存储节点通常在一起。这种配置允许框架在那些已经存好数据的节点上高效地调度任务，这可以使整个集群的网络带宽被非常高效地利用。</p> |
---|
| 366 | <p>Map/Reduce框架由一个单独的master <span class="codefrag">JobTracker</span> 和每个集群节点一个slave <span class="codefrag">TaskTracker</span>共同组成。master负责调度构成一个作业的所有任务，这些任务分布在不同的slave上，master监控它们的执行，重新执行已经失败的任务。而slave仅负责执行由master指派的任务。</p> |
---|
| 367 | <p>应用程序至少应该指明输入/输出的位置（路径），并通过实现合适的接口或抽象类提供map和reduce函数。再加上其他作业的参数，就构成了<em>作业配置（job configuration）</em>。然后，Hadoop的 <em>job client</em>提交作业（jar包/可执行程序等）和配置信息给<span class="codefrag">JobTracker</span>，后者负责分发这些软件和配置信息给slave、调度任务并监控它们的执行，同时提供状态和诊断信息给job-client。</p> |
---|
| 368 | <p>虽然Hadoop框架是用Java<sup>TM</sup>实现的，但Map/Reduce应用程序则不一定要用 |
---|
| 369 | Java来写 。</p> |
---|
| 370 | <ul> |
---|
| 371 | |
---|
| 372 | <li> |
---|
| 373 | |
---|
| 374 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/streaming/package-summary.html"> |
---|
| 375 | Hadoop Streaming</a>是一种运行作业的实用工具，它允许用户创建和运行任何可执行程序 |
---|
| 376 | （例如：Shell工具）来做为mapper和reducer。 |
---|
| 377 | </li> |
---|
| 378 | |
---|
| 379 | <li> |
---|
| 380 | |
---|
| 381 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/pipes/package-summary.html"> |
---|
| 382 | Hadoop Pipes</a>是一个与<a href="http://www.swig.org/">SWIG</a>兼容的C++ API |
---|
| 383 | （没有基于JNI<sup>TM</sup>技术），它也可用于实现Map/Reduce应用程序。 |
---|
| 384 | </li> |
---|
| 385 | |
---|
| 386 | </ul> |
---|
| 387 | </div> |
---|
| 388 | |
---|
| 389 | |
---|
| 390 | <a name="N10082"></a><a name="%E8%BE%93%E5%85%A5%E4%B8%8E%E8%BE%93%E5%87%BA"></a> |
---|
| 391 | <h2 class="h3">输入与输出</h2> |
---|
| 392 | <div class="section"> |
---|
| 393 | <p>Map/Reduce框架运转在<span class="codefrag">&lt;key, value&gt;</span> 键值对上，也就是说， |
---|
| 394 | 框架把作业的输入看为是一组<span class="codefrag">&lt;key, value&gt;</span> 键值对，同样也产出一组 |
---|
| 395 | <span class="codefrag">&lt;key, value&gt;</span> 键值对做为作业的输出，这两组键值对的类型可能不同。</p> |
---|
| 396 | <p>框架需要对<span class="codefrag">key</span>和<span class="codefrag">value</span>的类(classes)进行序列化操作， |
---|
| 397 | 因此，这些类需要实现 <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Writable.html">Writable</a>接口。 |
---|
| 398 | 另外，为了方便框架执行排序操作，<span class="codefrag">key</span>类必须实现 |
---|
| 399 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/WritableComparable.html"> |
---|
| 400 | WritableComparable</a>接口。 |
---|
| 401 | </p> |
---|
| 402 | <p>一个Map/Reduce 作业的输入和输出类型如下所示：</p> |
---|
| 403 | <p> |
---|
| 404 | (input) <span class="codefrag">&lt;k1, v1&gt;</span> |
---|
| 405 | -> |
---|
| 406 | <strong>map</strong> |
---|
| 407 | -> |
---|
| 408 | <span class="codefrag">&lt;k2, v2&gt;</span> |
---|
| 409 | -> |
---|
| 410 | <strong>combine</strong> |
---|
| 411 | -> |
---|
| 412 | <span class="codefrag">&lt;k2, v2&gt;</span> |
---|
| 413 | -> |
---|
| 414 | <strong>reduce</strong> |
---|
| 415 | -> |
---|
| 416 | <span class="codefrag">&lt;k3, v3&gt;</span> (output) |
---|
| 417 | </p> |
---|
| 418 | </div> |
---|
| 419 | |
---|
| 420 | |
---|
| 421 | <a name="N100C4"></a><a name="%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v1.0"></a> |
---|
| 422 | <h2 class="h3">例子：WordCount v1.0</h2> |
---|
| 423 | <div class="section"> |
---|
| 424 | <p>在深入细节之前，让我们先看一个Map/Reduce的应用示例，以便对它们的工作方式有一个初步的认识。</p> |
---|
| 425 | <p> |
---|
| 426 | <span class="codefrag">WordCount</span>是一个简单的应用，它可以计算出指定数据集中每一个单词出现的次数。</p> |
---|
| 427 | <p>这个应用适用于 |
---|
| 428 | <a href="quickstart.html#Standalone+Operation">单机模式</a>， |
---|
| 429 | <a href="quickstart.html#SingleNodeSetup">伪分布式模式</a> 或 |
---|
| 430 | <a href="quickstart.html#Fully-Distributed+Operation">完全分布式模式</a> |
---|
| 431 | 三种Hadoop安装方式。</p> |
---|
| 432 | <a name="N100E1"></a><a name="%E6%BA%90%E4%BB%A3%E7%A0%81"></a> |
---|
| 433 | <h3 class="h4">源代码</h3> |
---|
| 434 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 435 | |
---|
| 436 | <tr> |
---|
| 437 | |
---|
| 438 | <th colspan="1" rowspan="1"></th> |
---|
| 439 | <th colspan="1" rowspan="1">WordCount.java</th> |
---|
| 440 | |
---|
| 441 | </tr> |
---|
| 442 | |
---|
| 443 | <tr> |
---|
| 444 | |
---|
| 445 | <td colspan="1" rowspan="1">1.</td> |
---|
| 446 | <td colspan="1" rowspan="1"> |
---|
| 447 | <span class="codefrag">package org.myorg;</span> |
---|
| 448 | </td> |
---|
| 449 | |
---|
| 450 | </tr> |
---|
| 451 | |
---|
| 452 | <tr> |
---|
| 453 | |
---|
| 454 | <td colspan="1" rowspan="1">2.</td> |
---|
| 455 | <td colspan="1" rowspan="1"></td> |
---|
| 456 | |
---|
| 457 | </tr> |
---|
| 458 | |
---|
| 459 | <tr> |
---|
| 460 | |
---|
| 461 | <td colspan="1" rowspan="1">3.</td> |
---|
| 462 | <td colspan="1" rowspan="1"> |
---|
| 463 | <span class="codefrag">import java.io.IOException;</span> |
---|
| 464 | </td> |
---|
| 465 | |
---|
| 466 | </tr> |
---|
| 467 | |
---|
| 468 | <tr> |
---|
| 469 | |
---|
| 470 | <td colspan="1" rowspan="1">4.</td> |
---|
| 471 | <td colspan="1" rowspan="1"> |
---|
| 472 | <span class="codefrag">import java.util.*;</span> |
---|
| 473 | </td> |
---|
| 474 | |
---|
| 475 | </tr> |
---|
| 476 | |
---|
| 477 | <tr> |
---|
| 478 | |
---|
| 479 | <td colspan="1" rowspan="1">5.</td> |
---|
| 480 | <td colspan="1" rowspan="1"></td> |
---|
| 481 | |
---|
| 482 | </tr> |
---|
| 483 | |
---|
| 484 | <tr> |
---|
| 485 | |
---|
| 486 | <td colspan="1" rowspan="1">6.</td> |
---|
| 487 | <td colspan="1" rowspan="1"> |
---|
| 488 | <span class="codefrag">import org.apache.hadoop.fs.Path;</span> |
---|
| 489 | </td> |
---|
| 490 | |
---|
| 491 | </tr> |
---|
| 492 | |
---|
| 493 | <tr> |
---|
| 494 | |
---|
| 495 | <td colspan="1" rowspan="1">7.</td> |
---|
| 496 | <td colspan="1" rowspan="1"> |
---|
| 497 | <span class="codefrag">import org.apache.hadoop.conf.*;</span> |
---|
| 498 | </td> |
---|
| 499 | |
---|
| 500 | </tr> |
---|
| 501 | |
---|
| 502 | <tr> |
---|
| 503 | |
---|
| 504 | <td colspan="1" rowspan="1">8.</td> |
---|
| 505 | <td colspan="1" rowspan="1"> |
---|
| 506 | <span class="codefrag">import org.apache.hadoop.io.*;</span> |
---|
| 507 | </td> |
---|
| 508 | |
---|
| 509 | </tr> |
---|
| 510 | |
---|
| 511 | <tr> |
---|
| 512 | |
---|
| 513 | <td colspan="1" rowspan="1">9.</td> |
---|
| 514 | <td colspan="1" rowspan="1"> |
---|
| 515 | <span class="codefrag">import org.apache.hadoop.mapred.*;</span> |
---|
| 516 | </td> |
---|
| 517 | |
---|
| 518 | </tr> |
---|
| 519 | |
---|
| 520 | <tr> |
---|
| 521 | |
---|
| 522 | <td colspan="1" rowspan="1">10.</td> |
---|
| 523 | <td colspan="1" rowspan="1"> |
---|
| 524 | <span class="codefrag">import org.apache.hadoop.util.*;</span> |
---|
| 525 | </td> |
---|
| 526 | |
---|
| 527 | </tr> |
---|
| 528 | |
---|
| 529 | <tr> |
---|
| 530 | |
---|
| 531 | <td colspan="1" rowspan="1">11.</td> |
---|
| 532 | <td colspan="1" rowspan="1"></td> |
---|
| 533 | |
---|
| 534 | </tr> |
---|
| 535 | |
---|
| 536 | <tr> |
---|
| 537 | |
---|
| 538 | <td colspan="1" rowspan="1">12.</td> |
---|
| 539 | <td colspan="1" rowspan="1"> |
---|
| 540 | <span class="codefrag">public class WordCount {</span> |
---|
| 541 | </td> |
---|
| 542 | |
---|
| 543 | </tr> |
---|
| 544 | |
---|
| 545 | <tr> |
---|
| 546 | |
---|
| 547 | <td colspan="1" rowspan="1">13.</td> |
---|
| 548 | <td colspan="1" rowspan="1"></td> |
---|
| 549 | |
---|
| 550 | </tr> |
---|
| 551 | |
---|
| 552 | <tr> |
---|
| 553 | |
---|
| 554 | <td colspan="1" rowspan="1">14.</td> |
---|
| 555 | <td colspan="1" rowspan="1"> |
---|
| 556 | |
---|
| 557 | <span class="codefrag"> |
---|
| 558 | public static class Map extends MapReduceBase |
---|
| 559 | implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; { |
---|
| 560 | </span> |
---|
| 561 | </td> |
---|
| 562 | |
---|
| 563 | </tr> |
---|
| 564 | |
---|
| 565 | <tr> |
---|
| 566 | |
---|
| 567 | <td colspan="1" rowspan="1">15.</td> |
---|
| 568 | <td colspan="1" rowspan="1"> |
---|
| 569 | |
---|
| 570 | <span class="codefrag"> |
---|
| 571 | private final static IntWritable one = new IntWritable(1); |
---|
| 572 | </span> |
---|
| 573 | </td> |
---|
| 574 | |
---|
| 575 | </tr> |
---|
| 576 | |
---|
| 577 | <tr> |
---|
| 578 | |
---|
| 579 | <td colspan="1" rowspan="1">16.</td> |
---|
| 580 | <td colspan="1" rowspan="1"> |
---|
| 581 | |
---|
| 582 | <span class="codefrag">private Text word = new Text();</span> |
---|
| 583 | </td> |
---|
| 584 | |
---|
| 585 | </tr> |
---|
| 586 | |
---|
| 587 | <tr> |
---|
| 588 | |
---|
| 589 | <td colspan="1" rowspan="1">17.</td> |
---|
| 590 | <td colspan="1" rowspan="1"></td> |
---|
| 591 | |
---|
| 592 | </tr> |
---|
| 593 | |
---|
| 594 | <tr> |
---|
| 595 | |
---|
| 596 | <td colspan="1" rowspan="1">18.</td> |
---|
| 597 | <td colspan="1" rowspan="1"> |
---|
| 598 | |
---|
| 599 | <span class="codefrag"> |
---|
| 600 | public void map(LongWritable key, Text value, |
---|
| 601 | OutputCollector&lt;Text, IntWritable&gt; output, |
---|
| 602 | Reporter reporter) throws IOException { |
---|
| 603 | </span> |
---|
| 604 | </td> |
---|
| 605 | |
---|
| 606 | </tr> |
---|
| 607 | |
---|
| 608 | <tr> |
---|
| 609 | |
---|
| 610 | <td colspan="1" rowspan="1">19.</td> |
---|
| 611 | <td colspan="1" rowspan="1"> |
---|
| 612 | |
---|
| 613 | <span class="codefrag">String line = value.toString();</span> |
---|
| 614 | </td> |
---|
| 615 | |
---|
| 616 | </tr> |
---|
| 617 | |
---|
| 618 | <tr> |
---|
| 619 | |
---|
| 620 | <td colspan="1" rowspan="1">20.</td> |
---|
| 621 | <td colspan="1" rowspan="1"> |
---|
| 622 | |
---|
| 623 | <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span> |
---|
| 624 | </td> |
---|
| 625 | |
---|
| 626 | </tr> |
---|
| 627 | |
---|
| 628 | <tr> |
---|
| 629 | |
---|
| 630 | <td colspan="1" rowspan="1">21.</td> |
---|
| 631 | <td colspan="1" rowspan="1"> |
---|
| 632 | |
---|
| 633 | <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span> |
---|
| 634 | </td> |
---|
| 635 | |
---|
| 636 | </tr> |
---|
| 637 | |
---|
| 638 | <tr> |
---|
| 639 | |
---|
| 640 | <td colspan="1" rowspan="1">22.</td> |
---|
| 641 | <td colspan="1" rowspan="1"> |
---|
| 642 | |
---|
| 643 | <span class="codefrag">word.set(tokenizer.nextToken());</span> |
---|
| 644 | </td> |
---|
| 645 | |
---|
| 646 | </tr> |
---|
| 647 | |
---|
| 648 | <tr> |
---|
| 649 | |
---|
| 650 | <td colspan="1" rowspan="1">23.</td> |
---|
| 651 | <td colspan="1" rowspan="1"> |
---|
| 652 | |
---|
| 653 | <span class="codefrag">output.collect(word, one);</span> |
---|
| 654 | </td> |
---|
| 655 | |
---|
| 656 | </tr> |
---|
| 657 | |
---|
| 658 | <tr> |
---|
| 659 | |
---|
| 660 | <td colspan="1" rowspan="1">24.</td> |
---|
| 661 | <td colspan="1" rowspan="1"> |
---|
| 662 | |
---|
| 663 | <span class="codefrag">}</span> |
---|
| 664 | </td> |
---|
| 665 | |
---|
| 666 | </tr> |
---|
| 667 | |
---|
| 668 | <tr> |
---|
| 669 | |
---|
| 670 | <td colspan="1" rowspan="1">25.</td> |
---|
| 671 | <td colspan="1" rowspan="1"> |
---|
| 672 | |
---|
| 673 | <span class="codefrag">}</span> |
---|
| 674 | </td> |
---|
| 675 | |
---|
| 676 | </tr> |
---|
| 677 | |
---|
| 678 | <tr> |
---|
| 679 | |
---|
| 680 | <td colspan="1" rowspan="1">26.</td> |
---|
| 681 | <td colspan="1" rowspan="1"> |
---|
| 682 | |
---|
| 683 | <span class="codefrag">}</span> |
---|
| 684 | </td> |
---|
| 685 | |
---|
| 686 | </tr> |
---|
| 687 | |
---|
| 688 | <tr> |
---|
| 689 | |
---|
| 690 | <td colspan="1" rowspan="1">27.</td> |
---|
| 691 | <td colspan="1" rowspan="1"></td> |
---|
| 692 | |
---|
| 693 | </tr> |
---|
| 694 | |
---|
| 695 | <tr> |
---|
| 696 | |
---|
| 697 | <td colspan="1" rowspan="1">28.</td> |
---|
| 698 | <td colspan="1" rowspan="1"> |
---|
| 699 | |
---|
| 700 | <span class="codefrag"> |
---|
| 701 | public static class Reduce extends MapReduceBase implements |
---|
| 702 | Reducer&lt;Text, IntWritable, Text, IntWritable&gt; { |
---|
| 703 | </span> |
---|
| 704 | </td> |
---|
| 705 | |
---|
| 706 | </tr> |
---|
| 707 | |
---|
| 708 | <tr> |
---|
| 709 | |
---|
| 710 | <td colspan="1" rowspan="1">29.</td> |
---|
| 711 | <td colspan="1" rowspan="1"> |
---|
| 712 | |
---|
| 713 | <span class="codefrag"> |
---|
| 714 | public void reduce(Text key, Iterator&lt;IntWritable&gt; values, |
---|
| 715 | OutputCollector&lt;Text, IntWritable&gt; output, |
---|
| 716 | Reporter reporter) throws IOException { |
---|
| 717 | </span> |
---|
| 718 | </td> |
---|
| 719 | |
---|
| 720 | </tr> |
---|
| 721 | |
---|
| 722 | <tr> |
---|
| 723 | |
---|
| 724 | <td colspan="1" rowspan="1">30.</td> |
---|
| 725 | <td colspan="1" rowspan="1"> |
---|
| 726 | |
---|
| 727 | <span class="codefrag">int sum = 0;</span> |
---|
| 728 | </td> |
---|
| 729 | |
---|
| 730 | </tr> |
---|
| 731 | |
---|
| 732 | <tr> |
---|
| 733 | |
---|
| 734 | <td colspan="1" rowspan="1">31.</td> |
---|
| 735 | <td colspan="1" rowspan="1"> |
---|
| 736 | |
---|
| 737 | <span class="codefrag">while (values.hasNext()) {</span> |
---|
| 738 | </td> |
---|
| 739 | |
---|
| 740 | </tr> |
---|
| 741 | |
---|
| 742 | <tr> |
---|
| 743 | |
---|
| 744 | <td colspan="1" rowspan="1">32.</td> |
---|
| 745 | <td colspan="1" rowspan="1"> |
---|
| 746 | |
---|
| 747 | <span class="codefrag">sum += values.next().get();</span> |
---|
| 748 | </td> |
---|
| 749 | |
---|
| 750 | </tr> |
---|
| 751 | |
---|
| 752 | <tr> |
---|
| 753 | |
---|
| 754 | <td colspan="1" rowspan="1">33.</td> |
---|
| 755 | <td colspan="1" rowspan="1"> |
---|
| 756 | |
---|
| 757 | <span class="codefrag">}</span> |
---|
| 758 | </td> |
---|
| 759 | |
---|
| 760 | </tr> |
---|
| 761 | |
---|
| 762 | <tr> |
---|
| 763 | |
---|
| 764 | <td colspan="1" rowspan="1">34.</td> |
---|
| 765 | <td colspan="1" rowspan="1"> |
---|
| 766 | |
---|
| 767 | <span class="codefrag">output.collect(key, new IntWritable(sum));</span> |
---|
| 768 | </td> |
---|
| 769 | |
---|
| 770 | </tr> |
---|
| 771 | |
---|
| 772 | <tr> |
---|
| 773 | |
---|
| 774 | <td colspan="1" rowspan="1">35.</td> |
---|
| 775 | <td colspan="1" rowspan="1"> |
---|
| 776 | |
---|
| 777 | <span class="codefrag">}</span> |
---|
| 778 | </td> |
---|
| 779 | |
---|
| 780 | </tr> |
---|
| 781 | |
---|
| 782 | <tr> |
---|
| 783 | |
---|
| 784 | <td colspan="1" rowspan="1">36.</td> |
---|
| 785 | <td colspan="1" rowspan="1"> |
---|
| 786 | |
---|
| 787 | <span class="codefrag">}</span> |
---|
| 788 | </td> |
---|
| 789 | |
---|
| 790 | </tr> |
---|
| 791 | |
---|
| 792 | <tr> |
---|
| 793 | |
---|
| 794 | <td colspan="1" rowspan="1">37.</td> |
---|
| 795 | <td colspan="1" rowspan="1"></td> |
---|
| 796 | |
---|
| 797 | </tr> |
---|
| 798 | |
---|
| 799 | <tr> |
---|
| 800 | |
---|
| 801 | <td colspan="1" rowspan="1">38.</td> |
---|
| 802 | <td colspan="1" rowspan="1"> |
---|
| 803 | |
---|
| 804 | <span class="codefrag"> |
---|
| 805 | public static void main(String[] args) throws Exception { |
---|
| 806 | </span> |
---|
| 807 | </td> |
---|
| 808 | |
---|
| 809 | </tr> |
---|
| 810 | |
---|
| 811 | <tr> |
---|
| 812 | |
---|
| 813 | <td colspan="1" rowspan="1">39.</td> |
---|
| 814 | <td colspan="1" rowspan="1"> |
---|
| 815 | |
---|
| 816 | <span class="codefrag"> |
---|
| 817 | JobConf conf = new JobConf(WordCount.class); |
---|
| 818 | </span> |
---|
| 819 | </td> |
---|
| 820 | |
---|
| 821 | </tr> |
---|
| 822 | |
---|
| 823 | <tr> |
---|
| 824 | |
---|
| 825 | <td colspan="1" rowspan="1">40.</td> |
---|
| 826 | <td colspan="1" rowspan="1"> |
---|
| 827 | |
---|
| 828 | <span class="codefrag">conf.setJobName("wordcount");</span> |
---|
| 829 | </td> |
---|
| 830 | |
---|
| 831 | </tr> |
---|
| 832 | |
---|
| 833 | <tr> |
---|
| 834 | |
---|
| 835 | <td colspan="1" rowspan="1">41.</td> |
---|
| 836 | <td colspan="1" rowspan="1"></td> |
---|
| 837 | |
---|
| 838 | </tr> |
---|
| 839 | |
---|
| 840 | <tr> |
---|
| 841 | |
---|
| 842 | <td colspan="1" rowspan="1">42.</td> |
---|
| 843 | <td colspan="1" rowspan="1"> |
---|
| 844 | |
---|
| 845 | <span class="codefrag">conf.setOutputKeyClass(Text.class);</span> |
---|
| 846 | </td> |
---|
| 847 | |
---|
| 848 | </tr> |
---|
| 849 | |
---|
| 850 | <tr> |
---|
| 851 | |
---|
| 852 | <td colspan="1" rowspan="1">43.</td> |
---|
| 853 | <td colspan="1" rowspan="1"> |
---|
| 854 | |
---|
| 855 | <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span> |
---|
| 856 | </td> |
---|
| 857 | |
---|
| 858 | </tr> |
---|
| 859 | |
---|
| 860 | <tr> |
---|
| 861 | |
---|
| 862 | <td colspan="1" rowspan="1">44.</td> |
---|
| 863 | <td colspan="1" rowspan="1"></td> |
---|
| 864 | |
---|
| 865 | </tr> |
---|
| 866 | |
---|
| 867 | <tr> |
---|
| 868 | |
---|
| 869 | <td colspan="1" rowspan="1">45.</td> |
---|
| 870 | <td colspan="1" rowspan="1"> |
---|
| 871 | |
---|
| 872 | <span class="codefrag">conf.setMapperClass(Map.class);</span> |
---|
| 873 | </td> |
---|
| 874 | |
---|
| 875 | </tr> |
---|
| 876 | |
---|
| 877 | <tr> |
---|
| 878 | |
---|
| 879 | <td colspan="1" rowspan="1">46.</td> |
---|
| 880 | <td colspan="1" rowspan="1"> |
---|
| 881 | |
---|
| 882 | <span class="codefrag">conf.setCombinerClass(Reduce.class);</span> |
---|
| 883 | </td> |
---|
| 884 | |
---|
| 885 | </tr> |
---|
| 886 | |
---|
| 887 | <tr> |
---|
| 888 | |
---|
| 889 | <td colspan="1" rowspan="1">47.</td> |
---|
| 890 | <td colspan="1" rowspan="1"> |
---|
| 891 | |
---|
| 892 | <span class="codefrag">conf.setReducerClass(Reduce.class);</span> |
---|
| 893 | </td> |
---|
| 894 | |
---|
| 895 | </tr> |
---|
| 896 | |
---|
| 897 | <tr> |
---|
| 898 | |
---|
| 899 | <td colspan="1" rowspan="1">48.</td> |
---|
| 900 | <td colspan="1" rowspan="1"></td> |
---|
| 901 | |
---|
| 902 | </tr> |
---|
| 903 | |
---|
| 904 | <tr> |
---|
| 905 | |
---|
| 906 | <td colspan="1" rowspan="1">49.</td> |
---|
| 907 | <td colspan="1" rowspan="1"> |
---|
| 908 | |
---|
| 909 | <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span> |
---|
| 910 | </td> |
---|
| 911 | |
---|
| 912 | </tr> |
---|
| 913 | |
---|
| 914 | <tr> |
---|
| 915 | |
---|
| 916 | <td colspan="1" rowspan="1">50.</td> |
---|
| 917 | <td colspan="1" rowspan="1"> |
---|
| 918 | |
---|
| 919 | <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span> |
---|
| 920 | </td> |
---|
| 921 | |
---|
| 922 | </tr> |
---|
| 923 | |
---|
| 924 | <tr> |
---|
| 925 | |
---|
| 926 | <td colspan="1" rowspan="1">51.</td> |
---|
| 927 | <td colspan="1" rowspan="1"></td> |
---|
| 928 | |
---|
| 929 | </tr> |
---|
| 930 | |
---|
| 931 | <tr> |
---|
| 932 | |
---|
| 933 | <td colspan="1" rowspan="1">52.</td> |
---|
| 934 | <td colspan="1" rowspan="1"> |
---|
| 935 | |
---|
| 936 | <span class="codefrag">FileInputFormat.setInputPaths(conf, new Path(args[0]));</span> |
---|
| 937 | </td> |
---|
| 938 | |
---|
| 939 | </tr> |
---|
| 940 | |
---|
| 941 | <tr> |
---|
| 942 | |
---|
| 943 | <td colspan="1" rowspan="1">53.</td> |
---|
| 944 | <td colspan="1" rowspan="1"> |
---|
| 945 | |
---|
| 946 | <span class="codefrag">FileOutputFormat.setOutputPath(conf, new Path(args[1]));</span> |
---|
| 947 | </td> |
---|
| 948 | |
---|
| 949 | </tr> |
---|
| 950 | |
---|
| 951 | <tr> |
---|
| 952 | |
---|
| 953 | <td colspan="1" rowspan="1">54.</td> |
---|
| 954 | <td colspan="1" rowspan="1"></td> |
---|
| 955 | |
---|
| 956 | </tr> |
---|
| 957 | |
---|
| 958 | <tr> |
---|
| 959 | |
---|
| 960 | <td colspan="1" rowspan="1">55.</td> |
---|
| 961 | <td colspan="1" rowspan="1"> |
---|
| 962 | |
---|
| 963 | <span class="codefrag">JobClient.runJob(conf);</span> |
---|
| 964 | </td> |
---|
| 965 | |
---|
| 966 | </tr> |
---|
| 967 | |
---|
| 968 | <tr> |
---|
| 969 | |
---|
| 970 | <td colspan="1" rowspan="1">56.</td> |
---|
| 971 | <td colspan="1" rowspan="1"> |
---|
| 972 | |
---|
| 973 | <span class="codefrag">}</span> |
---|
| 974 | </td> |
---|
| 975 | |
---|
| 976 | </tr> |
---|
| 977 | |
---|
| 978 | <tr> |
---|
| 979 | |
---|
| 980 | <td colspan="1" rowspan="1">57.</td> |
---|
| 981 | <td colspan="1" rowspan="1"> |
---|
| 982 | <span class="codefrag">}</span> |
---|
| 983 | </td> |
---|
| 984 | |
---|
| 985 | </tr> |
---|
| 986 | |
---|
| 987 | <tr> |
---|
| 988 | |
---|
| 989 | <td colspan="1" rowspan="1">58.</td> |
---|
| 990 | <td colspan="1" rowspan="1"></td> |
---|
| 991 | |
---|
| 992 | </tr> |
---|
| 993 | |
---|
| 994 | </table> |
---|
| 995 | <a name="N10463"></a><a name="%E7%94%A8%E6%B3%95"></a> |
---|
| 996 | <h3 class="h4">用法</h3> |
---|
| 997 | <p>假设环境变量<span class="codefrag">HADOOP_HOME</span>对应安装时的根目录，<span class="codefrag">HADOOP_VERSION</span>对应Hadoop的当前安装版本，编译<span class="codefrag">WordCount.java</span>来创建jar包，可如下操作：</p> |
---|
| 998 | <p> |
---|
| 999 | |
---|
| 1000 | <span class="codefrag">$ mkdir wordcount_classes</span> |
---|
| 1001 | <br> |
---|
| 1002 | |
---|
| 1003 | <span class="codefrag"> |
---|
| 1004 | $ javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar |
---|
| 1005 | -d wordcount_classes WordCount.java |
---|
| 1006 | </span> |
---|
| 1007 | <br> |
---|
| 1008 | |
---|
| 1009 | <span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</span> |
---|
| 1010 | |
---|
| 1011 | </p> |
---|
| 1012 | <p>假设：</p> |
---|
| 1013 | <ul> |
---|
| 1014 | |
---|
| 1015 | <li> |
---|
| 1016 | |
---|
| 1017 | <span class="codefrag">/usr/joe/wordcount/input</span> - 是HDFS中的输入路径 |
---|
| 1018 | </li> |
---|
| 1019 | |
---|
| 1020 | <li> |
---|
| 1021 | |
---|
| 1022 | <span class="codefrag">/usr/joe/wordcount/output</span> - 是HDFS中的输出路径 |
---|
| 1023 | </li> |
---|
| 1024 | |
---|
| 1025 | </ul> |
---|
| 1026 | <p>用示例文本文件做为输入：</p> |
---|
| 1027 | <p> |
---|
| 1028 | |
---|
| 1029 | <span class="codefrag">$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</span> |
---|
| 1030 | <br> |
---|
| 1031 | |
---|
| 1032 | <span class="codefrag">/usr/joe/wordcount/input/file01</span> |
---|
| 1033 | <br> |
---|
| 1034 | |
---|
| 1035 | <span class="codefrag">/usr/joe/wordcount/input/file02</span> |
---|
| 1036 | <br> |
---|
| 1037 | |
---|
| 1038 | <br> |
---|
| 1039 | |
---|
| 1040 | <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</span> |
---|
| 1041 | <br> |
---|
| 1042 | |
---|
| 1043 | <span class="codefrag">Hello World Bye World</span> |
---|
| 1044 | <br> |
---|
| 1045 | |
---|
| 1046 | <br> |
---|
| 1047 | |
---|
| 1048 | <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span> |
---|
| 1049 | <br> |
---|
| 1050 | |
---|
| 1051 | <span class="codefrag">Hello Hadoop Goodbye Hadoop</span> |
---|
| 1052 | |
---|
| 1053 | </p> |
---|
| 1054 | <p>运行应用程序：</p> |
---|
| 1055 | <p> |
---|
| 1056 | |
---|
| 1057 | <span class="codefrag"> |
---|
| 1058 | $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount |
---|
| 1059 | /usr/joe/wordcount/input /usr/joe/wordcount/output |
---|
| 1060 | </span> |
---|
| 1061 | |
---|
| 1062 | </p> |
---|
| 1063 | <p>输出是：</p> |
---|
| 1064 | <p> |
---|
| 1065 | |
---|
| 1066 | <span class="codefrag"> |
---|
| 1067 | $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000 |
---|
| 1068 | </span> |
---|
| 1069 | |
---|
| 1070 | <br> |
---|
| 1071 | |
---|
| 1072 | <span class="codefrag">Bye 1</span> |
---|
| 1073 | <br> |
---|
| 1074 | |
---|
| 1075 | <span class="codefrag">Goodbye 1</span> |
---|
| 1076 | <br> |
---|
| 1077 | |
---|
| 1078 | <span class="codefrag">Hadoop 2</span> |
---|
| 1079 | <br> |
---|
| 1080 | |
---|
| 1081 | <span class="codefrag">Hello 2</span> |
---|
| 1082 | <br> |
---|
| 1083 | |
---|
| 1084 | <span class="codefrag">World 2</span> |
---|
| 1085 | <br> |
---|
| 1086 | |
---|
| 1087 | </p> |
---|
| 1088 | <p> åºçšçšåºèœå€äœ¿çš<span class="codefrag">-files</span>é项æ¥æå®äžäžªç±éå·åéçè·¯åŸåè¡šïŒè¿äºè·¯åŸæ¯taskçåœåå·¥äœç®åœã䜿çšé项<span class="codefrag">-libjars</span>å¯ä»¥åmapåreduceçclasspathäžæ·»å jarå
ã䜿çš<span class="codefrag">-archives</span>é项çšåºå¯ä»¥äŒ éæ¡£æ¡æ件å䞺åæ°ïŒè¿äºæ¡£æ¡æ件äŒè¢«è§£å并äžåštaskçåœåå·¥äœç®åœäžäŒå建äžäžªæå解åçæçç®åœç笊å·éŸæ¥ïŒä»¥å猩å
çåååœåïŒã |
---|
| 1089 | æå
³åœä»€è¡é项çæŽå€ç»è请åè |
---|
| 1090 | <a href="commands_manual.html">Commands manual</a>ã</p> |
---|
| 1091 | <p>䜿çš<span class="codefrag">-libjars</span>å<span class="codefrag">-files</span>è¿è¡<span class="codefrag">wordcount</span>äŸåïŒ<br> |
---|
| 1092 | |
---|
| 1093 | <span class="codefrag"> hadoop jar hadoop-examples.jar wordcount -files cachefile.txt |
---|
| 1094 | -libjars mylib.jar input output </span> |
---|
| 1095 | |
---|
| 1096 | </p> |
---|
| 1097 | <a name="N10504"></a><a name="%E8%A7%A3%E9%87%8A"></a> |
---|
| 1098 | <h3 class="h4">解释</h3> |
---|
| 1099 | <p> |
---|
| 1100 | <span class="codefrag">WordCount</span>应用程序非常直截了当。</p> |
---|
| 1101 | <p> |
---|
| 1102 | <span class="codefrag">Mapper</span>(14-26行)中的<span class="codefrag">map</span>方法(18-25行)通过指定的 |
---|
| 1103 | <span class="codefrag">TextInputFormat</span>(49行)一次处理一行。然后，它通过<span class="codefrag">StringTokenizer</span> |
---|
| 1104 | 以空格为分隔符将一行切分为若干tokens，之后，输出<span class="codefrag">&lt; &lt;word&gt;, 1&gt;</span> |
---|
| 1105 | 形式的键值对。</p> |
---|
| 1106 | <p> |
---|
| 1107 | 对äºç€ºäŸäžç第äžäžªèŸå
¥ïŒmapèŸåºæ¯ïŒ<br> |
---|
| 1108 | |
---|
| 1109 | <span class="codefrag">< Hello, 1></span> |
---|
| 1110 | <br> |
---|
| 1111 | |
---|
| 1112 | <span class="codefrag">< World, 1></span> |
---|
| 1113 | <br> |
---|
| 1114 | |
---|
| 1115 | <span class="codefrag">< Bye, 1></span> |
---|
| 1116 | <br> |
---|
| 1117 | |
---|
| 1118 | <span class="codefrag">< World, 1></span> |
---|
| 1119 | <br> |
---|
| 1120 | |
---|
| 1121 | </p> |
---|
| 1122 | <p> |
---|
| 1123 | 第äºäžªèŸå
¥ïŒmapèŸåºæ¯ïŒ<br> |
---|
| 1124 | |
---|
| 1125 | <span class="codefrag">< Hello, 1></span> |
---|
| 1126 | <br> |
---|
| 1127 | |
---|
| 1128 | <span class="codefrag">< Hadoop, 1></span> |
---|
| 1129 | <br> |
---|
| 1130 | |
---|
| 1131 | <span class="codefrag">< Goodbye, 1></span> |
---|
| 1132 | <br> |
---|
| 1133 | |
---|
| 1134 | <span class="codefrag">< Hadoop, 1></span> |
---|
| 1135 | <br> |
---|
| 1136 | |
---|
| 1137 | </p> |
---|
| 1138 | <p>å
³äºç»æäžäžªæå®äœäžçmapæ°ç®çç¡®å®ïŒä»¥ååŠäœä»¥æŽç²Ÿç»çæ¹åŒå»æ§å¶è¿äºmapïŒæ们å°åšæçšçåç»éšååŠä¹ å°æŽå€çå
容ã</p> |
---|
| 1139 | <p> |
---|
| 1140 | <span class="codefrag">WordCount</span>è¿æå®äºäžäžª<span class="codefrag">combiner</span> (46è¡)ãå æ€ïŒæ¯æ¬¡mapè¿è¡ä¹åïŒäŒå¯¹èŸåºæç
§<em>key</em>è¿è¡æåºïŒç¶åæèŸåºäŒ éç»æ¬å°çcombinerïŒæç
§äœäžçé
眮äžReduceräžæ ·ïŒïŒè¿è¡æ¬å°èåã</p> |
---|
| 1141 | <p> |
---|
| 1142 | 第äžäžªmapçèŸåºæ¯ïŒ<br> |
---|
| 1143 | |
---|
| 1144 | <span class="codefrag">< Bye, 1></span> |
---|
| 1145 | <br> |
---|
| 1146 | |
---|
| 1147 | <span class="codefrag">< Hello, 1></span> |
---|
| 1148 | <br> |
---|
| 1149 | |
---|
| 1150 | <span class="codefrag">< World, 2></span> |
---|
| 1151 | <br> |
---|
| 1152 | |
---|
| 1153 | </p> |
---|
| 1154 | <p> |
---|
| 1155 | 第äºäžªmapçèŸåºæ¯ïŒ<br> |
---|
| 1156 | |
---|
| 1157 | <span class="codefrag">< Goodbye, 1></span> |
---|
| 1158 | <br> |
---|
| 1159 | |
---|
| 1160 | <span class="codefrag">< Hadoop, 2></span> |
---|
| 1161 | <br> |
---|
| 1162 | |
---|
| 1163 | <span class="codefrag">< Hello, 1></span> |
---|
| 1164 | <br> |
---|
| 1165 | |
---|
| 1166 | </p> |
---|
| 1167 | <p> |
---|
| 1168 | <span class="codefrag">Reducer</span>(28-36è¡)äžç<span class="codefrag">reduce</span>æ¹æ³(29-35è¡) |
---|
| 1169 | ä»
æ¯å°æ¯äžªkeyïŒæ¬äŸäžå°±æ¯åè¯ïŒåºç°ç次æ°æ±åã |
---|
| 1170 | </p> |
---|
| 1171 | <p> |
---|
| 1172 | å æ€è¿äžªäœäžçèŸåºå°±æ¯ïŒ<br> |
---|
| 1173 | |
---|
| 1174 | <span class="codefrag">< Bye, 1></span> |
---|
| 1175 | <br> |
---|
| 1176 | |
---|
| 1177 | <span class="codefrag">< Goodbye, 1></span> |
---|
| 1178 | <br> |
---|
| 1179 | |
---|
| 1180 | <span class="codefrag">< Hadoop, 2></span> |
---|
| 1181 | <br> |
---|
| 1182 | |
---|
| 1183 | <span class="codefrag">< Hello, 2></span> |
---|
| 1184 | <br> |
---|
| 1185 | |
---|
| 1186 | <span class="codefrag">< World, 2></span> |
---|
| 1187 | <br> |
---|
| 1188 | |
---|
| 1189 | </p> |
---|
| 1190 | <p>代码中的<span class="codefrag">main</span>方法中指定了作业的几个方面， |
---|
| 1191 | 例如：通过命令行传递过来的输入/输出路径、key/value的类型、输入/输出的格式等等<span class="codefrag">JobConf</span>中的配置信息。随后程序调用了<span class="codefrag">JobClient.runJob</span>(55行)来提交作业并且监控它的执行。</p> |
---|
| 1192 | <p>æ们å°åšæ¬æçšçåç»éšååŠä¹ æŽå€çå
³äº<span class="codefrag">JobConf</span>ïŒ <span class="codefrag">JobClient</span>ïŒ |
---|
| 1193 | <span class="codefrag">Tool</span>åå
¶ä»æ¥å£åç±»(class)ã</p> |
---|
| 1194 | </div> |
---|
| 1195 | |
---|
| 1196 | |
---|
| 1197 | <a name="N105B5"></a><a name="Map%2FReduce+-+%E7%94%A8%E6%88%B7%E7%95%8C%E9%9D%A2"></a> |
---|
| 1198 | <h2 class="h3">Map/Reduce - 用户界面</h2> |
---|
| 1199 | <div class="section"> |
---|
| 1200 | <p>è¿éšåæ档䞺çšæ·å°äŒé¢äžŽçMap/Reduceæ¡æ¶äžçå䞪ç¯èæäŸäºéåœçç»èãè¿åºè¯¥äŒåž®å©çšæ·æŽç»ç²åºŠå°å»å®ç°ãé
眮åè°äŒäœäžãç¶èïŒè¯·æ³šææ¯äžªç±»/æ¥å£çjavadocææ¡£æäŸæå
šé¢çææ¡£ïŒæ¬æåªæ¯æ³èµ·å°æåçäœçšã |
---|
| 1201 | </p> |
---|
| 1202 | <p>æ们äŒå
çç<span class="codefrag">Mapper</span>å<span class="codefrag">Reducer</span>æ¥å£ãåºçšçšåºéåžžäŒéè¿æäŸ<span class="codefrag">map</span>å<span class="codefrag">reduce</span>æ¹æ³æ¥å®ç°å®ä»¬ã |
---|
| 1203 | </p> |
---|
| 1204 | <p>ç¶åïŒæ们äŒè®šè®ºå
¶ä»çæ žå¿æ¥å£ïŒå
¶äžå
æ¬ïŒ |
---|
| 1205 | <span class="codefrag">JobConf</span>ïŒ<span class="codefrag">JobClient</span>ïŒ<span class="codefrag">Partitioner</span>ïŒ |
---|
| 1206 | <span class="codefrag">OutputCollector</span>ïŒ<span class="codefrag">Reporter</span>ïŒ |
---|
| 1207 | <span class="codefrag">InputFormat</span>ïŒ<span class="codefrag">OutputFormat</span>ççã</p> |
---|
| 1208 | <p>æåïŒæ们å°éè¿è®šè®ºæ¡æ¶äžäžäºæçšçåèœç¹ïŒäŸåŠïŒ<span class="codefrag">DistributedCache</span>ïŒ |
---|
| 1209 | <span class="codefrag">IsolationRunner</span>ççïŒæ¥æ¶å°Ÿã</p> |
---|
| 1210 | <a name="N105EE"></a><a name="%E6%A0%B8%E5%BF%83%E5%8A%9F%E8%83%BD%E6%8F%8F%E8%BF%B0"></a> |
---|
| 1211 | <h3 class="h4">核心功能描述</h3> |
---|
| 1212 | <p>åºçšçšåºéåžžäŒéè¿æäŸ<span class="codefrag">map</span>å<span class="codefrag">reduce</span>æ¥å®ç° |
---|
| 1213 | <span class="codefrag">Mapper</span>å<span class="codefrag">Reducer</span>æ¥å£ïŒå®ä»¬ç»æäœäžçæ žå¿ã</p> |
---|
| 1214 | <a name="N10603"></a><a name="Mapper"></a> |
---|
| 1215 | <h4>Mapper</h4> |
---|
| 1216 | <p> |
---|
| 1217 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Mapper.html"> |
---|
| 1218 | Mapper</a>å°èŸå
¥é®åŒå¯¹(key/value pair)æ å°å°äžç»äžéŽæ ŒåŒçé®åŒå¯¹éåã</p> |
---|
| 1219 | <p>Mapæ¯äžç±»å°èŸå
¥è®°åœé蜬æ¢äžºäžéŽæ ŒåŒè®°åœéçç¬ç«ä»»å¡ã |
---|
| 1220 | è¿ç§èœ¬æ¢çäžéŽæ ŒåŒè®°åœéäžéèŠäžèŸå
¥è®°åœéçç±»åäžèŽãäžäžªç»å®çèŸå
¥é®åŒå¯¹å¯ä»¥æ å°æ0䞪æå€äžªèŸåºé®åŒå¯¹ã</p> |
---|
| 1221 | <p>Hadoop Map/Reduceæ¡æ¶äžºæ¯äžäžª<span class="codefrag">InputSplit</span>产çäžäžªmapä»»å¡ïŒèæ¯äžª<span class="codefrag">InputSplit</span>æ¯ç±è¯¥äœäžç<span class="codefrag">InputFormat</span>产ççã</p> |
---|
| 1222 | <p>æŠæ¬å°è¯ŽïŒå¯¹<span class="codefrag">Mapper</span>çå®ç°è
éèŠéå |
---|
| 1223 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConfigurable.html#configure(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1224 | JobConfigurable.configure(JobConf)</a>æ¹æ³ïŒè¿äžªæ¹æ³éèŠäŒ éäžäžª<span class="codefrag">JobConf</span>åæ°ïŒç®çæ¯å®æMapperçåå§åå·¥äœãç¶åïŒæ¡æ¶äžºè¿äžªä»»å¡ç<span class="codefrag">InputSplit</span>äžæ¯äžªé®åŒå¯¹è°çšäžæ¬¡ |
---|
| 1225 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Mapper.html#map(K1,%20V1,%20org.apache.hadoop.mapred.OutputCollector,%20org.apache.hadoop.mapred.Reporter)"> |
---|
| 1226 | map(WritableComparable, Writable, OutputCollector, Reporter)</a>æäœãåºçšçšåºå¯ä»¥éè¿éå<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Closeable.html#close()">Closeable.close()</a>æ¹æ³æ¥æ§è¡çžåºçæž
çå·¥äœã</p> |
---|
| 1227 | <p>输出键值对不需要与输入键值对的类型一致。一个给定的输入键值对可以映射成0个或多个输出键值对。通过调用<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html#collect(K,%20V)"> |
---|
| 1228 | OutputCollector.collect(WritableComparable,Writable)</a>可以收集输出的键值对。</p> |
---|
| 1229 | <p>åºçšçšåºå¯ä»¥äœ¿çš<span class="codefrag">Reporter</span>æ¥åè¿åºŠïŒè®Ÿå®åºçšçº§å«çç¶ææ¶æ¯ïŒæŽæ°<span class="codefrag">Counters</span>ïŒè®¡æ°åšïŒïŒæè
ä»
æ¯è¡šæèªå·±è¿è¡æ£åžžã</p> |
---|
| 1230 | <p>æ¡æ¶éåäŒæäžäžäžªç¹å®keyå
³èçææäžéŽè¿çšçåŒïŒvalueïŒåæç»ïŒç¶åæå®ä»¬äŒ ç»<span class="codefrag">Reducer</span>以产åºæç»çç»æãçšæ·å¯ä»¥éè¿ |
---|
| 1231 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass(java.lang.Class)"> |
---|
| 1232 | JobConf.setOutputKeyComparatorClass(Class)</a>æ¥æå®å
·äœèŽèŽ£åç»ç |
---|
| 1233 | <span class="codefrag">Comparator</span>ã</p> |
---|
| 1234 | <p> |
---|
| 1235 | <span class="codefrag">Mapper</span>çèŸåºè¢«æåºåïŒå°±è¢«ååç»æ¯äžª<span class="codefrag">Reducer</span>ãååçæ»æ°ç®åäžäžªäœäžçreduceä»»å¡çæ°ç®æ¯äžæ ·çãçšæ·å¯ä»¥éè¿å®ç°èªå®ä¹ç <span class="codefrag">Partitioner</span>æ¥æ§å¶åªäžªkey被åé
ç»åªäžª <span class="codefrag">Reducer</span>ã</p> |
---|
| 1236 | <p>çšæ·å¯éæ©éè¿<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setCombinerClass(java.lang.Class)"> |
---|
| 1237 | JobConf.setCombinerClass(Class)</a>æå®äžäžª<span class="codefrag">combiner</span>ïŒå®èŽèŽ£å¯¹äžéŽè¿çšçèŸåºè¿è¡æ¬å°çèéïŒè¿äŒæå©äºéäœä»<span class="codefrag">Mapper</span>å° |
---|
| 1238 | <span class="codefrag">Reducer</span>æ°æ®äŒ èŸéã |
---|
| 1239 | </p> |
---|
| 1240 | <p>è¿äºè¢«æ奜åºçäžéŽè¿çšçèŸåºç»æä¿åçæ ŒåŒæ¯(key-len, key, value-len, value)ïŒåºçšçšåºå¯ä»¥éè¿<span class="codefrag">JobConf</span>æ§å¶å¯¹è¿äºäžéŽç»ææ¯åŠè¿è¡å猩以åæä¹å猩ïŒäœ¿çšåªç§<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/compress/CompressionCodec.html"> |
---|
| 1241 | CompressionCodec</a>ã |
---|
| 1242 | </p> |
---|
| 1243 | <a name="N1067B"></a><a name="%E9%9C%80%E8%A6%81%E5%A4%9A%E5%B0%91%E4%B8%AAMap%EF%BC%9F"></a> |
---|
| 1244 | <h5>需要多少个Map？</h5> |
---|
| 1245 | <p>Mapçæ°ç®éåžžæ¯ç±èŸå
¥æ°æ®ç倧å°å³å®çïŒäžè¬å°±æ¯ææèŸå
¥æ件çæ»åïŒblockïŒæ°ã</p> |
---|
| 1246 | <p>Mapæ£åžžç并è¡è§æš¡å€§èŽæ¯æ¯äžªèç¹ïŒnodeïŒå€§çºŠ10å°100䞪mapïŒå¯¹äºCPU |
---|
| 1247 | æ¶èèŸå°çmapä»»å¡å¯ä»¥è®Ÿå°300䞪巊å³ãç±äºæ¯äžªä»»å¡åå§åéèŠäžå®çæ¶éŽïŒå æ€ïŒæ¯èŸåççæ
åµæ¯mapæ§è¡çæ¶éŽè³å°è¶
è¿1åéã</p> |
---|
| 1248 | <p>è¿æ ·ïŒåŠæäœ èŸå
¥10TBçæ°æ®ïŒæ¯äžªåïŒblockïŒç倧å°æ¯128MBïŒäœ å°éèŠå€§çºŠ82,000䞪mapæ¥å®æä»»å¡ïŒé€éäœ¿çš |
---|
| 1249 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)"> |
---|
| 1250 | setNumMapTasks(int)</a>ïŒæ³šæïŒè¿éä»
ä»
æ¯å¯¹æ¡æ¶è¿è¡äºäžäžªæ瀺(hint)ïŒå®é
å³å®å çŽ è§<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">è¿é</a>ïŒå°è¿äžªæ°åŒè®Ÿçœ®åŸæŽé«ã</p> |
---|
| 1251 | <a name="N10694"></a><a name="Reducer"></a> |
---|
| 1252 | <h4>Reducer</h4> |
---|
| 1253 | <p> |
---|
| 1254 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html"> |
---|
| 1255 | Reducer</a>å°äžäžäžªkeyå
³èçäžç»äžéŽæ°åŒéåœçºŠïŒreduceïŒäžºäžäžªæŽå°çæ°åŒéã</p> |
---|
| 1256 | <p>çšæ·å¯ä»¥éè¿<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumReduceTasks(int)"> |
---|
| 1257 | JobConf.setNumReduceTasks(int)</a>讟å®äžäžªäœäžäžreduceä»»å¡çæ°ç®ã</p> |
---|
| 1258 | <p>概括地说，对<span class="codefrag">Reducer</span>的实现者需要重写 |
---|
| 1259 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConfigurable.html#configure(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1260 | JobConfigurable.configure(JobConf)</a>方法，这个方法需要传递一个<span class="codefrag">JobConf</span>参数，目的是完成Reducer的初始化工作。然后，框架为成组的输入数据中的每个<span class="codefrag">&lt;key, (list of values)&gt;</span>对调用一次 |
---|
| 1261 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html#reduce(K2,%20java.util.Iterator,%20org.apache.hadoop.mapred.OutputCollector,%20org.apache.hadoop.mapred.Reporter)"> |
---|
| 1262 | reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。之后，应用程序可以通过重写<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Closeable.html#close()">Closeable.close()</a>来执行相应的清理工作。</p> |
---|
| 1263 | <p> |
---|
| 1264 | <span class="codefrag">Reducer</span>æ3䞪䞻èŠé¶æ®µïŒshuffleãsortåreduceã |
---|
| 1265 | </p> |
---|
| 1266 | <a name="N106C4"></a><a name="Shuffle"></a> |
---|
| 1267 | <h5>Shuffle</h5> |
---|
| 1268 | <p> |
---|
| 1269 | <span class="codefrag">Reducer</span>çèŸå
¥å°±æ¯Mapperå·²ç»æ奜åºçèŸåºãåšè¿äžªé¶æ®µïŒæ¡æ¶éè¿HTTP䞺æ¯äžªReducerè·åŸææMapperèŸåºäžäžä¹çžå
³çååã</p> |
---|
| 1270 | <a name="N106D0"></a><a name="Sort"></a> |
---|
| 1271 | <h5>Sort</h5> |
---|
| 1272 | <p>è¿äžªé¶æ®µïŒæ¡æ¶å°æç
§keyçåŒå¯¹<span class="codefrag">Reducer</span>çèŸå
¥è¿è¡åç» |
---|
| 1273 | ïŒå 䞺äžåmapperçèŸåºäžå¯èœäŒæçžåçkeyïŒã</p> |
---|
| 1274 | <p>ShuffleåSort䞀䞪é¶æ®µæ¯åæ¶è¿è¡çïŒmapçèŸåºä¹æ¯äžèŸ¹è¢«ååäžèŸ¹è¢«å并çã</p> |
---|
| 1275 | <a name="N106DF"></a><a name="Secondary+Sort"></a> |
---|
| 1276 | <h5>Secondary Sort</h5> |
---|
| 1277 | <p>åŠæéèŠäžéŽè¿çšå¯¹keyçåç»è§ååreduceå对keyçåç»è§åäžåïŒé£ä¹å¯ä»¥éè¿<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputValueGroupingComparator(java.lang.Class)"> |
---|
| 1278 | JobConf.setOutputValueGroupingComparator(Class)</a>æ¥æå®äžäžª<span class="codefrag">Comparator</span>ãåå äž |
---|
| 1279 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass(java.lang.Class)"> |
---|
| 1280 | JobConf.setOutputKeyComparatorClass(Class)</a>å¯çšäºæ§å¶äžéŽè¿çšçkeyåŠäœè¢«åç»ïŒæ以ç»å䞀è
å¯ä»¥å®ç°<em>æåŒçäºæ¬¡æåº</em>ã |
---|
| 1281 | </p> |
---|
| 1282 | <a name="N106F8"></a><a name="Reduce"></a> |
---|
| 1283 | <h5>Reduce</h5> |
---|
| 1284 | <p>在这个阶段，框架为已分组的输入数据中的每个 |
---|
| 1285 | <span class="codefrag">&lt;key, (list of values)&gt;</span>对调用一次 |
---|
| 1286 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html#reduce(K2,%20java.util.Iterator,%20org.apache.hadoop.mapred.OutputCollector,%20org.apache.hadoop.mapred.Reporter)"> |
---|
| 1287 | reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。</p> |
---|
| 1288 | <p>Reduceä»»å¡çèŸåºéåžžæ¯éè¿è°çš |
---|
| 1289 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html#collect(K,%20V)"> |
---|
| 1290 | OutputCollector.collect(WritableComparable, Writable)</a>åå
¥ |
---|
| 1291 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/fs/FileSystem.html"> |
---|
| 1292 | æ件系ç»</a>çã</p> |
---|
| 1293 | <p>åºçšçšåºå¯ä»¥äœ¿çš<span class="codefrag">Reporter</span>æ¥åè¿åºŠïŒè®Ÿå®åºçšçšåºçº§å«çç¶ææ¶æ¯ïŒæŽæ°<span class="codefrag">Counters</span>ïŒè®¡æ°åšïŒïŒæè
ä»
æ¯è¡šæèªå·±è¿è¡æ£åžžã</p> |
---|
| 1294 | <p> |
---|
| 1295 | <span class="codefrag">Reducer</span>çèŸåºæ¯<em>没ææåºç</em>ã</p> |
---|
| 1296 | <a name="N10725"></a><a name="%E9%9C%80%E8%A6%81%E5%A4%9A%E5%B0%91%E4%B8%AAReduce%EF%BC%9F"></a> |
---|
| 1297 | <h5>需要多少个Reduce？</h5> |
---|
| 1298 | <p>Reduceçæ°ç®å»ºè®®æ¯<span class="codefrag">0.95</span>æ<span class="codefrag">1.75</span>ä¹ä»¥ |
---|
| 1299 | (&lt;<em>no. of nodes</em>&gt; * |
---|
| 1300 | <span class="codefrag">mapred.tasktracker.reduce.tasks.maximum</span>)ã |
---|
| 1301 | </p> |
---|
| 1302 | <p>çš0.95ïŒææreduceå¯ä»¥åšmapsäžå®ææ¶å°±ç«å»å¯åšïŒåŒå§äŒ èŸmapçèŸåºç»æãçš1.75ïŒé床快çèç¹å¯ä»¥åšå®æ第äžèœ®reduceä»»å¡åïŒå¯ä»¥åŒå§ç¬¬äºèœ®ïŒè¿æ ·å¯ä»¥åŸå°æ¯èŸå¥œçèŽèœœåè¡¡çææã</p> |
---|
| 1303 | <p>å¢å reduceçæ°ç®äŒå¢å æŽäžªæ¡æ¶çåŒéïŒäœå¯ä»¥æ¹åèŽèœœåè¡¡ïŒéäœç±äºæ§è¡å€±èŽ¥åžŠæ¥çèŽé¢åœ±åã</p> |
---|
| 1304 | <p>äžè¿°æ¯äŸå åæ¯æŽäœæ°ç®çšå°äžäºæ¯äžºäºç»æ¡æ¶äžçæšæµæ§ä»»å¡ïŒspeculative-tasksïŒ |
---|
| 1305 | æ倱莥çä»»å¡é¢çäžäºreduceçèµæºã</p> |
---|
| 1306 | <a name="N10744"></a><a name="%E6%97%A0Reducer"></a> |
---|
| 1307 | <h5>无Reducer</h5> |
---|
| 1308 | <p>åŠæ没æåœçºŠèŠè¿è¡ïŒé£ä¹è®Ÿçœ®reduceä»»å¡çæ°ç®äžº<em>é¶</em>æ¯åæ³çã</p> |
---|
| 1309 | <p>è¿ç§æ
åµäžïŒmapä»»å¡çèŸåºäŒçŽæ¥è¢«åå
¥ç± |
---|
| 1310 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)"> |
---|
| 1311 | setOutputPath(JobConf, Path)</a>指定的输出路径。框架在把它们写入<span class="codefrag">FileSystem</span>之前没有对它们进行排序。 |
---|
| 1312 | </p> |
---|
| 1313 | <a name="N1075C"></a><a name="Partitioner"></a> |
---|
| 1314 | <h4>Partitioner</h4> |
---|
| 1315 | <p> |
---|
| 1316 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Partitioner.html"> |
---|
| 1317 | Partitioner</a>çšäºååé®åŒç©ºéŽïŒkey spaceïŒã</p> |
---|
| 1318 | <p>PartitionerèŽèŽ£æ§å¶mapèŸåºç»ækeyçåå²ãKeyïŒæè
äžäžªkeyåéïŒè¢«çšäºäº§çååºïŒé垞䜿çšçæ¯Hashåœæ°ãååºçæ°ç®äžäžäžªäœäžçreduceä»»å¡çæ°ç®æ¯äžæ ·çãå æ€ïŒå®æ§å¶å°äžéŽè¿çšçkeyïŒä¹å°±æ¯è¿æ¡è®°åœïŒåºè¯¥åéç»<span class="codefrag">m</span>䞪reduceä»»å¡äžçåªäžäžªæ¥è¿è¡reduceæäœã |
---|
| 1319 | </p> |
---|
| 1320 | <p> |
---|
| 1321 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/lib/HashPartitioner.html"> |
---|
| 1322 | HashPartitioner</a>æ¯é»è®€ç <span class="codefrag">Partitioner</span>ã </p> |
---|
| 1323 | <a name="N10778"></a><a name="Reporter"></a> |
---|
| 1324 | <h4>Reporter</h4> |
---|
| 1325 | <p> |
---|
| 1326 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html"> |
---|
| 1327 | Reporter</a>æ¯çšäºMap/Reduceåºçšçšåºæ¥åè¿åºŠïŒè®Ÿå®åºçšçº§å«çç¶ææ¶æ¯ïŒ |
---|
| 1328 | æŽæ°<span class="codefrag">Counters</span>ïŒè®¡æ°åšïŒçæºå¶ã</p> |
---|
| 1329 | <p> |
---|
| 1330 | <span class="codefrag">Mapper</span>å<span class="codefrag">Reducer</span>çå®ç°å¯ä»¥å©çš<span class="codefrag">Reporter</span> |
---|
| 1331 | æ¥æ¥åè¿åºŠïŒæè
ä»
æ¯è¡šæèªå·±è¿è¡æ£åžžãåšé£ç§åºçšçšåºéèŠè±åŸé¿æ¶éŽå€ç䞪å«é®åŒå¯¹çåºæ¯äžïŒè¿ç§æºå¶æ¯åŸå
³é®çïŒå 䞺æ¡æ¶å¯èœäŒä»¥äžºè¿äžªä»»å¡è¶
æ¶äºïŒä»èå°å®åŒºè¡ææ»ãåŠäžäžªé¿å
è¿ç§æ
åµåççæ¹åŒæ¯ïŒå°é
眮åæ°<span class="codefrag">mapred.task.timeout</span>讟眮䞺äžäžªè¶³å€é«çåŒïŒæè
å¹²è讟眮䞺é¶ïŒå没æè¶
æ¶éå¶äºïŒã |
---|
| 1332 | </p> |
---|
| 1333 | <p>åºçšçšåºå¯ä»¥çš<span class="codefrag">Reporter</span>æ¥æŽæ°<span class="codefrag">Counter</span>ïŒè®¡æ°åšïŒã |
---|
| 1334 | </p> |
---|
| 1335 | <a name="N1079F"></a><a name="OutputCollector"></a> |
---|
| 1336 | <h4>OutputCollector</h4> |
---|
| 1337 | <p> |
---|
| 1338 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html"> |
---|
| 1339 | OutputCollector</a>æ¯äžäžªMap/Reduceæ¡æ¶æäŸççšäºæ¶é |
---|
| 1340 | <span class="codefrag">Mapper</span>æ<span class="codefrag">Reducer</span>èŸåºæ°æ®çéçšæºå¶ |
---|
| 1341 | ïŒå
æ¬äžéŽèŸåºç»æåäœäžçèŸåºç»æïŒã</p> |
---|
| 1342 | <p>Hadoop Map/Reduceæ¡æ¶é垊äºäžäžªå
å«è®žå€å®çšåçmapperãreduceråpartitioner |
---|
| 1343 | ç<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/lib/package-summary.html">ç±»åº</a>ã</p> |
---|
| 1344 | <a name="N107BA"></a><a name="%E4%BD%9C%E4%B8%9A%E9%85%8D%E7%BD%AE"></a> |
---|
| 1345 | <h3 class="h4">作业配置</h3> |
---|
| 1346 | <p> |
---|
| 1347 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html"> |
---|
| 1348 | JobConf</a>代衚äžäžªMap/Reduceäœäžçé
眮ã</p> |
---|
| 1349 | <p> |
---|
| 1350 | <span class="codefrag">JobConf</span>æ¯çšæ·åHadoopæ¡æ¶æè¿°äžäžªMap/ReduceäœäžåŠäœæ§è¡çäž»èŠæ¥å£ãæ¡æ¶äŒæç
§<span class="codefrag">JobConf</span>æè¿°çä¿¡æ¯å¿ å®å°å»å°è¯å®æè¿äžªäœäžïŒç¶èïŒ</p> |
---|
| 1351 | <ul> |
---|
| 1352 | |
---|
| 1353 | <li> |
---|
| 1354 | äžäºåæ°å¯èœäŒè¢«ç®¡çè
æ 记䞺<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#FinalParams"> |
---|
| 1355 | final</a>ïŒè¿æå³å®ä»¬äžèœè¢«æŽæ¹ã |
---|
| 1356 | </li> |
---|
| 1357 | |
---|
| 1358 | <li> |
---|
| 1359 | äžäºäœäžçåæ°å¯ä»¥è¢«çŽæªäºåœå°è¿è¡è®Ÿçœ®ïŒäŸåŠïŒ |
---|
| 1360 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumReduceTasks(int)"> |
---|
| 1361 | setNumReduceTasks(int)</a>ïŒïŒèåŠäžäºåæ°åäžæ¡æ¶æè
äœäžçå
¶ä»åæ°ä¹éŽåŸ®åŠå°çžäºåœ±åïŒå¹¶äžè®Ÿçœ®èµ·æ¥æ¯èŸå€æïŒäŸåŠïŒ<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)"> |
---|
| 1362 | setNumMapTasks(int)</a>ïŒã |
---|
| 1363 | </li> |
---|
| 1364 | |
---|
| 1365 | </ul> |
---|
| 1366 | <p>éåžžïŒ<span class="codefrag">JobConf</span>äŒææ<span class="codefrag">Mapper</span>ãCombiner(åŠææçè¯)ã |
---|
| 1367 | <span class="codefrag">Partitioner</span>ã<span class="codefrag">Reducer</span>ã<span class="codefrag">InputFormat</span>å |
---|
| 1368 | <span class="codefrag">OutputFormat</span>çå
·äœå®ç°ã<span class="codefrag">JobConf</span>è¿èœæå®äžç»èŸå
¥æ件 |
---|
| 1369 | (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#setInputPaths(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path[])">setInputPaths(JobConf, Path...)</a> |
---|
| 1370 | /<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#addInputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">addInputPath(JobConf, Path)</a>) |
---|
| 1371 | å(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">setInputPaths(JobConf, String)</a> |
---|
| 1372 | /<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#addInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">addInputPaths(JobConf, String)</a>) |
---|
| 1373 | 以åèŸåºæ件åºè¯¥ååšåªå¿ |
---|
| 1374 | (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">setOutputPath(JobConf, Path)</a>)。</p> |
---|
| 1375 | <p> |
---|
| 1376 | <span class="codefrag">JobConf</span>å¯éæ©å°å¯¹äœäžè®Ÿçœ®äžäºé«çº§é项ïŒäŸåŠïŒè®Ÿçœ®<span class="codefrag">Comparator</span>ïŒ |
---|
| 1377 | æŸå°<span class="codefrag">DistributedCache</span>äžçæ件ïŒäžéŽç»ææè
äœäžèŸåºç»ææ¯åŠéèŠå猩以åæä¹åçŒ©ïŒ |
---|
| 1378 | å©çšçšæ·æäŸçèæ¬(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapDebugScript(java.lang.String)">setMapDebugScript(String)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceDebugScript(java.lang.String)">setReduceDebugScript(String)</a>) |
---|
| 1379 | è¿è¡è°è¯ïŒäœäžæ¯åŠå
讞<em>é¢é²æ§ïŒspeculativeïŒ</em>ä»»å¡çæ§è¡ |
---|
| 1380 | (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapSpeculativeExecution(boolean)">setMapSpeculativeExecution(boolean)</a>)/(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceSpeculativeExecution(boolean)">setReduceSpeculativeExecution(boolean)</a>) |
---|
| 1381 | ïŒæ¯äžªä»»å¡æ倧çå°è¯æ¬¡æ° |
---|
| 1382 | (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxMapAttempts(int)">setMaxMapAttempts(int)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxReduceAttempts(int)">setMaxReduceAttempts(int)</a>) |
---|
| 1383 | ïŒäžäžªäœäžèœå®¹å¿çä»»å¡å€±èŽ¥ççŸåæ¯ |
---|
| 1384 | (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxMapTaskFailuresPercent(int)">setMaxMapTaskFailuresPercent(int)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxReduceTaskFailuresPercent(int)">setMaxReduceTaskFailuresPercent(int)</a>) |
---|
| 1385 | ïŒççã</p> |
---|
| 1386 | <p>åœç¶ïŒçšæ·èœäœ¿çš |
---|
| 1387 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String,%20java.lang.String)">set(String, String)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#get(java.lang.String,%20java.lang.String)">get(String, String)</a> |
---|
| 1388 | æ¥è®Ÿçœ®æè
ååŸåºçšçšåºéèŠçä»»æåæ°ãç¶èïŒ<span class="codefrag">DistributedCache</span>ç䜿çšæ¯é¢å倧è§æš¡åªè¯»æ°æ®çã</p> |
---|
| 1389 | <a name="N1084C"></a><a name="%E4%BB%BB%E5%8A%A1%E7%9A%84%E6%89%A7%E8%A1%8C%E5%92%8C%E7%8E%AF%E5%A2%83"></a> |
---|
| 1390 | <h3 class="h4">ä»»å¡çæ§è¡åç¯å¢</h3> |
---|
| 1391 | <p> |
---|
| 1392 | <span class="codefrag">TaskTracker</span>æ¯åšäžäžªåç¬çjvmäžä»¥åè¿çšç圢åŒæ§è¡ |
---|
| 1393 | <span class="codefrag">Mapper</span>/<span class="codefrag">Reducer</span>ä»»å¡ïŒTaskïŒçã |
---|
| 1394 | </p> |
---|
| 1395 | <p>åä»»å¡äŒç»§æ¿ç¶<span class="codefrag">TaskTracker</span>çç¯å¢ãçšæ·å¯ä»¥éè¿JobConfäžç |
---|
| 1396 | <span class="codefrag">mapred.child.java.opts</span>é
眮åæ°æ¥è®Ÿå®åjvmäžçéå é项ïŒäŸåŠïŒ |
---|
| 1397 | 通过<span class="codefrag">-Djava.library.path=&lt;&gt;</span> 将一个非标准路径设为运行时的链接用以搜索共享库，等等。如果<span class="codefrag">mapred.child.java.opts</span>包含一个符号<em>@taskid@</em>， |
---|
| 1398 | å®äŒè¢«æ¿æ¢æmap/reduceçtaskidçåŒã</p> |
---|
| 1399 | <p>äžé¢æ¯äžäžªå
å«å€äžªåæ°åæ¿æ¢çäŸåïŒå
¶äžå
æ¬ïŒè®°åœjvm GCæ¥å¿ïŒ |
---|
| 1400 | JVM JMX代ççšåºä»¥æ å¯ç çæ¹åŒå¯åšïŒè¿æ ·å®å°±èœè¿æ¥å°jconsoleäžïŒä»èå¯ä»¥æ¥çåè¿çšçå
åå线çšïŒåŸå°çº¿çšçdumpïŒè¿æåjvmçæ倧å 尺寞讟眮䞺512MBïŒ |
---|
| 1401 | 并䞺åjvmç<span class="codefrag">java.library.path</span>æ·»å äºäžäžªéå è·¯åŸã</p> |
---|
| 1402 | <p> |
---|
| 1403 | |
---|
| 1404 | <span class="codefrag">&lt;property&gt;</span> |
---|
| 1405 | <br> |
---|
| 1406 | <span class="codefrag">&lt;name&gt;mapred.child.java.opts&lt;/name&gt;</span> |
---|
| 1407 | <br> |
---|
| 1408 | <span class="codefrag">&lt;value&gt;</span> |
---|
| 1409 | <br> |
---|
| 1410 | <span class="codefrag"> |
---|
| 1411 | -Xmx512M -Djava.library.path=/home/mycompany/lib |
---|
| 1412 | -verbose:gc -Xloggc:/tmp/@taskid@.gc</span> |
---|
| 1413 | <br> |
---|
| 1414 | <span class="codefrag"> |
---|
| 1415 | -Dcom.sun.management.jmxremote.authenticate=false |
---|
| 1416 | -Dcom.sun.management.jmxremote.ssl=false</span> |
---|
| 1417 | <br> |
---|
| 1418 | <span class="codefrag">&lt;/value&gt;</span> |
---|
| 1419 | <br> |
---|
| 1420 | |
---|
| 1421 | <span class="codefrag">&lt;/property&gt;</span> |
---|
| 1422 | |
---|
| 1423 | </p> |
---|
| 1424 | <p>çšæ·æ管çåä¹å¯ä»¥äœ¿çš<span class="codefrag">mapred.child.ulimit</span>讟å®è¿è¡çåä»»å¡çæ倧èæå
åã<span class="codefrag">mapred.child.ulimit</span>çåŒä»¥ïŒKB)䞺åäœïŒå¹¶äžå¿
须倧äºæçäº-Xmxåæ°äŒ ç»JavaVMçåŒïŒåŠåVMäŒæ æ³å¯åšã</p> |
---|
| 1425 | <p>泚æïŒ<span class="codefrag">mapred.child.java.opts</span>åªçšäºè®Ÿçœ®task trackerå¯åšçåä»»å¡ã䞺å®æ€è¿çšè®Ÿçœ®å
åé项请æ¥ç |
---|
| 1426 | <a href="cluster_setup.html#%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E7%8E%AF%E5%A2%83"> |
---|
| 1427 | cluster_setup.html </a> |
---|
| 1428 | </p> |
---|
| 1429 | <p> |
---|
| 1430 | <span class="codefrag"> ${mapred.local.dir}/taskTracker/</span>æ¯task trackerçæ¬å°ç®åœïŒ |
---|
| 1431 | çšäºå建æ¬å°çŒååjobãå®å¯ä»¥æå®å€äžªç®åœïŒè·šè¶å€äžªç£çïŒïŒæ件äŒåéæºçä¿åå°æ¬å°è·¯åŸäžçæ䞪ç®åœãåœjobå¯åšæ¶ïŒtask trackeræ ¹æ®é
眮ææ¡£å建æ¬å°jobç®åœïŒç®åœç»æåŠä»¥äžæ瀺ïŒ</p> |
---|
| 1432 | <ul> |
---|
| 1433 | |
---|
| 1434 | <li> |
---|
| 1435 | <span class="codefrag">${mapred.local.dir}/taskTracker/archive/</span> :ååžåŒçŒåãè¿äžªç®åœä¿åæ¬å°çååžåŒçŒåãå æ€æ¬å°ååžåŒçŒåæ¯åšæætaskåjobéŽå
±äº«çã</li> |
---|
| 1436 | |
---|
| 1437 | <li> |
---|
| 1438 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/</span> : |
---|
| 1439 | æ¬å°jobç®åœã |
---|
| 1440 | <ul> |
---|
| 1441 | |
---|
| 1442 | <li> |
---|
| 1443 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/work/</span>: |
---|
| 1444 | jobæå®çå
±äº«ç®åœãå䞪任å¡å¯ä»¥äœ¿çšè¿äžªç©ºéŽå䞺æå空éŽïŒçšäºå®ä»¬ä¹éŽå
±äº«æ件ãè¿äžªç®åœéè¿<span class="codefrag">job.local.dir </span>åæ°æŽé²ç»çšæ·ãè¿äžªè·¯åŸå¯ä»¥éè¿API <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#getJobLocalDir()"> |
---|
| 1445 | JobConf.getJobLocalDir()</a>æ¥è®¿é®ãå®ä¹å¯ä»¥è¢«å䞺系ç»å±æ§è·åŸãå æ€ïŒçšæ·ïŒæ¯åŠè¿è¡streamingïŒå¯ä»¥è°çš<span class="codefrag">System.getProperty("job.local.dir")</span>è·åŸè¯¥ç®åœã |
---|
| 1446 | </li> |
---|
| 1447 | |
---|
| 1448 | <li> |
---|
| 1449 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/jars/</span>: |
---|
| 1450 | åæŸjarå
çè·¯åŸïŒçšäºåæŸäœäžçjaræ件åå±åŒçjarã<span class="codefrag">job.jar</span>æ¯åºçšçšåºçjaræ件ïŒå®äŒè¢«èªåšååå°åå°æºåšïŒåštaskå¯åšåäŒè¢«èªåšå±åŒã䜿çšapi |
---|
| 1451 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#getJar()"> |
---|
| 1452 | JobConf.getJar() </a>åœæ°å¯ä»¥åŸå°job.jarçäœçœ®ã䜿çšJobConf.getJar().getParent()å¯ä»¥è®¿é®åæŸå±åŒçjarå
çç®åœã |
---|
| 1453 | </li> |
---|
| 1454 | |
---|
| 1455 | <li> |
---|
| 1456 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/job.xml</span>ïŒ |
---|
| 1457 | äžäžªjob.xmlæ件ïŒæ¬å°çéçšçäœäžé
眮æ件ã |
---|
| 1458 | </li> |
---|
| 1459 | |
---|
| 1460 | <li> |
---|
| 1461 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid</span>ïŒ |
---|
| 1462 | æ¯äžªä»»å¡æäžäžªç®åœ<span class="codefrag">task-id</span>ïŒå®éé¢æåŠäžçç®åœç»æïŒ |
---|
| 1463 | <ul> |
---|
| 1464 | |
---|
| 1465 | <li> |
---|
| 1466 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/job.xml</span>ïŒ |
---|
| 1467 | äžäžªjob.xmlæ件ïŒæ¬å°åçä»»å¡äœäžé
眮æ件ãä»»å¡æ¬å°åæ¯æ䞺该task讟å®ç¹å®çå±æ§åŒãè¿äºåŒäŒåšäžé¢å
·äœè¯Žæã |
---|
| 1468 | </li> |
---|
| 1469 | |
---|
| 1470 | <li> |
---|
| 1471 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/output</span> |
---|
| 1472 | 一个存放中间过程的输出文件的目录。它保存了由framework产生的临时map reduce数据，比如map的输出文件等。</li> |
---|
| 1473 | |
---|
| 1474 | <li> |
---|
| 1475 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work</span>ïŒ |
---|
| 1476 | taskçåœåå·¥äœç®åœã</li> |
---|
| 1477 | |
---|
| 1478 | <li> |
---|
| 1479 | <span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work/tmp</span>ïŒ |
---|
| 1480 | taskç䞎æ¶ç®åœãïŒçšæ·å¯ä»¥è®Ÿå®å±æ§<span class="codefrag">mapred.child.tmp</span> |
---|
| 1481 | æ¥äžºmapåreduce task讟å®äžŽæ¶ç®åœã猺çåŒæ¯<span class="codefrag">./tmp</span>ãåŠæè¿äžªåŒäžæ¯ç»å¯¹è·¯åŸïŒ |
---|
| 1482 | å®äŒætaskçå·¥äœè·¯åŸå å°è¯¥è·¯åŸåé¢äœäžºtaskç䞎æ¶æ件路åŸãåŠæè¿äžªåŒæ¯ç»å¯¹è·¯åŸåçŽæ¥äœ¿çšè¿äžªåŒã |
---|
| 1483 | åŠææå®çç®åœäžååšïŒäŒèªåšå建该ç®åœãä¹åïŒæç
§é项 |
---|
| 1484 | <span class="codefrag">-Djava.io.tmpdir='䞎æ¶æ件çç»å¯¹è·¯åŸ'</span>æ§è¡javaåä»»å¡ã |
---|
| 1485 | pipesåstreamingç䞎æ¶æ件路åŸæ¯éè¿ç¯å¢åé<span class="codefrag">TMPDIR='the absolute path of the tmp dir'</span>讟å®çïŒã |
---|
| 1486 | åŠæ<span class="codefrag">mapred.child.tmp</span>æ<span class="codefrag">./tmp</span>åŒïŒè¿äžªç®åœäŒè¢«å建ã</li> |
---|
| 1487 | |
---|
| 1488 | </ul> |
---|
| 1489 | |
---|
| 1490 | </li> |
---|
| 1491 | |
---|
| 1492 | </ul> |
---|
| 1493 | |
---|
| 1494 | </li> |
---|
| 1495 | |
---|
| 1496 | </ul> |
---|
| 1497 | <p>äžé¢çå±æ§æ¯äžºæ¯äžªtaskæ§è¡æ¶äœ¿çšçæ¬å°åæ°ïŒå®ä»¬ä¿ååšæ¬å°åçä»»å¡äœäžé
眮æ件éïŒ</p> |
---|
| 1498 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 1499 | |
---|
| 1500 | <tr> |
---|
| 1501 | <th colspan="1" rowspan="1">å称</th><th colspan="1" rowspan="1">ç±»å</th><th colspan="1" rowspan="1">æè¿°</th> |
---|
| 1502 | </tr> |
---|
| 1503 | |
---|
| 1504 | <tr> |
---|
| 1505 | <td colspan="1" rowspan="1">mapred.job.id</td><td colspan="1" rowspan="1">String</td><td colspan="1" rowspan="1">job id</td> |
---|
| 1506 | </tr> |
---|
| 1507 | |
---|
| 1508 | <tr> |
---|
| 1509 | <td colspan="1" rowspan="1">mapred.jar</td><td colspan="1" rowspan="1">String</td> |
---|
| 1510 | <td colspan="1" rowspan="1">jobç®åœäžjob.jarçäœçœ®</td> |
---|
| 1511 | </tr> |
---|
| 1512 | |
---|
| 1513 | <tr> |
---|
| 1514 | <td colspan="1" rowspan="1">job.local.dir</td><td colspan="1" rowspan="1"> String</td> |
---|
| 1515 | <td colspan="1" rowspan="1">jobæå®çå
±äº«ååšç©ºéŽ</td> |
---|
| 1516 | </tr> |
---|
| 1517 | |
---|
| 1518 | <tr> |
---|
| 1519 | <td colspan="1" rowspan="1">mapred.tip.id</td><td colspan="1" rowspan="1"> String</td> |
---|
| 1520 | <td colspan="1" rowspan="1"> task id</td> |
---|
| 1521 | </tr> |
---|
| 1522 | |
---|
| 1523 | <tr> |
---|
| 1524 | <td colspan="1" rowspan="1">mapred.task.id</td><td colspan="1" rowspan="1"> String</td> |
---|
| 1525 | <td colspan="1" rowspan="1"> taskå°è¯id</td> |
---|
| 1526 | </tr> |
---|
| 1527 | |
---|
| 1528 | <tr> |
---|
| 1529 | <td colspan="1" rowspan="1">mapred.task.is.map</td><td colspan="1" rowspan="1"> boolean </td> |
---|
| 1530 | <td colspan="1" rowspan="1">æ¯åŠæ¯map task</td> |
---|
| 1531 | </tr> |
---|
| 1532 | |
---|
| 1533 | <tr> |
---|
| 1534 | <td colspan="1" rowspan="1">mapred.task.partition</td><td colspan="1" rowspan="1"> int </td> |
---|
| 1535 | <td colspan="1" rowspan="1">taskåšjobäžçid</td> |
---|
| 1536 | </tr> |
---|
| 1537 | |
---|
| 1538 | <tr> |
---|
| 1539 | <td colspan="1" rowspan="1">map.input.file</td><td colspan="1" rowspan="1"> String</td> |
---|
| 1540 | <td colspan="1" rowspan="1"> map读åçæ件å</td> |
---|
| 1541 | </tr> |
---|
| 1542 | |
---|
| 1543 | <tr> |
---|
| 1544 | <td colspan="1" rowspan="1">map.input.start</td><td colspan="1" rowspan="1"> long</td> |
---|
| 1545 | <td colspan="1" rowspan="1"> mapèŸå
¥çæ°æ®åçèµ·å§äœçœ®å移</td> |
---|
| 1546 | </tr> |
---|
| 1547 | |
---|
| 1548 | <tr> |
---|
| 1549 | <td colspan="1" rowspan="1">map.input.length </td><td colspan="1" rowspan="1">long </td> |
---|
| 1550 | <td colspan="1" rowspan="1">mapèŸå
¥çæ°æ®åçåèæ°</td> |
---|
| 1551 | </tr> |
---|
| 1552 | |
---|
| 1553 | <tr> |
---|
| 1554 | <td colspan="1" rowspan="1">mapred.work.output.dir</td><td colspan="1" rowspan="1"> String </td> |
---|
| 1555 | <td colspan="1" rowspan="1">task䞎æ¶èŸåºç®åœ</td> |
---|
| 1556 | </tr> |
---|
| 1557 | |
---|
| 1558 | </table> |
---|
| 1559 | <p>taskçæ åèŸåºåé误èŸåºæµäŒè¢«è¯»å°TaskTrackeräžïŒå¹¶äžè®°åœå° |
---|
| 1560 | <span class="codefrag">${HADOOP_LOG_DIR}/userlogs</span> |
---|
| 1561 | </p> |
---|
| 1562 | <p> |
---|
| 1563 | <a href="#DistributedCache">DistributedCache</a> |
---|
| 1564 | å¯çšäºmapæreduce taskäžååjarå
åæ¬å°åºãåjvmæ»æ¯æ |
---|
| 1565 | <em>åœåå·¥äœç®åœ</em> å å° |
---|
| 1566 | <span class="codefrag">java.library.path</span> å <span class="codefrag">LD_LIBRARY_PATH</span>ã |
---|
| 1567 | å æ€ïŒå¯ä»¥éè¿ |
---|
| 1568 | <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#loadLibrary(java.lang.String)"> |
---|
| 1569 | System.loadLibrary</a>æ |
---|
| 1570 | <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#load(java.lang.String)"> |
---|
| 1571 | System.load</a>è£
蜜çŒåçåºãæå
³äœ¿çšååžåŒçŒåå 蜜å
±äº«åºçç»è请åè |
---|
| 1572 | <a href="native_libraries.html#%E4%BD%BF%E7%94%A8DistributedCache+%E5%8A%A0%E8%BD%BD%E6%9C%AC%E5%9C%B0%E5%BA%93"> |
---|
| 1573 | native_libraries.html</a> |
---|
| 1574 | </p> |
---|
| 1575 | <a name="N109E3"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8F%90%E4%BA%A4%E4%B8%8E%E7%9B%91%E6%8E%A7"></a> |
---|
| 1576 | <h3 class="h4">äœäžçæ亀äžçæ§</h3> |
---|
| 1577 | <p> |
---|
| 1578 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html"> |
---|
| 1579 | JobClient</a>æ¯çšæ·æ亀çäœäžäž<span class="codefrag">JobTracker</span>亀äºçäž»èŠæ¥å£ã |
---|
| 1580 | </p> |
---|
| 1581 | <p> |
---|
| 1582 | <span class="codefrag">JobClient</span> æäŸæ亀äœäžïŒè¿œèžªè¿çšïŒè®¿é®åä»»å¡çæ¥å¿è®°åœïŒè·åŸMap/Reduceé矀ç¶æä¿¡æ¯çåèœã |
---|
| 1583 | </p> |
---|
| 1584 | <p>äœäžæ亀è¿çšå
æ¬ïŒ </p> |
---|
| 1585 | <ol> |
---|
| 1586 | |
---|
| 1587 | <li>æ£æ¥äœäžèŸå
¥èŸåºæ ·åŒç»è</li> |
---|
| 1588 | |
---|
| 1589 | <li>䞺äœäžè®¡ç®<span class="codefrag">InputSplit</span>åŒã</li> |
---|
| 1590 | |
---|
| 1591 | <li> |
---|
| 1592 | åŠæéèŠçè¯ïŒäžºäœäžç<span class="codefrag">DistributedCache</span>建ç«å¿
é¡»çç»è®¡ä¿¡æ¯ã |
---|
| 1593 | </li> |
---|
| 1594 | |
---|
| 1595 | <li> |
---|
| 1596 | æ·èŽäœäžçjarå
åé
眮æ件å°<span class="codefrag">FileSystem</span>äžçMap/Reduceç³»ç»ç®åœäžã |
---|
| 1597 | </li> |
---|
| 1598 | |
---|
| 1599 | <li> |
---|
| 1600 | æ亀äœäžå°<span class="codefrag">JobTracker</span>并äžçæ§å®çç¶æã |
---|
| 1601 | </li> |
---|
| 1602 | |
---|
| 1603 | </ol> |
---|
| 1604 | <p>äœäžçåå²æ件记åœå°æå®ç®åœç"_logs/history/"åç®åœäžãè¿äžªæå®ç®åœç±<span class="codefrag">hadoop.job.history.user.location</span>讟å®ïŒé»è®€æ¯äœäžèŸåºçç®åœãå æ€é»è®€æ
åµäžïŒæ件äŒåæŸåšmapred.output.dir/_logs/historyç®åœäžãçšæ·å¯ä»¥è®Ÿçœ®<span class="codefrag">hadoop.job.history.user.location</span>䞺<span class="codefrag">none</span>æ¥åæ¢æ¥å¿è®°åœã |
---|
| 1605 | </p> |
---|
| 1606 | <p> çšæ·äœ¿çšäžé¢çåœä»€å¯ä»¥çå°åšæå®ç®åœäžçåå²æ¥å¿è®°åœçæèŠã |
---|
| 1607 | <br> |
---|
| 1608 | |
---|
| 1609 | <span class="codefrag">$ bin/hadoop job -history output-dir</span> |
---|
| 1610 | <br> |
---|
| 1611 | è¿äžªåœä»€äŒæå°åºäœäžçç»èïŒä»¥å倱莥çå被ææ»çä»»å¡ç»èã<br> |
---|
| 1612 | èŠæ¥çæå
³äœäžçæŽå€ç»èäŸåŠæåçä»»å¡ãæ¯äžªä»»å¡å°è¯ç次æ°ïŒtask attemptïŒçïŒå¯ä»¥äœ¿çšäžé¢çåœä»€ |
---|
| 1613 | <br> |
---|
| 1614 | |
---|
| 1615 | <span class="codefrag">$ bin/hadoop job -history all output-dir</span> |
---|
| 1616 | <br> |
---|
| 1617 | </p> |
---|
| 1618 | <p>çšæ·å¯ä»¥äœ¿çš |
---|
| 1619 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputLogFilter.html">OutputLogFilter</a> |
---|
| 1620 | ä»èŸåºç®åœåè¡šäžçéæ¥å¿æ件ã</p> |
---|
| 1621 | <p>äžè¬æ
åµïŒçšæ·å©çš<span class="codefrag">JobConf</span>å建åºçšçšåºå¹¶é
眮äœäžå±æ§ïŒ |
---|
| 1622 | ç¶åçš |
---|
| 1623 | <span class="codefrag">JobClient</span> æ亀äœäžå¹¶çè§å®çè¿çšã</p> |
---|
| 1624 | <a name="N10A44"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8E%A7%E5%88%B6"></a> |
---|
| 1625 | <h4>äœäžçæ§å¶</h4> |
---|
| 1626 | <p>ææ¶åïŒçšäžäžªåç¬çMap/Reduceäœäžå¹¶äžèœå®æäžäžªå€æçä»»å¡ïŒçšæ·ä¹è®žèŠéŸæ¥å€äžªMap/Reduceäœäžæè¡ãè¿æ¯å®¹æå®ç°çïŒå 䞺äœäžéåžžèŸåºå°ååžåŒæ件系ç»äžçïŒæ以å¯ä»¥æè¿äžªäœäžçèŸåºäœäžºäžäžäžªäœäžçèŸå
¥å®ç°äž²èã |
---|
| 1627 | </p> |
---|
| 1628 | <p>ç¶èïŒè¿ä¹æå³çïŒç¡®ä¿æ¯äžäœäžå®æ(æåæ倱莥)ç莣任就çŽæ¥èœåšäºå®¢æ·èº«äžãåšè¿ç§æ
åµäžïŒå¯ä»¥çšçæ§å¶äœäžçé项æïŒ |
---|
| 1629 | </p> |
---|
| 1630 | <ul> |
---|
| 1631 | |
---|
| 1632 | <li> |
---|
| 1633 | |
---|
| 1634 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html#runJob(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1635 | runJob(JobConf)</a>ïŒæ亀äœäžïŒä»
åœäœäžå®ææ¶è¿åã |
---|
| 1636 | </li> |
---|
| 1637 | |
---|
| 1638 | <li> |
---|
| 1639 | |
---|
| 1640 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html#submitJob(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1641 | submitJob(JobConf)</a>ïŒåªæ亀äœäžïŒä¹åéèŠäœ 蜮询å®è¿åç |
---|
| 1642 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RunningJob.html"> |
---|
| 1643 | RunningJob</a>å¥æçç¶æïŒå¹¶æ ¹æ®æ
åµè°åºŠã |
---|
| 1644 | </li> |
---|
| 1645 | |
---|
| 1646 | <li> |
---|
| 1647 | |
---|
| 1648 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setJobEndNotificationURI(java.lang.String)"> |
---|
| 1649 | JobConf.setJobEndNotificationURI(String)</a>ïŒè®Ÿçœ®äžäžªäœäžå®æéç¥ïŒå¯é¿å
蜮询ã |
---|
| 1650 | |
---|
| 1651 | </li> |
---|
| 1652 | |
---|
| 1653 | </ul> |
---|
| 1654 | <a name="N10A6E"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%85%A5"></a> |
---|
| 1655 | <h3 class="h4">äœäžçèŸå
¥</h3> |
---|
| 1656 | <p> |
---|
| 1657 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/InputFormat.html"> |
---|
| 1658 | InputFormat</a> 䞺Map/Reduceäœäžæè¿°èŸå
¥çç»èè§èã |
---|
| 1659 | </p> |
---|
| 1660 | <p>Map/Reduceæ¡æ¶æ ¹æ®äœäžç<span class="codefrag">InputFormat</span>æ¥ïŒ |
---|
| 1661 | </p> |
---|
| 1662 | <ol> |
---|
| 1663 | |
---|
| 1664 | <li>æ£æ¥äœäžèŸå
¥çæææ§ã</li> |
---|
| 1665 | |
---|
| 1666 | <li> |
---|
| 1667 | æèŸå
¥æ件ååæå€äžªé»èŸ<span class="codefrag">InputSplit</span>å®äŸïŒ |
---|
| 1668 | 并ææ¯äžå®äŸåå«ååç»äžäžª |
---|
| 1669 | <span class="codefrag">Mapper</span>ã |
---|
| 1670 | </li> |
---|
| 1671 | |
---|
| 1672 | <li> |
---|
| 1673 | æäŸ<span class="codefrag">RecordReader</span>çå®ç°ïŒè¿äžªRecordReaderä»é»èŸ<span class="codefrag">InputSplit</span>äžè·åŸèŸå
¥è®°åœïŒ |
---|
| 1674 | è¿äºè®°åœå°ç±<span class="codefrag">Mapper</span>å€çã |
---|
| 1675 | </li> |
---|
| 1676 | |
---|
| 1677 | </ol> |
---|
| 1678 | <p>åºäºæ件ç<span class="codefrag">InputFormat</span>å®ç°ïŒéåžžæ¯ |
---|
| 1679 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html"> |
---|
| 1680 | FileInputFormat</a>çåç±»ïŒ |
---|
| 1681 | é»è®€è¡äžºæ¯æç
§èŸå
¥æ件çåè倧å°ïŒæèŸå
¥æ°æ®ååæé»èŸååïŒ<em>logical</em> |
---|
| 1682 | <span class="codefrag">InputSplit</span> ïŒã |
---|
| 1683 | å
¶äžèŸå
¥æ件æåšç<span class="codefrag">FileSystem</span>çæ°æ®å尺寞æ¯åå倧å°çäžéãäžéå¯ä»¥è®Ÿçœ®<span class="codefrag">mapred.min.split.size</span> |
---|
| 1684 | çåŒã</p> |
---|
| 1685 | <p>èèå°èŸ¹çæ
åµïŒå¯¹äºåŸå€åºçšçšåºæ¥è¯ŽïŒåŸææŸæç
§æ件倧å°è¿è¡é»èŸåå²æ¯äžèœæ»¡è¶³éæ±çã |
---|
| 1686 | åšè¿ç§æ
åµäžïŒåºçšçšåºéèŠå®ç°äžäžª<span class="codefrag">RecordReader</span>æ¥å€çè®°åœç蟹ç并䞺æ¯äžªä»»å¡æäŸäžäžªé»èŸååçé¢åè®°åœçè§åŸã |
---|
| 1687 | </p> |
---|
| 1688 | <p> |
---|
| 1689 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/TextInputFormat.html"> |
---|
| 1690 | TextInputFormat</a> æ¯é»è®€ç<span class="codefrag">InputFormat</span>ã</p> |
---|
| 1691 | <p>如果一个作业的<span class="codefrag">InputFormat</span>是<span class="codefrag">TextInputFormat</span>， |
---|
| 1692 | 并äžæ¡æ¶æ£æµå°èŸå
¥æ件çåçŒæ¯<em>.gz</em>å<em>.lzo</em>ïŒå°±äŒäœ¿çšå¯¹åºç<span class="codefrag">CompressionCodec</span>èªåšè§£å猩è¿äºæ件ã |
---|
| 1693 | äœæ¯éèŠæ³šæïŒäžè¿°åžŠåçŒçå猩æ件äžäŒè¢«ååïŒå¹¶äžæŽäžªå猩æ件äŒåç»äžäžªmapperæ¥å€çã |
---|
| 1694 | </p> |
---|
| 1695 | <a name="N10AD2"></a><a name="InputSplit"></a> |
---|
| 1696 | <h4>InputSplit</h4> |
---|
| 1697 | <p> |
---|
| 1698 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/InputSplit.html"> |
---|
| 1699 | InputSplit</a> æ¯äžäžªåç¬ç<span class="codefrag">Mapper</span>èŠå€ççæ°æ®åã</p> |
---|
| 1700 | <p>äžè¬ç<span class="codefrag">InputSplit</span> æ¯åèæ ·åŒèŸå
¥ïŒç¶åç±<span class="codefrag">RecordReader</span>å€ç并蜬åæè®°åœæ ·åŒã |
---|
| 1701 | </p> |
---|
| 1702 | <p> |
---|
| 1703 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileSplit.html"> |
---|
| 1704 | FileSplit</a> æ¯é»è®€ç<span class="codefrag">InputSplit</span>ã å®æ |
---|
| 1705 | <span class="codefrag">map.input.file</span> 讟å®äžºèŸå
¥æ件çè·¯åŸïŒèŸå
¥æ件æ¯é»èŸååæ件ã |
---|
| 1706 | </p> |
---|
| 1707 | <a name="N10AF7"></a><a name="RecordReader"></a> |
---|
| 1708 | <h4>RecordReader</h4> |
---|
| 1709 | <p> |
---|
| 1710 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RecordReader.html"> |
---|
| 1711 | RecordReader</a> 从<span class="codefrag">InputSplit</span>读入<span class="codefrag">&lt;key, value&gt;</span>对。 |
---|
| 1712 | </p> |
---|
| 1713 | <p>äžè¬çïŒ<span class="codefrag">RecordReader</span> æç±<span class="codefrag">InputSplit</span> |
---|
| 1714 | æäŸçåèæ ·åŒçèŸå
¥æ件ïŒèœ¬åæç±<span class="codefrag">Mapper</span>å€ççè®°åœæ ·åŒçæ件ã |
---|
| 1715 | å æ€<span class="codefrag">RecordReader</span>èŽèŽ£å€çè®°åœç蟹çæ
åµåææ°æ®è¡šç€ºækeys/values对圢åŒã |
---|
| 1716 | </p> |
---|
| 1717 | <a name="N10B1A"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%87%BA"></a> |
---|
| 1718 | <h3 class="h4">äœäžçèŸåº</h3> |
---|
| 1719 | <p> |
---|
| 1720 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputFormat.html"> |
---|
| 1721 | OutputFormat</a> æè¿°Map/ReduceäœäžçèŸåºæ ·åŒã |
---|
| 1722 | </p> |
---|
| 1723 | <p>Map/Reduceæ¡æ¶æ ¹æ®äœäžç<span class="codefrag">OutputFormat</span>æ¥ïŒ |
---|
| 1724 | </p> |
---|
| 1725 | <ol> |
---|
| 1726 | |
---|
| 1727 | <li> |
---|
| 1728 | æ£éªäœäžçèŸåºïŒäŸåŠæ£æ¥èŸåºè·¯åŸæ¯åŠå·²ç»ååšã |
---|
| 1729 | </li> |
---|
| 1730 | |
---|
| 1731 | <li> |
---|
| 1732 | æäŸäžäžª<span class="codefrag">RecordWriter</span>çå®ç°ïŒçšæ¥èŸåºäœäžç»æã |
---|
| 1733 | èŸåºæ件ä¿ååš<span class="codefrag">FileSystem</span>äžã |
---|
| 1734 | </li> |
---|
| 1735 | |
---|
| 1736 | </ol> |
---|
| 1737 | <p> |
---|
| 1738 | <span class="codefrag">TextOutputFormat</span>æ¯é»è®€ç |
---|
| 1739 | <span class="codefrag">OutputFormat</span>ã</p> |
---|
| 1740 | <a name="N10B43"></a><a name="%E4%BB%BB%E5%8A%A1%E7%9A%84Side-Effect+File"></a> |
---|
| 1741 | <h4>ä»»å¡çSide-Effect File</h4> |
---|
| 1742 | <p>åšäžäºåºçšçšåºäžïŒåä»»å¡éèŠäº§çäžäºside-fileïŒè¿äºæ件äžäœäžå®é
èŸåºç»æçæ件äžåã |
---|
| 1743 | </p> |
---|
| 1744 | <p>åšè¿ç§æ
åµäžïŒåäžäžª<span class="codefrag">Mapper</span>æè
<span class="codefrag">Reducer</span>ç䞀䞪å®äŸïŒæ¯åŠé¢é²æ§ä»»å¡ïŒåæ¶æåŒæè
å |
---|
| 1745 | <span class="codefrag">FileSystem</span>äžçåäžæ件就äŒäº§çå²çªãå æ€åºçšçšåºåšåæ件çæ¶åéèŠäžºæ¯æ¬¡ä»»å¡å°è¯ïŒäžä»
ä»
æ¯æ¯æ¬¡ä»»å¡ïŒæ¯äžªä»»å¡å¯ä»¥å°è¯æ§è¡åŸå€æ¬¡ïŒéåäžäžªç¬äžæ äºçæ件å(䜿çšattemptidïŒäŸåŠ<span class="codefrag">task_200709221812_0001_m_000000_0</span>)ã |
---|
| 1746 | </p> |
---|
| 1747 | <p>䞺äºé¿å
å²çªïŒMap/Reduceæ¡æ¶äžºæ¯æ¬¡å°è¯æ§è¡ä»»å¡éœå»ºç«å绎æ€äžäžªç¹æ®ç |
---|
| 1748 | <span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>åç®åœïŒè¿äžªç®åœäœäºæ¬æ¬¡å°è¯æ§è¡ä»»å¡èŸåºç»ææåšç<span class="codefrag">FileSystem</span>äžïŒå¯ä»¥éè¿ |
---|
| 1749 | <span class="codefrag">${mapred.work.output.dir}</span>æ¥è®¿é®è¿äžªåç®åœã |
---|
| 1750 | 对äºæåå®æçä»»å¡å°è¯ïŒåªæ<span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>äžçæ件äŒ<em>移åš</em>å°<span class="codefrag">${mapred.output.dir}</span>ãåœç¶ïŒæ¡æ¶äŒäž¢åŒé£äºå€±èŽ¥çä»»å¡å°è¯çåç®åœãè¿ç§å€çè¿çšå¯¹äºåºçšçšåºæ¥è¯Žæ¯å®å
šéæçã</p> |
---|
| 1751 | <p>åšä»»å¡æ§è¡æéŽïŒåºçšçšåºåšåæ件æ¶å¯ä»¥å©çšè¿äžªç¹æ§ïŒæ¯åŠ |
---|
| 1752 | éè¿<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1753 | FileOutputFormat.getWorkOutputPath()</a>è·åŸ<span class="codefrag">${mapred.work.output.dir}</span>ç®åœïŒ |
---|
| 1754 | 并åšå
¶äžå建任æä»»å¡æ§è¡æ¶æéçside-fileïŒæ¡æ¶åšä»»å¡å°è¯æåæ¶äŒé©¬äžç§»åšè¿äºæ件ïŒå æ€äžéèŠåšçšåºå
䞺æ¯æ¬¡ä»»å¡å°è¯éåäžäžªç¬äžæ äºçååã |
---|
| 1755 | </p> |
---|
| 1756 | <p>泚æïŒåšæ¯æ¬¡ä»»å¡å°è¯æ§è¡æéŽïŒ<span class="codefrag">${mapred.work.output.dir}</span> çåŒå®é
äžæ¯ |
---|
| 1757 | <span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>，这个值是Map/Reduce框架创建的。 |
---|
| 1758 | æ以䜿çšè¿äžªç¹æ§çæ¹æ³æ¯ïŒåš<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)"> |
---|
| 1759 | FileOutputFormat.getWorkOutputPath() </a> |
---|
| 1760 | è·¯åŸäžå建side-fileå³å¯ã |
---|
| 1761 | </p> |
---|
| 1762 | <p>对äºåªäœ¿çšmapäžäœ¿çšreduceçäœäžïŒè¿äžªç»è®ºä¹æç«ãè¿ç§æ
åµäžïŒmapçèŸåºç»æçŽæ¥çæå°HDFSäžã |
---|
| 1763 | </p> |
---|
| 1764 | <a name="N10B8B"></a><a name="RecordWriter"></a> |
---|
| 1765 | <h4>RecordWriter</h4> |
---|
| 1766 | <p> |
---|
| 1767 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RecordWriter.html"> |
---|
| 1768 | RecordWriter</a> 生成<span class="codefrag">&lt;key, value&gt;</span> |
---|
| 1769 | 对å°èŸåºæ件ã</p> |
---|
| 1770 | <p>RecordWriterçå®ç°æäœäžçèŸåºç»æåå° |
---|
| 1771 | <span class="codefrag">FileSystem</span>ã</p> |
---|
| 1772 | <a name="N10BA2"></a><a name="%E5%85%B6%E4%BB%96%E6%9C%89%E7%94%A8%E7%9A%84%E7%89%B9%E6%80%A7"></a> |
---|
| 1773 | <h3 class="h4">å
¶ä»æçšçç¹æ§</h3> |
---|
| 1774 | <a name="N10BA8"></a><a name="Counters"></a> |
---|
| 1775 | <h4>Counters</h4> |
---|
| 1776 | <p> |
---|
| 1777 | <span class="codefrag">Counters</span> æ¯å€äžªç±Map/Reduceæ¡æ¶æè
åºçšçšåºå®ä¹çå
šå±è®¡æ°åšã |
---|
| 1778 | æ¯äžäžª<span class="codefrag">Counter</span>å¯ä»¥æ¯ä»»äœäžç§ |
---|
| 1779 | <span class="codefrag">Enum</span>ç±»åãåäžç¹å®<span class="codefrag">Enum</span>ç±»åçCounterå¯ä»¥æ±éå°äžäžªç»ïŒå
¶ç±»å䞺<span class="codefrag">Counters.Group</span>ã</p> |
---|
| 1780 | <p>åºçšçšåºå¯ä»¥å®ä¹ä»»æ(Enumç±»å)ç<span class="codefrag">Counters</span>并äžå¯ä»¥éè¿ <span class="codefrag">map</span> æè
|
---|
| 1781 | <span class="codefrag">reduce</span>æ¹æ³äžç |
---|
| 1782 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.Enum, long)"> |
---|
| 1783 | Reporter.incrCounter(Enum, long)</a>æè
|
---|
| 1784 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.String,%20java.lang.String,%20long)"> |
---|
| 1785 | Reporter.incrCounter(String, String, long)</a> |
---|
| 1786 | æŽæ°ãä¹åæ¡æ¶äŒæ±æ»è¿äºå
šå±countersã |
---|
| 1787 | </p> |
---|
| 1788 | <a name="N10BD4"></a><a name="DistributedCache"></a> |
---|
| 1789 | <h4>DistributedCache</h4> |
---|
| 1790 | <p> |
---|
| 1791 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html"> |
---|
| 1792 | DistributedCache</a> å¯å°å
·äœåºçšçžå
³çã倧尺寞çãåªè¯»çæ件ææå°ååžæŸçœ®ã |
---|
| 1793 | </p> |
---|
| 1794 | <p> |
---|
| 1795 | <span class="codefrag">DistributedCache</span> æ¯Map/Reduceæ¡æ¶æäŸçåèœïŒèœå€çŒååºçšçšåºæéçæ件 |
---|
| 1796 | ïŒå
æ¬ææ¬ïŒæ¡£æ¡æ件ïŒjaræ件çïŒã |
---|
| 1797 | </p> |
---|
| 1798 | <p>åºçšçšåºåš<span class="codefrag">JobConf</span>äžéè¿url(hdfs://)æå®éèŠè¢«çŒåçæ件ã |
---|
| 1799 | <span class="codefrag">DistributedCache</span>åå®ç±hdfs://æ ŒåŒurlæå®çæ件已ç»åš |
---|
| 1800 | <span class="codefrag">FileSystem</span>äžäºã</p> |
---|
| 1801 | <p>Map/Reduce框架在作业所有任务执行之前会把必要的文件拷贝到slave节点上。 |
---|
| 1802 | å®è¿è¡é«ææ¯å 䞺æ¯äžªäœäžçæ件åªæ·èŽäžæ¬¡å¹¶äžäžºé£äºæ²¡æææ¡£çslaveèç¹çŒåææ¡£ã |
---|
| 1803 | </p> |
---|
| 1804 | <p> |
---|
| 1805 | <span class="codefrag">DistributedCache</span> æ ¹æ®çŒåæ档修æ¹çæ¶éŽæ³è¿è¡è¿œèžªã |
---|
| 1806 | åšäœäžæ§è¡æéŽïŒåœååºçšçšåºæè
å€éšçšåºäžèœä¿®æ¹çŒåæ件ã |
---|
| 1807 | </p> |
---|
| 1808 | <p> |
---|
| 1809 | <span class="codefrag">DistributedCache</span>可以分发简单的只读数据或文本文件，也可以分发复杂类型的文件例如归档文件和jar文件。归档文件(zip,tar,tgz和tar.gz文件)在slave节点上会被<em>解档（un-archived）</em>。 |
---|
| 1810 | è¿äºæ件å¯ä»¥è®Ÿçœ®<em>æ§è¡æé</em>ã</p> |
---|
| 1811 | <p>çšæ·å¯ä»¥éè¿è®Ÿçœ®<span class="codefrag">mapred.cache.{files|archives}</span>æ¥ååæ件ã |
---|
| 1812 | åŠæèŠååå€äžªæ件ïŒå¯ä»¥äœ¿çšéå·åéæ件æåšè·¯åŸãä¹å¯ä»¥å©çšAPIæ¥è®Ÿçœ®è¯¥å±æ§ïŒ |
---|
| 1813 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addCacheFile(java.net.URI,%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1814 | DistributedCache.addCacheFile(URI,conf)</a>/ |
---|
| 1815 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addCacheArchive(java.net.URI,%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1816 | DistributedCache.addCacheArchive(URI,conf)</a> 和 |
---|
| 1817 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#setCacheFiles(java.net.URI[],%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1818 | DistributedCache.setCacheFiles(URIs,conf)</a>/ |
---|
| 1819 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#setCacheArchives(java.net.URI[],%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1820 | DistributedCache.setCacheArchives(URIs,conf)</a> |
---|
| 1821 | å
¶äžURIç圢åŒæ¯ |
---|
| 1822 | <span class="codefrag">hdfs://host:port/absolute-path#link-name</span> |
---|
| 1823 | åšStreamingçšåºäžïŒå¯ä»¥éè¿åœä»€è¡é项 |
---|
| 1824 | <span class="codefrag">-cacheFile/-cacheArchive</span> |
---|
| 1825 | ååæ件ã</p> |
---|
| 1826 | <p> |
---|
| 1827 | çšæ·å¯ä»¥éè¿<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#createSymlink(org.apache.hadoop.conf.Configuration)"> |
---|
| 1828 | DistributedCache.createSymlink(Configuration)</a>æ¹æ³è®©<span class="codefrag">DistributedCache</span> |
---|
| 1829 | åš<em>åœåå·¥äœç®åœ</em>äžå建å°çŒåæ件ç笊å·éŸæ¥ã |
---|
| 1830 | æè
éè¿è®Ÿçœ®é
眮æ件å±æ§<span class="codefrag">mapred.create.symlink</span>䞺<span class="codefrag">yes</span>ã |
---|
| 1831 | ååžåŒçŒåäŒæªåURIçç段äœäžºéŸæ¥çååã |
---|
| 1832 | äŸåŠïŒURIæ¯ <span class="codefrag">hdfs://namenode:port/lib.so.1#lib.so</span>ïŒ |
---|
| 1833 | ååštaskåœåå·¥äœç®åœäŒæå䞺<span class="codefrag">lib.so</span>çéŸæ¥ïŒ |
---|
| 1834 | å®äŒéŸæ¥ååžåŒçŒåäžç<span class="codefrag">lib.so.1</span>ã |
---|
| 1835 | </p> |
---|
| 1836 | <p> |
---|
| 1837 | <span class="codefrag">DistributedCache</span>å¯åšmap/reduceä»»å¡äžäœäžº |
---|
| 1838 | äžç§åºç¡èœ¯ä»¶ååæºå¶äœ¿çšãå®å¯ä»¥è¢«çšäºååjarå
åæ¬å°åºïŒnative librariesïŒã |
---|
| 1839 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addArchiveToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1840 | DistributedCache.addArchiveToClassPath(Path, Configuration)</a>å |
---|
| 1841 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addFileToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)"> |
---|
| 1842 | DistributedCache.addFileToClassPath(Path, Configuration)</a> APIèœå€è¢«çšäº |
---|
| 1843 | çŒåæ件åjarå
ïŒå¹¶æå®ä»¬å å
¥åjvmç<em>classpath</em>ãä¹å¯ä»¥éè¿è®Ÿçœ®é
眮ææ¡£éçå±æ§ |
---|
| 1844 | <span class="codefrag">mapred.job.classpath.{files|archives}</span>蟟å°çžåçææãçŒåæ件å¯çšäºåååè£
蜜æ¬å°åºã |
---|
| 1845 | </p> |
---|
| 1846 | <a name="N10C50"></a><a name="Tool"></a> |
---|
| 1847 | <h4>Tool</h4> |
---|
| 1848 | <p> |
---|
| 1849 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/Tool.html">Tool</a> |
---|
| 1850 | æ¥å£æ¯æå€çåžžçšçHadoopåœä»€è¡é项ã |
---|
| 1851 | </p> |
---|
| 1852 | <p> |
---|
| 1853 | <span class="codefrag">Tool</span> æ¯Map/Reduceå·¥å
·æåºçšçæ åãåºçšçšåºåºåªå€çå
¶å®å¶åæ°ïŒ |
---|
| 1854 | èŠææ ååœä»€è¡é项éè¿ |
---|
| 1855 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/ToolRunner.html#run(org.apache.hadoop.util.Tool, java.lang.String[])"> ToolRunner.run(Tool, String[])</a> |
---|
| 1856 | å§æç» |
---|
| 1857 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/GenericOptionsParser.html"> |
---|
| 1858 | GenericOptionsParser</a>å€çã |
---|
| 1859 | </p> |
---|
| 1860 | <p> |
---|
| 1861 | Hadoopåœä»€è¡çåžžçšé项æïŒ<br> |
---|
| 1862 | |
---|
| 1863 | <span class="codefrag"> |
---|
| 1864 | -conf &lt;configuration file&gt; |
---|
| 1865 | </span> |
---|
| 1866 | |
---|
| 1867 | <br> |
---|
| 1868 | |
---|
| 1869 | <span class="codefrag"> |
---|
| 1870 | -D &lt;property=value&gt; |
---|
| 1871 | </span> |
---|
| 1872 | |
---|
| 1873 | <br> |
---|
| 1874 | |
---|
| 1875 | <span class="codefrag"> |
---|
| 1876 | -fs &lt;local|namenode:port&gt; |
---|
| 1877 | </span> |
---|
| 1878 | |
---|
| 1879 | <br> |
---|
| 1880 | |
---|
| 1881 | <span class="codefrag"> |
---|
| 1882 | -jt &lt;local|jobtracker:port&gt; |
---|
| 1883 | </span> |
---|
| 1884 | |
---|
| 1885 | </p> |
---|
| 1886 | <a name="N10C81"></a><a name="IsolationRunner"></a> |
---|
| 1887 | <h4>IsolationRunner</h4> |
---|
| 1888 | <p> |
---|
| 1889 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/IsolationRunner.html"> |
---|
| 1890 | IsolationRunner</a> æ¯åž®å©è°è¯Map/Reduceçšåºçå·¥å
·ã</p> |
---|
| 1891 | <p>䜿çš<span class="codefrag">IsolationRunner</span>çæ¹æ³æ¯ïŒéŠå
讟眮 |
---|
| 1892 | <span class="codefrag">keep.failed.tasks.files</span>å±æ§äžº<span class="codefrag">true</span> |
---|
| 1893 | ïŒåæ¶åè<span class="codefrag">keep.tasks.files.pattern</span>ïŒã</p> |
---|
| 1894 | <p> |
---|
| 1895 | ç¶åïŒç»åœå°ä»»å¡è¿è¡å€±èŽ¥çèç¹äžïŒè¿å
¥ |
---|
| 1896 | <span class="codefrag">TaskTracker</span>çæ¬å°è·¯åŸè¿è¡ |
---|
| 1897 | <span class="codefrag">IsolationRunner</span>ïŒ<br> |
---|
| 1898 | |
---|
| 1899 | <span class="codefrag">$ cd &lt;local path&gt;/taskTracker/${taskid}/work</span> |
---|
| 1900 | <br> |
---|
| 1901 | |
---|
| 1902 | <span class="codefrag"> |
---|
| 1903 | $ bin/hadoop org.apache.hadoop.mapred.IsolationRunner ../job.xml |
---|
| 1904 | </span> |
---|
| 1905 | |
---|
| 1906 | </p> |
---|
| 1907 | <p> |
---|
| 1908 | <span class="codefrag">IsolationRunner</span>äŒæ倱莥çä»»å¡æŸåšåç¬çäžäžªèœå€è°è¯çjvmäžè¿è¡ïŒå¹¶äžéçšåä¹åå®å
šäžæ ·çèŸå
¥æ°æ®ã |
---|
| 1909 | </p> |
---|
| 1910 | <a name="N10CB4"></a><a name="Profiling"></a> |
---|
| 1911 | <h4>Profiling</h4> |
---|
| 1912 | <p>Profilingæ¯äžäžªå·¥å
·ïŒå®äœ¿çšå
眮çjava profilerå·¥å
·è¿è¡åæè·åŸ(2-3䞪)mapæreduceæ ·äŸè¿è¡åææ¥åã</p> |
---|
| 1913 | <p>çšæ·å¯ä»¥éè¿è®Ÿçœ®å±æ§<span class="codefrag">mapred.task.profile</span>æå®ç³»ç»æ¯åŠééprofilerä¿¡æ¯ã |
---|
| 1914 | å©çšapi<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileEnabled(boolean)"> |
---|
| 1915 | JobConf.setProfileEnabled(boolean)å¯ä»¥ä¿®æ¹å±æ§åŒ</a>ãåŠæ讟䞺<span class="codefrag">true</span>ïŒ |
---|
| 1916 | ååŒå¯profilingåèœãprofilerä¿¡æ¯ä¿ååšçšæ·æ¥å¿ç®åœäžã猺çæ
åµïŒprofilingåèœæ¯å
³éçã</p> |
---|
| 1917 | <p>åŠæçšæ·è®Ÿå®äœ¿çšprofilingåèœïŒå¯ä»¥äœ¿çšé
眮ææ¡£éçå±æ§ |
---|
| 1918 | <span class="codefrag">mapred.task.profile.{maps|reduces}</span> |
---|
| 1919 | 讟眮èŠprofile map/reduce taskçèåŽã讟眮该å±æ§åŒçapiæ¯ |
---|
| 1920 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileTaskRange(boolean,%20java.lang.String)"> |
---|
| 1921 | JobConf.setProfileTaskRange(boolean,String)</a>ã |
---|
| 1922 | èåŽç猺çåŒæ¯<span class="codefrag">0-2</span>ã</p> |
---|
| 1923 | <p>çšæ·å¯ä»¥éè¿è®Ÿå®é
眮ææ¡£éçå±æ§<span class="codefrag">mapred.task.profile.params</span> |
---|
| 1924 | æ¥æå®profileré
眮åæ°ãä¿®æ¹å±æ§èŠäœ¿çšapi |
---|
| 1925 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileParams(java.lang.String)"> |
---|
| 1926 | JobConf.setProfileParams(String)</a>ãåœè¿è¡taskæ¶ïŒåŠæå笊䞲å
å«<span class="codefrag">%s</span>ã |
---|
| 1927 | 它会被替换成profiling的输出文件名。这些参数会在命令行里传递到子JVM中。缺省的profiling |
---|
| 1928 | åæ°æ¯ |
---|
| 1929 | <span class="codefrag">-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s</span>ã |
---|
| 1930 | </p> |
---|
| 1931 | <a name="N10CE8"></a><a name="%E8%B0%83%E8%AF%95"></a> |
---|
| 1932 | <h4>调试</h4> |
---|
| 1933 | <p>Map/Reduceæ¡æ¶èœå€è¿è¡çšæ·æäŸççšäºè°è¯çèæ¬çšåºã |
---|
| 1934 | åœmap/reduceä»»å¡å€±èŽ¥æ¶ïŒçšæ·å¯ä»¥éè¿è¿è¡èæ¬åšä»»å¡æ¥å¿ïŒäŸåŠä»»å¡çæ åèŸåºãæ åé误ãç³»ç»æ¥å¿ä»¥åäœäžé
眮æ件ïŒäžååç»å€çå·¥äœãçšæ·æäŸçè°è¯èæ¬çšåºçæ åèŸåºåæ åé误äŒèŸåºäžºè¯ææ件ãåŠæéèŠçè¯è¿äºèŸåºç»æä¹å¯ä»¥æå°åšçšæ·çé¢äžã</p> |
---|
| 1935 | <p> åšæ¥äžæ¥çç« èïŒæ们讚论åŠäœäžäœäžäžèµ·æ亀è°è¯èæ¬ã䞺äºæ亀è°è¯èæ¬ïŒ |
---|
| 1936 | éŠå
èŠæè¿äžªèæ¬åååºå»ïŒèäžè¿èŠåšé
眮æ件é讟眮ã |
---|
| 1937 | </p> |
---|
| 1938 | <a name="N10CF4"></a><a name="%E5%A6%82%E4%BD%95%E5%88%86%E5%8F%91%E8%84%9A%E6%9C%AC%E6%96%87%E4%BB%B6%EF%BC%9A"></a> |
---|
| 1939 | <h5> 如何分发脚本文件:</h5> |
---|
| 1940 | <p>çšæ·èŠçš |
---|
| 1941 | <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a> |
---|
| 1942 | æºå¶æ¥<em>åå</em>å<em>éŸæ¥</em>èæ¬æ件</p> |
---|
| 1943 | <a name="N10D08"></a><a name="%E5%A6%82%E4%BD%95%E6%8F%90%E4%BA%A4%E8%84%9A%E6%9C%AC%EF%BC%9A"></a> |
---|
| 1944 | <h5> 如何提交脚本:</h5> |
---|
| 1945 | <p> äžäžªå¿«éæ亀è°è¯èæ¬çæ¹æ³æ¯åå«äžºéèŠè°è¯çmapä»»å¡åreduceä»»å¡è®Ÿçœ® |
---|
| 1946 | "mapred.map.task.debug.script" å "mapred.reduce.task.debug.script" |
---|
| 1947 | å±æ§çåŒãè¿äºå±æ§ä¹å¯ä»¥éè¿ |
---|
| 1948 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapDebugScript(java.lang.String)"> |
---|
| 1949 | JobConf.setMapDebugScript(String) </a>å |
---|
| 1950 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceDebugScript(java.lang.String)"> |
---|
| 1951 | JobConf.setReduceDebugScript(String) </a>APIæ¥è®Ÿçœ®ã对äºstreamingïŒ |
---|
| 1952 | 可以分别为需要调试的map任务和reduce任务使用命令行选项-mapdebug 和 -reducedebug来提交调试脚本。 |
---|
| 1953 | </p> |
---|
| 1954 | <p>èæ¬çåæ°æ¯ä»»å¡çæ åèŸåºãæ åé误ãç³»ç»æ¥å¿ä»¥åäœäžé
眮æ件ãåšè¿è¡map/reduce倱莥çèç¹äžè¿è¡è°è¯åœä»€æ¯ïŒ |
---|
| 1955 | <br> |
---|
| 1956 | |
---|
| 1957 | <span class="codefrag"> $script $stdout $stderr $syslog $jobconf </span> |
---|
| 1958 | </p> |
---|
| 1959 | <p> Pipes çšåºæ ¹æ®ç¬¬äºäžªåæ°è·åŸc++çšåºåã |
---|
| 1960 | å æ€è°è¯pipesçšåºçåœä»€æ¯<br> |
---|
| 1961 | |
---|
| 1962 | <span class="codefrag">$script $stdout $stderr $syslog $jobconf $program </span> |
---|
| 1963 | |
---|
| 1964 | </p> |
---|
| 1965 | <a name="N10D2A"></a><a name="%E9%BB%98%E8%AE%A4%E8%A1%8C%E4%B8%BA"></a> |
---|
| 1966 | <h5> 默认行为 </h5> |
---|
| 1967 | <p> 对äºpipesïŒé»è®€çèæ¬äŒçšgdbå€çcore dumpïŒ |
---|
| 1968 | æå° stack trace并äžç»åºæ£åšè¿è¡çº¿çšçä¿¡æ¯ã</p> |
---|
| 1969 | <a name="N10D35"></a><a name="JobControl"></a> |
---|
| 1970 | <h4>JobControl</h4> |
---|
| 1971 | <p> |
---|
| 1972 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/jobcontrol/package-summary.html"> |
---|
| 1973 | JobControl</a>是一个工具,它封装了一组Map/Reduce作业以及他们之间的依赖关系。 |
---|
| 1974 | </p> |
---|
| 1975 | <a name="N10D42"></a><a name="%E6%95%B0%E6%8D%AE%E5%8E%8B%E7%BC%A9"></a> |
---|
| 1976 | <h4>数据压缩</h4> |
---|
| 1977 | <p>Hadoop Map/Reduceæ¡æ¶äžºåºçšçšåºçåå
¥æ件æäœæäŸå猩工å
·ïŒè¿äºå·¥å
·å¯ä»¥äžºmapèŸåºçäžéŽæ°æ®åäœäžæç»èŸåºæ°æ®ïŒäŸåŠreduceçèŸåºïŒæäŸæ¯æãå®è¿é垊äºäžäº |
---|
| 1978 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/compress/CompressionCodec.html"> |
---|
| 1979 | CompressionCodec</a>çå®ç°ïŒæ¯åŠå®ç°äº |
---|
| 1980 | <a href="http://www.zlib.net/">zlib</a>å<a href="http://www.oberhumer.com/opensource/lzo/">lzo</a>å猩ç®æ³ã |
---|
| 1981 | Hadoopåæ ·æ¯æ<a href="http://www.gzip.org/">gzip</a>æä»¶æ ŒåŒã |
---|
| 1982 | </p> |
---|
| 1983 | <p>èèå°æ§èœé®é¢ïŒzlibïŒä»¥åJavaç±»åºç猺倱ïŒlzoïŒçå çŽ ïŒHadoopä¹äžºäžè¿°å猩解åç®æ³æäŸæ¬å°åºçå®ç°ãæŽå€çç»è请åè |
---|
| 1984 | <a href="native_libraries.html">è¿é</a>ã</p> |
---|
| 1985 | <a name="N10D62"></a><a name="%E4%B8%AD%E9%97%B4%E8%BE%93%E5%87%BA"></a> |
---|
| 1986 | <h5>中间输出</h5> |
---|
| 1987 | <p>åºçšçšåºå¯ä»¥éè¿ |
---|
| 1988 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setCompressMapOutput(boolean)"> |
---|
| 1989 | JobConf.setCompressMapOutput(boolean)</a>apiæ§å¶mapèŸåºçäžéŽç»æïŒå¹¶äžå¯ä»¥éè¿ |
---|
| 1990 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapOutputCompressorClass(java.lang.Class)"> |
---|
| 1991 | JobConf.setMapOutputCompressorClass(Class)</a>apiæå® |
---|
| 1992 | <span class="codefrag">CompressionCodec</span>ã |
---|
| 1993 | </p> |
---|
| 1994 | <a name="N10D77"></a><a name="%E4%BD%9C%E4%B8%9A%E8%BE%93%E5%87%BA"></a> |
---|
| 1995 | <h5>作业输出</h5> |
---|
| 1996 | <p>åºçšçšåºå¯ä»¥éè¿ |
---|
| 1997 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)"> |
---|
| 1998 | FileOutputFormat.setCompressOutput(JobConf, boolean)</a> |
---|
| 1999 | apiæ§å¶èŸåºæ¯åŠéèŠå猩并äžå¯ä»¥äœ¿çš |
---|
| 2000 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)"> |
---|
| 2001 | FileOutputFormat.setOutputCompressorClass(JobConf, Class)</a>apiæå®<span class="codefrag">CompressionCodec</span>ã</p> |
---|
| 2002 | <p>åŠæäœäžèŸåºèŠä¿åæ |
---|
| 2003 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html"> |
---|
| 2004 | SequenceFileOutputFormat</a>æ ŒåŒïŒéèŠäœ¿çš |
---|
| 2005 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html#setOutputCompressionType(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.io.SequenceFile.CompressionType)"> |
---|
| 2006 | SequenceFileOutputFormat.setOutputCompressionType(JobConf, |
---|
| 2007 | SequenceFile.CompressionType)</a>apiïŒæ¥è®Ÿå® |
---|
| 2008 | <span class="codefrag">SequenceFile.CompressionType</span> (i.e. <span class="codefrag">RECORD</span> / |
---|
| 2009 | <span class="codefrag">BLOCK</span> - 默认是<span class="codefrag">RECORD</span>)。 |
---|
| 2010 | </p> |
---|
| 2011 | </div> |
---|
| 2012 | |
---|
| 2013 | |
---|
| 2014 | <a name="N10DA6"></a><a name="%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v2.0"></a> |
---|
| 2015 | <h2 class="h3">例子:WordCount v2.0</h2> |
---|
| 2016 | <div class="section"> |
---|
| 2017 | <p>è¿éæ¯äžäžªæŽå
šé¢ç<span class="codefrag">WordCount</span>äŸåïŒå®äœ¿çšäºæ们已ç»è®šè®ºè¿çåŸå€Map/Reduceæ¡æ¶æäŸçåèœã |
---|
| 2018 | </p> |
---|
| 2019 | <p>è¿è¡è¿äžªäŸåéèŠHDFSçæäºåèœïŒç¹å«æ¯ |
---|
| 2020 | <span class="codefrag">DistributedCache</span>çžå
³åèœãå æ€è¿äžªäŸååªèœè¿è¡åš |
---|
| 2021 | <a href="quickstart.html#SingleNodeSetup">䌪ååžåŒ</a> æè
|
---|
| 2022 | <a href="quickstart.html#Fully-Distributed+Operation">å®å
šååžåŒæš¡åŒ</a>ç |
---|
| 2023 | Hadoopäžã</p> |
---|
| 2024 | <a name="N10DC0"></a><a name="%E6%BA%90%E4%BB%A3%E7%A0%81-N10DC0"></a> |
---|
| 2025 | <h3 class="h4">源代码</h3> |
---|
| 2026 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 2027 | |
---|
| 2028 | <tr> |
---|
| 2029 | |
---|
| 2030 | <th colspan="1" rowspan="1"></th> |
---|
| 2031 | <th colspan="1" rowspan="1">WordCount.java</th> |
---|
| 2032 | |
---|
| 2033 | </tr> |
---|
| 2034 | |
---|
| 2035 | <tr> |
---|
| 2036 | |
---|
| 2037 | <td colspan="1" rowspan="1">1.</td> |
---|
| 2038 | <td colspan="1" rowspan="1"> |
---|
| 2039 | <span class="codefrag">package org.myorg;</span> |
---|
| 2040 | </td> |
---|
| 2041 | |
---|
| 2042 | </tr> |
---|
| 2043 | |
---|
| 2044 | <tr> |
---|
| 2045 | |
---|
| 2046 | <td colspan="1" rowspan="1">2.</td> |
---|
| 2047 | <td colspan="1" rowspan="1"></td> |
---|
| 2048 | |
---|
| 2049 | </tr> |
---|
| 2050 | |
---|
| 2051 | <tr> |
---|
| 2052 | |
---|
| 2053 | <td colspan="1" rowspan="1">3.</td> |
---|
| 2054 | <td colspan="1" rowspan="1"> |
---|
| 2055 | <span class="codefrag">import java.io.*;</span> |
---|
| 2056 | </td> |
---|
| 2057 | |
---|
| 2058 | </tr> |
---|
| 2059 | |
---|
| 2060 | <tr> |
---|
| 2061 | |
---|
| 2062 | <td colspan="1" rowspan="1">4.</td> |
---|
| 2063 | <td colspan="1" rowspan="1"> |
---|
| 2064 | <span class="codefrag">import java.util.*;</span> |
---|
| 2065 | </td> |
---|
| 2066 | |
---|
| 2067 | </tr> |
---|
| 2068 | |
---|
| 2069 | <tr> |
---|
| 2070 | |
---|
| 2071 | <td colspan="1" rowspan="1">5.</td> |
---|
| 2072 | <td colspan="1" rowspan="1"></td> |
---|
| 2073 | |
---|
| 2074 | </tr> |
---|
| 2075 | |
---|
| 2076 | <tr> |
---|
| 2077 | |
---|
| 2078 | <td colspan="1" rowspan="1">6.</td> |
---|
| 2079 | <td colspan="1" rowspan="1"> |
---|
| 2080 | <span class="codefrag">import org.apache.hadoop.fs.Path;</span> |
---|
| 2081 | </td> |
---|
| 2082 | |
---|
| 2083 | </tr> |
---|
| 2084 | |
---|
| 2085 | <tr> |
---|
| 2086 | |
---|
| 2087 | <td colspan="1" rowspan="1">7.</td> |
---|
| 2088 | <td colspan="1" rowspan="1"> |
---|
| 2089 | <span class="codefrag">import org.apache.hadoop.filecache.DistributedCache;</span> |
---|
| 2090 | </td> |
---|
| 2091 | |
---|
| 2092 | </tr> |
---|
| 2093 | |
---|
| 2094 | <tr> |
---|
| 2095 | |
---|
| 2096 | <td colspan="1" rowspan="1">8.</td> |
---|
| 2097 | <td colspan="1" rowspan="1"> |
---|
| 2098 | <span class="codefrag">import org.apache.hadoop.conf.*;</span> |
---|
| 2099 | </td> |
---|
| 2100 | |
---|
| 2101 | </tr> |
---|
| 2102 | |
---|
| 2103 | <tr> |
---|
| 2104 | |
---|
| 2105 | <td colspan="1" rowspan="1">9.</td> |
---|
| 2106 | <td colspan="1" rowspan="1"> |
---|
| 2107 | <span class="codefrag">import org.apache.hadoop.io.*;</span> |
---|
| 2108 | </td> |
---|
| 2109 | |
---|
| 2110 | </tr> |
---|
| 2111 | |
---|
| 2112 | <tr> |
---|
| 2113 | |
---|
| 2114 | <td colspan="1" rowspan="1">10.</td> |
---|
| 2115 | <td colspan="1" rowspan="1"> |
---|
| 2116 | <span class="codefrag">import org.apache.hadoop.mapred.*;</span> |
---|
| 2117 | </td> |
---|
| 2118 | |
---|
| 2119 | </tr> |
---|
| 2120 | |
---|
| 2121 | <tr> |
---|
| 2122 | |
---|
| 2123 | <td colspan="1" rowspan="1">11.</td> |
---|
| 2124 | <td colspan="1" rowspan="1"> |
---|
| 2125 | <span class="codefrag">import org.apache.hadoop.util.*;</span> |
---|
| 2126 | </td> |
---|
| 2127 | |
---|
| 2128 | </tr> |
---|
| 2129 | |
---|
| 2130 | <tr> |
---|
| 2131 | |
---|
| 2132 | <td colspan="1" rowspan="1">12.</td> |
---|
| 2133 | <td colspan="1" rowspan="1"></td> |
---|
| 2134 | |
---|
| 2135 | </tr> |
---|
| 2136 | |
---|
| 2137 | <tr> |
---|
| 2138 | |
---|
| 2139 | <td colspan="1" rowspan="1">13.</td> |
---|
| 2140 | <td colspan="1" rowspan="1"> |
---|
| 2141 | <span class="codefrag">public class WordCount extends Configured implements Tool {</span> |
---|
| 2142 | </td> |
---|
| 2143 | |
---|
| 2144 | </tr> |
---|
| 2145 | |
---|
| 2146 | <tr> |
---|
| 2147 | |
---|
| 2148 | <td colspan="1" rowspan="1">14.</td> |
---|
| 2149 | <td colspan="1" rowspan="1"></td> |
---|
| 2150 | |
---|
| 2151 | </tr> |
---|
| 2152 | |
---|
| 2153 | <tr> |
---|
| 2154 | |
---|
| 2155 | <td colspan="1" rowspan="1">15.</td> |
---|
| 2156 | <td colspan="1" rowspan="1"> |
---|
| 2157 | |
---|
| 2158 | <span class="codefrag"> |
---|
| 2159 | public static class Map extends MapReduceBase |
---|
| 2160 | implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; { |
---|
| 2161 | </span> |
---|
| 2162 | </td> |
---|
| 2163 | |
---|
| 2164 | </tr> |
---|
| 2165 | |
---|
| 2166 | <tr> |
---|
| 2167 | |
---|
| 2168 | <td colspan="1" rowspan="1">16.</td> |
---|
| 2169 | <td colspan="1" rowspan="1"></td> |
---|
| 2170 | |
---|
| 2171 | </tr> |
---|
| 2172 | |
---|
| 2173 | <tr> |
---|
| 2174 | |
---|
| 2175 | <td colspan="1" rowspan="1">17.</td> |
---|
| 2176 | <td colspan="1" rowspan="1"> |
---|
| 2177 | |
---|
| 2178 | <span class="codefrag"> |
---|
| 2179 | static enum Counters { INPUT_WORDS } |
---|
| 2180 | </span> |
---|
| 2181 | </td> |
---|
| 2182 | |
---|
| 2183 | </tr> |
---|
| 2184 | |
---|
| 2185 | <tr> |
---|
| 2186 | |
---|
| 2187 | <td colspan="1" rowspan="1">18.</td> |
---|
| 2188 | <td colspan="1" rowspan="1"></td> |
---|
| 2189 | |
---|
| 2190 | </tr> |
---|
| 2191 | |
---|
| 2192 | <tr> |
---|
| 2193 | |
---|
| 2194 | <td colspan="1" rowspan="1">19.</td> |
---|
| 2195 | <td colspan="1" rowspan="1"> |
---|
| 2196 | |
---|
| 2197 | <span class="codefrag"> |
---|
| 2198 | private final static IntWritable one = new IntWritable(1); |
---|
| 2199 | </span> |
---|
| 2200 | </td> |
---|
| 2201 | |
---|
| 2202 | </tr> |
---|
| 2203 | |
---|
| 2204 | <tr> |
---|
| 2205 | |
---|
| 2206 | <td colspan="1" rowspan="1">20.</td> |
---|
| 2207 | <td colspan="1" rowspan="1"> |
---|
| 2208 | |
---|
| 2209 | <span class="codefrag">private Text word = new Text();</span> |
---|
| 2210 | </td> |
---|
| 2211 | |
---|
| 2212 | </tr> |
---|
| 2213 | |
---|
| 2214 | <tr> |
---|
| 2215 | |
---|
| 2216 | <td colspan="1" rowspan="1">21.</td> |
---|
| 2217 | <td colspan="1" rowspan="1"></td> |
---|
| 2218 | |
---|
| 2219 | </tr> |
---|
| 2220 | |
---|
| 2221 | <tr> |
---|
| 2222 | |
---|
| 2223 | <td colspan="1" rowspan="1">22.</td> |
---|
| 2224 | <td colspan="1" rowspan="1"> |
---|
| 2225 | |
---|
| 2226 | <span class="codefrag">private boolean caseSensitive = true;</span> |
---|
| 2227 | </td> |
---|
| 2228 | |
---|
| 2229 | </tr> |
---|
| 2230 | |
---|
| 2231 | <tr> |
---|
| 2232 | |
---|
| 2233 | <td colspan="1" rowspan="1">23.</td> |
---|
| 2234 | <td colspan="1" rowspan="1"> |
---|
| 2235 | |
---|
| 2236 | <span class="codefrag">private Set&lt;String&gt; patternsToSkip = new HashSet&lt;String&gt;();</span> |
---|
| 2237 | </td> |
---|
| 2238 | |
---|
| 2239 | </tr> |
---|
| 2240 | |
---|
| 2241 | <tr> |
---|
| 2242 | |
---|
| 2243 | <td colspan="1" rowspan="1">24.</td> |
---|
| 2244 | <td colspan="1" rowspan="1"></td> |
---|
| 2245 | |
---|
| 2246 | </tr> |
---|
| 2247 | |
---|
| 2248 | <tr> |
---|
| 2249 | |
---|
| 2250 | <td colspan="1" rowspan="1">25.</td> |
---|
| 2251 | <td colspan="1" rowspan="1"> |
---|
| 2252 | |
---|
| 2253 | <span class="codefrag">private long numRecords = 0;</span> |
---|
| 2254 | </td> |
---|
| 2255 | |
---|
| 2256 | </tr> |
---|
| 2257 | |
---|
| 2258 | <tr> |
---|
| 2259 | |
---|
| 2260 | <td colspan="1" rowspan="1">26.</td> |
---|
| 2261 | <td colspan="1" rowspan="1"> |
---|
| 2262 | |
---|
| 2263 | <span class="codefrag">private String inputFile;</span> |
---|
| 2264 | </td> |
---|
| 2265 | |
---|
| 2266 | </tr> |
---|
| 2267 | |
---|
| 2268 | <tr> |
---|
| 2269 | |
---|
| 2270 | <td colspan="1" rowspan="1">27.</td> |
---|
| 2271 | <td colspan="1" rowspan="1"></td> |
---|
| 2272 | |
---|
| 2273 | </tr> |
---|
| 2274 | |
---|
| 2275 | <tr> |
---|
| 2276 | |
---|
| 2277 | <td colspan="1" rowspan="1">28.</td> |
---|
| 2278 | <td colspan="1" rowspan="1"> |
---|
| 2279 | |
---|
| 2280 | <span class="codefrag">public void configure(JobConf job) {</span> |
---|
| 2281 | </td> |
---|
| 2282 | |
---|
| 2283 | </tr> |
---|
| 2284 | |
---|
| 2285 | <tr> |
---|
| 2286 | |
---|
| 2287 | <td colspan="1" rowspan="1">29.</td> |
---|
| 2288 | <td colspan="1" rowspan="1"> |
---|
| 2289 | |
---|
| 2290 | <span class="codefrag"> |
---|
| 2291 | caseSensitive = job.getBoolean("wordcount.case.sensitive", true); |
---|
| 2292 | </span> |
---|
| 2293 | </td> |
---|
| 2294 | |
---|
| 2295 | </tr> |
---|
| 2296 | |
---|
| 2297 | <tr> |
---|
| 2298 | |
---|
| 2299 | <td colspan="1" rowspan="1">30.</td> |
---|
| 2300 | <td colspan="1" rowspan="1"> |
---|
| 2301 | |
---|
| 2302 | <span class="codefrag">inputFile = job.get("map.input.file");</span> |
---|
| 2303 | </td> |
---|
| 2304 | |
---|
| 2305 | </tr> |
---|
| 2306 | |
---|
| 2307 | <tr> |
---|
| 2308 | |
---|
| 2309 | <td colspan="1" rowspan="1">31.</td> |
---|
| 2310 | <td colspan="1" rowspan="1"></td> |
---|
| 2311 | |
---|
| 2312 | </tr> |
---|
| 2313 | |
---|
| 2314 | <tr> |
---|
| 2315 | |
---|
| 2316 | <td colspan="1" rowspan="1">32.</td> |
---|
| 2317 | <td colspan="1" rowspan="1"> |
---|
| 2318 | |
---|
| 2319 | <span class="codefrag">if (job.getBoolean("wordcount.skip.patterns", false)) {</span> |
---|
| 2320 | </td> |
---|
| 2321 | |
---|
| 2322 | </tr> |
---|
| 2323 | |
---|
| 2324 | <tr> |
---|
| 2325 | |
---|
| 2326 | <td colspan="1" rowspan="1">33.</td> |
---|
| 2327 | <td colspan="1" rowspan="1"> |
---|
| 2328 | |
---|
| 2329 | <span class="codefrag">Path[] patternsFiles = new Path[0];</span> |
---|
| 2330 | </td> |
---|
| 2331 | |
---|
| 2332 | </tr> |
---|
| 2333 | |
---|
| 2334 | <tr> |
---|
| 2335 | |
---|
| 2336 | <td colspan="1" rowspan="1">34.</td> |
---|
| 2337 | <td colspan="1" rowspan="1"> |
---|
| 2338 | |
---|
| 2339 | <span class="codefrag">try {</span> |
---|
| 2340 | </td> |
---|
| 2341 | |
---|
| 2342 | </tr> |
---|
| 2343 | |
---|
| 2344 | <tr> |
---|
| 2345 | |
---|
| 2346 | <td colspan="1" rowspan="1">35.</td> |
---|
| 2347 | <td colspan="1" rowspan="1"> |
---|
| 2348 | |
---|
| 2349 | <span class="codefrag"> |
---|
| 2350 | patternsFiles = DistributedCache.getLocalCacheFiles(job); |
---|
| 2351 | </span> |
---|
| 2352 | </td> |
---|
| 2353 | |
---|
| 2354 | </tr> |
---|
| 2355 | |
---|
| 2356 | <tr> |
---|
| 2357 | |
---|
| 2358 | <td colspan="1" rowspan="1">36.</td> |
---|
| 2359 | <td colspan="1" rowspan="1"> |
---|
| 2360 | |
---|
| 2361 | <span class="codefrag">} catch (IOException ioe) {</span> |
---|
| 2362 | </td> |
---|
| 2363 | |
---|
| 2364 | </tr> |
---|
| 2365 | |
---|
| 2366 | <tr> |
---|
| 2367 | |
---|
| 2368 | <td colspan="1" rowspan="1">37.</td> |
---|
| 2369 | <td colspan="1" rowspan="1"> |
---|
| 2370 | |
---|
| 2371 | <span class="codefrag"> |
---|
| 2372 | System.err.println("Caught exception while getting cached files: " |
---|
| 2373 | + StringUtils.stringifyException(ioe)); |
---|
| 2374 | </span> |
---|
| 2375 | </td> |
---|
| 2376 | |
---|
| 2377 | </tr> |
---|
| 2378 | |
---|
| 2379 | <tr> |
---|
| 2380 | |
---|
| 2381 | <td colspan="1" rowspan="1">38.</td> |
---|
| 2382 | <td colspan="1" rowspan="1"> |
---|
| 2383 | |
---|
| 2384 | <span class="codefrag">}</span> |
---|
| 2385 | </td> |
---|
| 2386 | |
---|
| 2387 | </tr> |
---|
| 2388 | |
---|
| 2389 | <tr> |
---|
| 2390 | |
---|
| 2391 | <td colspan="1" rowspan="1">39.</td> |
---|
| 2392 | <td colspan="1" rowspan="1"> |
---|
| 2393 | |
---|
| 2394 | <span class="codefrag">for (Path patternsFile : patternsFiles) {</span> |
---|
| 2395 | </td> |
---|
| 2396 | |
---|
| 2397 | </tr> |
---|
| 2398 | |
---|
| 2399 | <tr> |
---|
| 2400 | |
---|
| 2401 | <td colspan="1" rowspan="1">40.</td> |
---|
| 2402 | <td colspan="1" rowspan="1"> |
---|
| 2403 | |
---|
| 2404 | <span class="codefrag">parseSkipFile(patternsFile);</span> |
---|
| 2405 | </td> |
---|
| 2406 | |
---|
| 2407 | </tr> |
---|
| 2408 | |
---|
| 2409 | <tr> |
---|
| 2410 | |
---|
| 2411 | <td colspan="1" rowspan="1">41.</td> |
---|
| 2412 | <td colspan="1" rowspan="1"> |
---|
| 2413 | |
---|
| 2414 | <span class="codefrag">}</span> |
---|
| 2415 | </td> |
---|
| 2416 | |
---|
| 2417 | </tr> |
---|
| 2418 | |
---|
| 2419 | <tr> |
---|
| 2420 | |
---|
| 2421 | <td colspan="1" rowspan="1">42.</td> |
---|
| 2422 | <td colspan="1" rowspan="1"> |
---|
| 2423 | |
---|
| 2424 | <span class="codefrag">}</span> |
---|
| 2425 | </td> |
---|
| 2426 | |
---|
| 2427 | </tr> |
---|
| 2428 | |
---|
| 2429 | <tr> |
---|
| 2430 | |
---|
| 2431 | <td colspan="1" rowspan="1">43.</td> |
---|
| 2432 | <td colspan="1" rowspan="1"> |
---|
| 2433 | |
---|
| 2434 | <span class="codefrag">}</span> |
---|
| 2435 | </td> |
---|
| 2436 | |
---|
| 2437 | </tr> |
---|
| 2438 | |
---|
| 2439 | <tr> |
---|
| 2440 | |
---|
| 2441 | <td colspan="1" rowspan="1">44.</td> |
---|
| 2442 | <td colspan="1" rowspan="1"></td> |
---|
| 2443 | |
---|
| 2444 | </tr> |
---|
| 2445 | |
---|
| 2446 | <tr> |
---|
| 2447 | |
---|
| 2448 | <td colspan="1" rowspan="1">45.</td> |
---|
| 2449 | <td colspan="1" rowspan="1"> |
---|
| 2450 | |
---|
| 2451 | <span class="codefrag">private void parseSkipFile(Path patternsFile) {</span> |
---|
| 2452 | </td> |
---|
| 2453 | |
---|
| 2454 | </tr> |
---|
| 2455 | |
---|
| 2456 | <tr> |
---|
| 2457 | |
---|
| 2458 | <td colspan="1" rowspan="1">46.</td> |
---|
| 2459 | <td colspan="1" rowspan="1"> |
---|
| 2460 | |
---|
| 2461 | <span class="codefrag">try {</span> |
---|
| 2462 | </td> |
---|
| 2463 | |
---|
| 2464 | </tr> |
---|
| 2465 | |
---|
| 2466 | <tr> |
---|
| 2467 | |
---|
| 2468 | <td colspan="1" rowspan="1">47.</td> |
---|
| 2469 | <td colspan="1" rowspan="1"> |
---|
| 2470 | |
---|
| 2471 | <span class="codefrag"> |
---|
| 2472 | BufferedReader fis = |
---|
| 2473 | new BufferedReader(new FileReader(patternsFile.toString())); |
---|
| 2474 | </span> |
---|
| 2475 | </td> |
---|
| 2476 | |
---|
| 2477 | </tr> |
---|
| 2478 | |
---|
| 2479 | <tr> |
---|
| 2480 | |
---|
| 2481 | <td colspan="1" rowspan="1">48.</td> |
---|
| 2482 | <td colspan="1" rowspan="1"> |
---|
| 2483 | |
---|
| 2484 | <span class="codefrag">String pattern = null;</span> |
---|
| 2485 | </td> |
---|
| 2486 | |
---|
| 2487 | </tr> |
---|
| 2488 | |
---|
| 2489 | <tr> |
---|
| 2490 | |
---|
| 2491 | <td colspan="1" rowspan="1">49.</td> |
---|
| 2492 | <td colspan="1" rowspan="1"> |
---|
| 2493 | |
---|
| 2494 | <span class="codefrag">while ((pattern = fis.readLine()) != null) {</span> |
---|
| 2495 | </td> |
---|
| 2496 | |
---|
| 2497 | </tr> |
---|
| 2498 | |
---|
| 2499 | <tr> |
---|
| 2500 | |
---|
| 2501 | <td colspan="1" rowspan="1">50.</td> |
---|
| 2502 | <td colspan="1" rowspan="1"> |
---|
| 2503 | |
---|
| 2504 | <span class="codefrag">patternsToSkip.add(pattern);</span> |
---|
| 2505 | </td> |
---|
| 2506 | |
---|
| 2507 | </tr> |
---|
| 2508 | |
---|
| 2509 | <tr> |
---|
| 2510 | |
---|
| 2511 | <td colspan="1" rowspan="1">51.</td> |
---|
| 2512 | <td colspan="1" rowspan="1"> |
---|
| 2513 | |
---|
| 2514 | <span class="codefrag">}</span> |
---|
| 2515 | </td> |
---|
| 2516 | |
---|
| 2517 | </tr> |
---|
| 2518 | |
---|
| 2519 | <tr> |
---|
| 2520 | |
---|
| 2521 | <td colspan="1" rowspan="1">52.</td> |
---|
| 2522 | <td colspan="1" rowspan="1"> |
---|
| 2523 | |
---|
| 2524 | <span class="codefrag">} catch (IOException ioe) {</span> |
---|
| 2525 | </td> |
---|
| 2526 | |
---|
| 2527 | </tr> |
---|
| 2528 | |
---|
| 2529 | <tr> |
---|
| 2530 | |
---|
| 2531 | <td colspan="1" rowspan="1">53.</td> |
---|
| 2532 | <td colspan="1" rowspan="1"> |
---|
| 2533 | |
---|
| 2534 | <span class="codefrag"> |
---|
| 2535 | System.err.println("Caught exception while parsing the cached file '" + |
---|
| 2536 | patternsFile + "' : " + |
---|
| 2537 | StringUtils.stringifyException(ioe)); |
---|
| 2538 | |
---|
| 2539 | </span> |
---|
| 2540 | </td> |
---|
| 2541 | |
---|
| 2542 | </tr> |
---|
| 2543 | |
---|
| 2544 | <tr> |
---|
| 2545 | |
---|
| 2546 | <td colspan="1" rowspan="1">54.</td> |
---|
| 2547 | <td colspan="1" rowspan="1"> |
---|
| 2548 | |
---|
| 2549 | <span class="codefrag">}</span> |
---|
| 2550 | </td> |
---|
| 2551 | |
---|
| 2552 | </tr> |
---|
| 2553 | |
---|
| 2554 | <tr> |
---|
| 2555 | |
---|
| 2556 | <td colspan="1" rowspan="1">55.</td> |
---|
| 2557 | <td colspan="1" rowspan="1"> |
---|
| 2558 | |
---|
| 2559 | <span class="codefrag">}</span> |
---|
| 2560 | </td> |
---|
| 2561 | |
---|
| 2562 | </tr> |
---|
| 2563 | |
---|
| 2564 | <tr> |
---|
| 2565 | |
---|
| 2566 | <td colspan="1" rowspan="1">56.</td> |
---|
| 2567 | <td colspan="1" rowspan="1"></td> |
---|
| 2568 | |
---|
| 2569 | </tr> |
---|
| 2570 | |
---|
| 2571 | <tr> |
---|
| 2572 | |
---|
| 2573 | <td colspan="1" rowspan="1">57.</td> |
---|
| 2574 | <td colspan="1" rowspan="1"> |
---|
| 2575 | |
---|
| 2576 | <span class="codefrag"> |
---|
| 2577 | public void map(LongWritable key, Text value, |
---|
| 2578 | OutputCollector&lt;Text, IntWritable&gt; output, |
---|
| 2579 | Reporter reporter) throws IOException { |
---|
| 2580 | </span> |
---|
| 2581 | </td> |
---|
| 2582 | |
---|
| 2583 | </tr> |
---|
| 2584 | |
---|
| 2585 | <tr> |
---|
| 2586 | |
---|
| 2587 | <td colspan="1" rowspan="1">58.</td> |
---|
| 2588 | <td colspan="1" rowspan="1"> |
---|
| 2589 | |
---|
| 2590 | <span class="codefrag"> |
---|
| 2591 | String line = |
---|
| 2592 | (caseSensitive) ? value.toString() : |
---|
| 2593 | value.toString().toLowerCase(); |
---|
| 2594 | </span> |
---|
| 2595 | </td> |
---|
| 2596 | |
---|
| 2597 | </tr> |
---|
| 2598 | |
---|
| 2599 | <tr> |
---|
| 2600 | |
---|
| 2601 | <td colspan="1" rowspan="1">59.</td> |
---|
| 2602 | <td colspan="1" rowspan="1"></td> |
---|
| 2603 | |
---|
| 2604 | </tr> |
---|
| 2605 | |
---|
| 2606 | <tr> |
---|
| 2607 | |
---|
| 2608 | <td colspan="1" rowspan="1">60.</td> |
---|
| 2609 | <td colspan="1" rowspan="1"> |
---|
| 2610 | |
---|
| 2611 | <span class="codefrag">for (String pattern : patternsToSkip) {</span> |
---|
| 2612 | </td> |
---|
| 2613 | |
---|
| 2614 | </tr> |
---|
| 2615 | |
---|
| 2616 | <tr> |
---|
| 2617 | |
---|
| 2618 | <td colspan="1" rowspan="1">61.</td> |
---|
| 2619 | <td colspan="1" rowspan="1"> |
---|
| 2620 | |
---|
| 2621 | <span class="codefrag">line = line.replaceAll(pattern, "");</span> |
---|
| 2622 | </td> |
---|
| 2623 | |
---|
| 2624 | </tr> |
---|
| 2625 | |
---|
| 2626 | <tr> |
---|
| 2627 | |
---|
| 2628 | <td colspan="1" rowspan="1">62.</td> |
---|
| 2629 | <td colspan="1" rowspan="1"> |
---|
| 2630 | |
---|
| 2631 | <span class="codefrag">}</span> |
---|
| 2632 | </td> |
---|
| 2633 | |
---|
| 2634 | </tr> |
---|
| 2635 | |
---|
| 2636 | <tr> |
---|
| 2637 | |
---|
| 2638 | <td colspan="1" rowspan="1">63.</td> |
---|
| 2639 | <td colspan="1" rowspan="1"></td> |
---|
| 2640 | |
---|
| 2641 | </tr> |
---|
| 2642 | |
---|
| 2643 | <tr> |
---|
| 2644 | |
---|
| 2645 | <td colspan="1" rowspan="1">64.</td> |
---|
| 2646 | <td colspan="1" rowspan="1"> |
---|
| 2647 | |
---|
| 2648 | <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span> |
---|
| 2649 | </td> |
---|
| 2650 | |
---|
| 2651 | </tr> |
---|
| 2652 | |
---|
| 2653 | <tr> |
---|
| 2654 | |
---|
| 2655 | <td colspan="1" rowspan="1">65.</td> |
---|
| 2656 | <td colspan="1" rowspan="1"> |
---|
| 2657 | |
---|
| 2658 | <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span> |
---|
| 2659 | </td> |
---|
| 2660 | |
---|
| 2661 | </tr> |
---|
| 2662 | |
---|
| 2663 | <tr> |
---|
| 2664 | |
---|
| 2665 | <td colspan="1" rowspan="1">66.</td> |
---|
| 2666 | <td colspan="1" rowspan="1"> |
---|
| 2667 | |
---|
| 2668 | <span class="codefrag">word.set(tokenizer.nextToken());</span> |
---|
| 2669 | </td> |
---|
| 2670 | |
---|
| 2671 | </tr> |
---|
| 2672 | |
---|
| 2673 | <tr> |
---|
| 2674 | |
---|
| 2675 | <td colspan="1" rowspan="1">67.</td> |
---|
| 2676 | <td colspan="1" rowspan="1"> |
---|
| 2677 | |
---|
| 2678 | <span class="codefrag">output.collect(word, one);</span> |
---|
| 2679 | </td> |
---|
| 2680 | |
---|
| 2681 | </tr> |
---|
| 2682 | |
---|
| 2683 | <tr> |
---|
| 2684 | |
---|
| 2685 | <td colspan="1" rowspan="1">68.</td> |
---|
| 2686 | <td colspan="1" rowspan="1"> |
---|
| 2687 | |
---|
| 2688 | <span class="codefrag">reporter.incrCounter(Counters.INPUT_WORDS, 1);</span> |
---|
| 2689 | </td> |
---|
| 2690 | |
---|
| 2691 | </tr> |
---|
| 2692 | |
---|
| 2693 | <tr> |
---|
| 2694 | |
---|
| 2695 | <td colspan="1" rowspan="1">69.</td> |
---|
| 2696 | <td colspan="1" rowspan="1"> |
---|
| 2697 | |
---|
| 2698 | <span class="codefrag">}</span> |
---|
| 2699 | </td> |
---|
| 2700 | |
---|
| 2701 | </tr> |
---|
| 2702 | |
---|
| 2703 | <tr> |
---|
| 2704 | |
---|
| 2705 | <td colspan="1" rowspan="1">70.</td> |
---|
| 2706 | <td colspan="1" rowspan="1"></td> |
---|
| 2707 | |
---|
| 2708 | </tr> |
---|
| 2709 | |
---|
| 2710 | <tr> |
---|
| 2711 | |
---|
| 2712 | <td colspan="1" rowspan="1">71.</td> |
---|
| 2713 | <td colspan="1" rowspan="1"> |
---|
| 2714 | |
---|
| 2715 | <span class="codefrag">if ((++numRecords % 100) == 0) {</span> |
---|
| 2716 | </td> |
---|
| 2717 | |
---|
| 2718 | </tr> |
---|
| 2719 | |
---|
| 2720 | <tr> |
---|
| 2721 | |
---|
| 2722 | <td colspan="1" rowspan="1">72.</td> |
---|
| 2723 | <td colspan="1" rowspan="1"> |
---|
| 2724 | |
---|
| 2725 | <span class="codefrag"> |
---|
| 2726 | reporter.setStatus("Finished processing " + numRecords + |
---|
| 2727 | " records " + "from the input file: " + |
---|
| 2728 | inputFile); |
---|
| 2729 | </span> |
---|
| 2730 | </td> |
---|
| 2731 | |
---|
| 2732 | </tr> |
---|
| 2733 | |
---|
| 2734 | <tr> |
---|
| 2735 | |
---|
| 2736 | <td colspan="1" rowspan="1">73.</td> |
---|
| 2737 | <td colspan="1" rowspan="1"> |
---|
| 2738 | |
---|
| 2739 | <span class="codefrag">}</span> |
---|
| 2740 | </td> |
---|
| 2741 | |
---|
| 2742 | </tr> |
---|
| 2743 | |
---|
| 2744 | <tr> |
---|
| 2745 | |
---|
| 2746 | <td colspan="1" rowspan="1">74.</td> |
---|
| 2747 | <td colspan="1" rowspan="1"> |
---|
| 2748 | |
---|
| 2749 | <span class="codefrag">}</span> |
---|
| 2750 | </td> |
---|
| 2751 | |
---|
| 2752 | </tr> |
---|
| 2753 | |
---|
| 2754 | <tr> |
---|
| 2755 | |
---|
| 2756 | <td colspan="1" rowspan="1">75.</td> |
---|
| 2757 | <td colspan="1" rowspan="1"> |
---|
| 2758 | |
---|
| 2759 | <span class="codefrag">}</span> |
---|
| 2760 | </td> |
---|
| 2761 | |
---|
| 2762 | </tr> |
---|
| 2763 | |
---|
| 2764 | <tr> |
---|
| 2765 | |
---|
| 2766 | <td colspan="1" rowspan="1">76.</td> |
---|
| 2767 | <td colspan="1" rowspan="1"></td> |
---|
| 2768 | |
---|
| 2769 | </tr> |
---|
| 2770 | |
---|
| 2771 | <tr> |
---|
| 2772 | |
---|
| 2773 | <td colspan="1" rowspan="1">77.</td> |
---|
| 2774 | <td colspan="1" rowspan="1"> |
---|
| 2775 | |
---|
| 2776 | <span class="codefrag"> |
---|
| 2777 | public static class Reduce extends MapReduceBase implements |
---|
| 2778 | Reducer&lt;Text, IntWritable, Text, IntWritable&gt; { |
---|
| 2779 | </span> |
---|
| 2780 | </td> |
---|
| 2781 | |
---|
| 2782 | </tr> |
---|
| 2783 | |
---|
| 2784 | <tr> |
---|
| 2785 | |
---|
| 2786 | <td colspan="1" rowspan="1">78.</td> |
---|
| 2787 | <td colspan="1" rowspan="1"> |
---|
| 2788 | |
---|
| 2789 | <span class="codefrag"> |
---|
| 2790 | public void reduce(Text key, Iterator&lt;IntWritable&gt; values, |
---|
| 2791 | OutputCollector&lt;Text, IntWritable&gt; output, |
---|
| 2792 | Reporter reporter) throws IOException { |
---|
| 2793 | </span> |
---|
| 2794 | </td> |
---|
| 2795 | |
---|
| 2796 | </tr> |
---|
| 2797 | |
---|
| 2798 | <tr> |
---|
| 2799 | |
---|
| 2800 | <td colspan="1" rowspan="1">79.</td> |
---|
| 2801 | <td colspan="1" rowspan="1"> |
---|
| 2802 | |
---|
| 2803 | <span class="codefrag">int sum = 0;</span> |
---|
| 2804 | </td> |
---|
| 2805 | |
---|
| 2806 | </tr> |
---|
| 2807 | |
---|
| 2808 | <tr> |
---|
| 2809 | |
---|
| 2810 | <td colspan="1" rowspan="1">80.</td> |
---|
| 2811 | <td colspan="1" rowspan="1"> |
---|
| 2812 | |
---|
| 2813 | <span class="codefrag">while (values.hasNext()) {</span> |
---|
| 2814 | </td> |
---|
| 2815 | |
---|
| 2816 | </tr> |
---|
| 2817 | |
---|
| 2818 | <tr> |
---|
| 2819 | |
---|
| 2820 | <td colspan="1" rowspan="1">81.</td> |
---|
| 2821 | <td colspan="1" rowspan="1"> |
---|
| 2822 | |
---|
| 2823 | <span class="codefrag">sum += values.next().get();</span> |
---|
| 2824 | </td> |
---|
| 2825 | |
---|
| 2826 | </tr> |
---|
| 2827 | |
---|
| 2828 | <tr> |
---|
| 2829 | |
---|
| 2830 | <td colspan="1" rowspan="1">82.</td> |
---|
| 2831 | <td colspan="1" rowspan="1"> |
---|
| 2832 | |
---|
| 2833 | <span class="codefrag">}</span> |
---|
| 2834 | </td> |
---|
| 2835 | |
---|
| 2836 | </tr> |
---|
| 2837 | |
---|
| 2838 | <tr> |
---|
| 2839 | |
---|
| 2840 | <td colspan="1" rowspan="1">83.</td> |
---|
| 2841 | <td colspan="1" rowspan="1"> |
---|
| 2842 | |
---|
| 2843 | <span class="codefrag">output.collect(key, new IntWritable(sum));</span> |
---|
| 2844 | </td> |
---|
| 2845 | |
---|
| 2846 | </tr> |
---|
| 2847 | |
---|
| 2848 | <tr> |
---|
| 2849 | |
---|
| 2850 | <td colspan="1" rowspan="1">84.</td> |
---|
| 2851 | <td colspan="1" rowspan="1"> |
---|
| 2852 | |
---|
| 2853 | <span class="codefrag">}</span> |
---|
| 2854 | </td> |
---|
| 2855 | |
---|
| 2856 | </tr> |
---|
| 2857 | |
---|
| 2858 | <tr> |
---|
| 2859 | |
---|
| 2860 | <td colspan="1" rowspan="1">85.</td> |
---|
| 2861 | <td colspan="1" rowspan="1"> |
---|
| 2862 | |
---|
| 2863 | <span class="codefrag">}</span> |
---|
| 2864 | </td> |
---|
| 2865 | |
---|
| 2866 | </tr> |
---|
| 2867 | |
---|
| 2868 | <tr> |
---|
| 2869 | |
---|
| 2870 | <td colspan="1" rowspan="1">86.</td> |
---|
| 2871 | <td colspan="1" rowspan="1"></td> |
---|
| 2872 | |
---|
| 2873 | </tr> |
---|
| 2874 | |
---|
| 2875 | <tr> |
---|
| 2876 | |
---|
| 2877 | <td colspan="1" rowspan="1">87.</td> |
---|
| 2878 | <td colspan="1" rowspan="1"> |
---|
| 2879 | |
---|
| 2880 | <span class="codefrag">public int run(String[] args) throws Exception {</span> |
---|
| 2881 | </td> |
---|
| 2882 | |
---|
| 2883 | </tr> |
---|
| 2884 | |
---|
| 2885 | <tr> |
---|
| 2886 | |
---|
| 2887 | <td colspan="1" rowspan="1">88.</td> |
---|
| 2888 | <td colspan="1" rowspan="1"> |
---|
| 2889 | |
---|
| 2890 | <span class="codefrag"> |
---|
| 2891 | JobConf conf = new JobConf(getConf(), WordCount.class); |
---|
| 2892 | </span> |
---|
| 2893 | </td> |
---|
| 2894 | |
---|
| 2895 | </tr> |
---|
| 2896 | |
---|
| 2897 | <tr> |
---|
| 2898 | |
---|
| 2899 | <td colspan="1" rowspan="1">89.</td> |
---|
| 2900 | <td colspan="1" rowspan="1"> |
---|
| 2901 | |
---|
| 2902 | <span class="codefrag">conf.setJobName("wordcount");</span> |
---|
| 2903 | </td> |
---|
| 2904 | |
---|
| 2905 | </tr> |
---|
| 2906 | |
---|
| 2907 | <tr> |
---|
| 2908 | |
---|
| 2909 | <td colspan="1" rowspan="1">90.</td> |
---|
| 2910 | <td colspan="1" rowspan="1"></td> |
---|
| 2911 | |
---|
| 2912 | </tr> |
---|
| 2913 | |
---|
| 2914 | <tr> |
---|
| 2915 | |
---|
| 2916 | <td colspan="1" rowspan="1">91.</td> |
---|
| 2917 | <td colspan="1" rowspan="1"> |
---|
| 2918 | |
---|
| 2919 | <span class="codefrag">conf.setOutputKeyClass(Text.class);</span> |
---|
| 2920 | </td> |
---|
| 2921 | |
---|
| 2922 | </tr> |
---|
| 2923 | |
---|
| 2924 | <tr> |
---|
| 2925 | |
---|
| 2926 | <td colspan="1" rowspan="1">92.</td> |
---|
| 2927 | <td colspan="1" rowspan="1"> |
---|
| 2928 | |
---|
| 2929 | <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span> |
---|
| 2930 | </td> |
---|
| 2931 | |
---|
| 2932 | </tr> |
---|
| 2933 | |
---|
| 2934 | <tr> |
---|
| 2935 | |
---|
| 2936 | <td colspan="1" rowspan="1">93.</td> |
---|
| 2937 | <td colspan="1" rowspan="1"></td> |
---|
| 2938 | |
---|
| 2939 | </tr> |
---|
| 2940 | |
---|
| 2941 | <tr> |
---|
| 2942 | |
---|
| 2943 | <td colspan="1" rowspan="1">94.</td> |
---|
| 2944 | <td colspan="1" rowspan="1"> |
---|
| 2945 | |
---|
| 2946 | <span class="codefrag">conf.setMapperClass(Map.class);</span> |
---|
| 2947 | </td> |
---|
| 2948 | |
---|
| 2949 | </tr> |
---|
| 2950 | |
---|
| 2951 | <tr> |
---|
| 2952 | |
---|
| 2953 | <td colspan="1" rowspan="1">95.</td> |
---|
| 2954 | <td colspan="1" rowspan="1"> |
---|
| 2955 | |
---|
| 2956 | <span class="codefrag">conf.setCombinerClass(Reduce.class);</span> |
---|
| 2957 | </td> |
---|
| 2958 | |
---|
| 2959 | </tr> |
---|
| 2960 | |
---|
| 2961 | <tr> |
---|
| 2962 | |
---|
| 2963 | <td colspan="1" rowspan="1">96.</td> |
---|
| 2964 | <td colspan="1" rowspan="1"> |
---|
| 2965 | |
---|
| 2966 | <span class="codefrag">conf.setReducerClass(Reduce.class);</span> |
---|
| 2967 | </td> |
---|
| 2968 | |
---|
| 2969 | </tr> |
---|
| 2970 | |
---|
| 2971 | <tr> |
---|
| 2972 | |
---|
| 2973 | <td colspan="1" rowspan="1">97.</td> |
---|
| 2974 | <td colspan="1" rowspan="1"></td> |
---|
| 2975 | |
---|
| 2976 | </tr> |
---|
| 2977 | |
---|
| 2978 | <tr> |
---|
| 2979 | |
---|
| 2980 | <td colspan="1" rowspan="1">98.</td> |
---|
| 2981 | <td colspan="1" rowspan="1"> |
---|
| 2982 | |
---|
| 2983 | <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span> |
---|
| 2984 | </td> |
---|
| 2985 | |
---|
| 2986 | </tr> |
---|
| 2987 | |
---|
| 2988 | <tr> |
---|
| 2989 | |
---|
| 2990 | <td colspan="1" rowspan="1">99.</td> |
---|
| 2991 | <td colspan="1" rowspan="1"> |
---|
| 2992 | |
---|
| 2993 | <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span> |
---|
| 2994 | </td> |
---|
| 2995 | |
---|
| 2996 | </tr> |
---|
| 2997 | |
---|
| 2998 | <tr> |
---|
| 2999 | |
---|
| 3000 | <td colspan="1" rowspan="1">100.</td> |
---|
| 3001 | <td colspan="1" rowspan="1"></td> |
---|
| 3002 | |
---|
| 3003 | </tr> |
---|
| 3004 | |
---|
| 3005 | <tr> |
---|
| 3006 | |
---|
| 3007 | <td colspan="1" rowspan="1">101.</td> |
---|
| 3008 | <td colspan="1" rowspan="1"> |
---|
| 3009 | |
---|
| 3010 | <span class="codefrag"> |
---|
| 3011 | List&lt;String&gt; other_args = new ArrayList&lt;String&gt;(); |
---|
| 3012 | </span> |
---|
| 3013 | </td> |
---|
| 3014 | |
---|
| 3015 | </tr> |
---|
| 3016 | |
---|
| 3017 | <tr> |
---|
| 3018 | |
---|
| 3019 | <td colspan="1" rowspan="1">102.</td> |
---|
| 3020 | <td colspan="1" rowspan="1"> |
---|
| 3021 | |
---|
| 3022 | <span class="codefrag">for (int i=0; i &lt; args.length; ++i) {</span> |
---|
| 3023 | </td> |
---|
| 3024 | |
---|
| 3025 | </tr> |
---|
| 3026 | |
---|
| 3027 | <tr> |
---|
| 3028 | |
---|
| 3029 | <td colspan="1" rowspan="1">103.</td> |
---|
| 3030 | <td colspan="1" rowspan="1"> |
---|
| 3031 | |
---|
| 3032 | <span class="codefrag">if ("-skip".equals(args[i])) {</span> |
---|
| 3033 | </td> |
---|
| 3034 | |
---|
| 3035 | </tr> |
---|
| 3036 | |
---|
| 3037 | <tr> |
---|
| 3038 | |
---|
| 3039 | <td colspan="1" rowspan="1">104.</td> |
---|
| 3040 | <td colspan="1" rowspan="1"> |
---|
| 3041 | |
---|
| 3042 | <span class="codefrag"> |
---|
| 3043 | DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf); |
---|
| 3044 | </span> |
---|
| 3045 | </td> |
---|
| 3046 | |
---|
| 3047 | </tr> |
---|
| 3048 | |
---|
| 3049 | <tr> |
---|
| 3050 | |
---|
| 3051 | <td colspan="1" rowspan="1">105.</td> |
---|
| 3052 | <td colspan="1" rowspan="1"> |
---|
| 3053 | |
---|
| 3054 | <span class="codefrag"> |
---|
| 3055 | conf.setBoolean("wordcount.skip.patterns", true); |
---|
| 3056 | </span> |
---|
| 3057 | </td> |
---|
| 3058 | |
---|
| 3059 | </tr> |
---|
| 3060 | |
---|
| 3061 | <tr> |
---|
| 3062 | |
---|
| 3063 | <td colspan="1" rowspan="1">106.</td> |
---|
| 3064 | <td colspan="1" rowspan="1"> |
---|
| 3065 | |
---|
| 3066 | <span class="codefrag">} else {</span> |
---|
| 3067 | </td> |
---|
| 3068 | |
---|
| 3069 | </tr> |
---|
| 3070 | |
---|
| 3071 | <tr> |
---|
| 3072 | |
---|
| 3073 | <td colspan="1" rowspan="1">107.</td> |
---|
| 3074 | <td colspan="1" rowspan="1"> |
---|
| 3075 | |
---|
| 3076 | <span class="codefrag">other_args.add(args[i]);</span> |
---|
| 3077 | </td> |
---|
| 3078 | |
---|
| 3079 | </tr> |
---|
| 3080 | |
---|
| 3081 | <tr> |
---|
| 3082 | |
---|
| 3083 | <td colspan="1" rowspan="1">108.</td> |
---|
| 3084 | <td colspan="1" rowspan="1"> |
---|
| 3085 | |
---|
| 3086 | <span class="codefrag">}</span> |
---|
| 3087 | </td> |
---|
| 3088 | |
---|
| 3089 | </tr> |
---|
| 3090 | |
---|
| 3091 | <tr> |
---|
| 3092 | |
---|
| 3093 | <td colspan="1" rowspan="1">109.</td> |
---|
| 3094 | <td colspan="1" rowspan="1"> |
---|
| 3095 | |
---|
| 3096 | <span class="codefrag">}</span> |
---|
| 3097 | </td> |
---|
| 3098 | |
---|
| 3099 | </tr> |
---|
| 3100 | |
---|
| 3101 | <tr> |
---|
| 3102 | |
---|
| 3103 | <td colspan="1" rowspan="1">110.</td> |
---|
| 3104 | <td colspan="1" rowspan="1"></td> |
---|
| 3105 | |
---|
| 3106 | </tr> |
---|
| 3107 | |
---|
| 3108 | <tr> |
---|
| 3109 | |
---|
| 3110 | <td colspan="1" rowspan="1">111.</td> |
---|
| 3111 | <td colspan="1" rowspan="1"> |
---|
| 3112 | |
---|
| 3113 | <span class="codefrag">FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));</span> |
---|
| 3114 | </td> |
---|
| 3115 | |
---|
| 3116 | </tr> |
---|
| 3117 | |
---|
| 3118 | <tr> |
---|
| 3119 | |
---|
| 3120 | <td colspan="1" rowspan="1">112.</td> |
---|
| 3121 | <td colspan="1" rowspan="1"> |
---|
| 3122 | |
---|
| 3123 | <span class="codefrag">FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));</span> |
---|
| 3124 | </td> |
---|
| 3125 | |
---|
| 3126 | </tr> |
---|
| 3127 | |
---|
| 3128 | <tr> |
---|
| 3129 | |
---|
| 3130 | <td colspan="1" rowspan="1">113.</td> |
---|
| 3131 | <td colspan="1" rowspan="1"></td> |
---|
| 3132 | |
---|
| 3133 | </tr> |
---|
| 3134 | |
---|
| 3135 | <tr> |
---|
| 3136 | |
---|
| 3137 | <td colspan="1" rowspan="1">114.</td> |
---|
| 3138 | <td colspan="1" rowspan="1"> |
---|
| 3139 | |
---|
| 3140 | <span class="codefrag">JobClient.runJob(conf);</span> |
---|
| 3141 | </td> |
---|
| 3142 | |
---|
| 3143 | </tr> |
---|
| 3144 | |
---|
| 3145 | <tr> |
---|
| 3146 | |
---|
| 3147 | <td colspan="1" rowspan="1">115.</td> |
---|
| 3148 | <td colspan="1" rowspan="1"> |
---|
| 3149 | |
---|
| 3150 | <span class="codefrag">return 0;</span> |
---|
| 3151 | </td> |
---|
| 3152 | |
---|
| 3153 | </tr> |
---|
| 3154 | |
---|
| 3155 | <tr> |
---|
| 3156 | |
---|
| 3157 | <td colspan="1" rowspan="1">116.</td> |
---|
| 3158 | <td colspan="1" rowspan="1"> |
---|
| 3159 | |
---|
| 3160 | <span class="codefrag">}</span> |
---|
| 3161 | </td> |
---|
| 3162 | |
---|
| 3163 | </tr> |
---|
| 3164 | |
---|
| 3165 | <tr> |
---|
| 3166 | |
---|
| 3167 | <td colspan="1" rowspan="1">117.</td> |
---|
| 3168 | <td colspan="1" rowspan="1"></td> |
---|
| 3169 | |
---|
| 3170 | </tr> |
---|
| 3171 | |
---|
| 3172 | <tr> |
---|
| 3173 | |
---|
| 3174 | <td colspan="1" rowspan="1">118.</td> |
---|
| 3175 | <td colspan="1" rowspan="1"> |
---|
| 3176 | |
---|
| 3177 | <span class="codefrag"> |
---|
| 3178 | public static void main(String[] args) throws Exception { |
---|
| 3179 | </span> |
---|
| 3180 | </td> |
---|
| 3181 | |
---|
| 3182 | </tr> |
---|
| 3183 | |
---|
| 3184 | <tr> |
---|
| 3185 | |
---|
| 3186 | <td colspan="1" rowspan="1">119.</td> |
---|
| 3187 | <td colspan="1" rowspan="1"> |
---|
| 3188 | |
---|
| 3189 | <span class="codefrag"> |
---|
| 3190 | int res = ToolRunner.run(new Configuration(), new WordCount(), |
---|
| 3191 | args); |
---|
| 3192 | </span> |
---|
| 3193 | </td> |
---|
| 3194 | |
---|
| 3195 | </tr> |
---|
| 3196 | |
---|
| 3197 | <tr> |
---|
| 3198 | |
---|
| 3199 | <td colspan="1" rowspan="1">120.</td> |
---|
| 3200 | <td colspan="1" rowspan="1"> |
---|
| 3201 | |
---|
| 3202 | <span class="codefrag">System.exit(res);</span> |
---|
| 3203 | </td> |
---|
| 3204 | |
---|
| 3205 | </tr> |
---|
| 3206 | |
---|
| 3207 | <tr> |
---|
| 3208 | |
---|
| 3209 | <td colspan="1" rowspan="1">121.</td> |
---|
| 3210 | <td colspan="1" rowspan="1"> |
---|
| 3211 | |
---|
| 3212 | <span class="codefrag">}</span> |
---|
| 3213 | </td> |
---|
| 3214 | |
---|
| 3215 | </tr> |
---|
| 3216 | |
---|
| 3217 | <tr> |
---|
| 3218 | |
---|
| 3219 | <td colspan="1" rowspan="1">122.</td> |
---|
| 3220 | <td colspan="1" rowspan="1"> |
---|
| 3221 | <span class="codefrag">}</span> |
---|
| 3222 | </td> |
---|
| 3223 | |
---|
| 3224 | </tr> |
---|
| 3225 | |
---|
| 3226 | <tr> |
---|
| 3227 | |
---|
| 3228 | <td colspan="1" rowspan="1">123.</td> |
---|
| 3229 | <td colspan="1" rowspan="1"></td> |
---|
| 3230 | |
---|
| 3231 | </tr> |
---|
| 3232 | |
---|
| 3233 | </table> |
---|
| 3234 | <a name="N11522"></a><a name="%E8%BF%90%E8%A1%8C%E6%A0%B7%E4%BE%8B"></a> |
---|
| 3235 | <h3 class="h4">运行样例</h3> |
---|
| 3236 | <p>输入样例：</p> |
---|
| 3237 | <p> |
---|
| 3238 | |
---|
| 3239 | <span class="codefrag">$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</span> |
---|
| 3240 | <br> |
---|
| 3241 | |
---|
| 3242 | <span class="codefrag">/usr/joe/wordcount/input/file01</span> |
---|
| 3243 | <br> |
---|
| 3244 | |
---|
| 3245 | <span class="codefrag">/usr/joe/wordcount/input/file02</span> |
---|
| 3246 | <br> |
---|
| 3247 | |
---|
| 3248 | <br> |
---|
| 3249 | |
---|
| 3250 | <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</span> |
---|
| 3251 | <br> |
---|
| 3252 | |
---|
| 3253 | <span class="codefrag">Hello World, Bye World!</span> |
---|
| 3254 | <br> |
---|
| 3255 | |
---|
| 3256 | <br> |
---|
| 3257 | |
---|
| 3258 | <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span> |
---|
| 3259 | <br> |
---|
| 3260 | |
---|
| 3261 | <span class="codefrag">Hello Hadoop, Goodbye to hadoop.</span> |
---|
| 3262 | |
---|
| 3263 | </p> |
---|
| 3264 | <p>运行程序：</p> |
---|
| 3265 | <p> |
---|
| 3266 | |
---|
| 3267 | <span class="codefrag"> |
---|
| 3268 | $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount |
---|
| 3269 | /usr/joe/wordcount/input /usr/joe/wordcount/output |
---|
| 3270 | </span> |
---|
| 3271 | |
---|
| 3272 | </p> |
---|
| 3273 | <p>输出：</p> |
---|
| 3274 | <p> |
---|
| 3275 | |
---|
| 3276 | <span class="codefrag"> |
---|
| 3277 | $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000 |
---|
| 3278 | </span> |
---|
| 3279 | |
---|
| 3280 | <br> |
---|
| 3281 | |
---|
| 3282 | <span class="codefrag">Bye 1</span> |
---|
| 3283 | <br> |
---|
| 3284 | |
---|
| 3285 | <span class="codefrag">Goodbye 1</span> |
---|
| 3286 | <br> |
---|
| 3287 | |
---|
| 3288 | <span class="codefrag">Hadoop, 1</span> |
---|
| 3289 | <br> |
---|
| 3290 | |
---|
| 3291 | <span class="codefrag">Hello 2</span> |
---|
| 3292 | <br> |
---|
| 3293 | |
---|
| 3294 | <span class="codefrag">World! 1</span> |
---|
| 3295 | <br> |
---|
| 3296 | |
---|
| 3297 | <span class="codefrag">World, 1</span> |
---|
| 3298 | <br> |
---|
| 3299 | |
---|
| 3300 | <span class="codefrag">hadoop. 1</span> |
---|
| 3301 | <br> |
---|
| 3302 | |
---|
| 3303 | <span class="codefrag">to 1</span> |
---|
| 3304 | <br> |
---|
| 3305 | |
---|
| 3306 | </p> |
---|
| 3307 | <p>注意此时的输入与第一个版本的不同，输出的结果也有不同。 |
---|
| 3308 | </p> |
---|
| 3309 | <p>现在通过<span class="codefrag">DistributedCache</span>插入一个模式文件，文件中保存了要被忽略的单词模式。 |
---|
| 3310 | </p> |
---|
| 3311 | <p> |
---|
| 3312 | |
---|
| 3313 | <span class="codefrag">$ hadoop dfs -cat /user/joe/wordcount/patterns.txt</span> |
---|
| 3314 | <br> |
---|
| 3315 | |
---|
| 3316 | <span class="codefrag">\.</span> |
---|
| 3317 | <br> |
---|
| 3318 | |
---|
| 3319 | <span class="codefrag">\,</span> |
---|
| 3320 | <br> |
---|
| 3321 | |
---|
| 3322 | <span class="codefrag">\!</span> |
---|
| 3323 | <br> |
---|
| 3324 | |
---|
| 3325 | <span class="codefrag">to</span> |
---|
| 3326 | <br> |
---|
| 3327 | |
---|
| 3328 | </p> |
---|
| 3329 | <p>再运行一次，这次使用更多的选项：</p> |
---|
| 3330 | <p> |
---|
| 3331 | |
---|
| 3332 | <span class="codefrag"> |
---|
| 3333 | $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount |
---|
| 3334 | -Dwordcount.case.sensitive=true /usr/joe/wordcount/input |
---|
| 3335 | /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt |
---|
| 3336 | </span> |
---|
| 3337 | |
---|
| 3338 | </p> |
---|
| 3339 | <p>应该得到这样的输出：</p> |
---|
| 3340 | <p> |
---|
| 3341 | |
---|
| 3342 | <span class="codefrag"> |
---|
| 3343 | $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000 |
---|
| 3344 | </span> |
---|
| 3345 | |
---|
| 3346 | <br> |
---|
| 3347 | |
---|
| 3348 | <span class="codefrag">Bye 1</span> |
---|
| 3349 | <br> |
---|
| 3350 | |
---|
| 3351 | <span class="codefrag">Goodbye 1</span> |
---|
| 3352 | <br> |
---|
| 3353 | |
---|
| 3354 | <span class="codefrag">Hadoop 1</span> |
---|
| 3355 | <br> |
---|
| 3356 | |
---|
| 3357 | <span class="codefrag">Hello 2</span> |
---|
| 3358 | <br> |
---|
| 3359 | |
---|
| 3360 | <span class="codefrag">World 2</span> |
---|
| 3361 | <br> |
---|
| 3362 | |
---|
| 3363 | <span class="codefrag">hadoop 1</span> |
---|
| 3364 | <br> |
---|
| 3365 | |
---|
| 3366 | </p> |
---|
| 3367 | <p>再运行一次，这一次关闭大小写敏感性（case-sensitivity）：</p> |
---|
| 3368 | <p> |
---|
| 3369 | |
---|
| 3370 | <span class="codefrag"> |
---|
| 3371 | $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount |
---|
| 3372 | -Dwordcount.case.sensitive=false /usr/joe/wordcount/input |
---|
| 3373 | /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt |
---|
| 3374 | </span> |
---|
| 3375 | |
---|
| 3376 | </p> |
---|
| 3377 | <p>输出：</p> |
---|
| 3378 | <p> |
---|
| 3379 | |
---|
| 3380 | <span class="codefrag"> |
---|
| 3381 | $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000 |
---|
| 3382 | </span> |
---|
| 3383 | |
---|
| 3384 | <br> |
---|
| 3385 | |
---|
| 3386 | <span class="codefrag">bye 1</span> |
---|
| 3387 | <br> |
---|
| 3388 | |
---|
| 3389 | <span class="codefrag">goodbye 1</span> |
---|
| 3390 | <br> |
---|
| 3391 | |
---|
| 3392 | <span class="codefrag">hadoop 2</span> |
---|
| 3393 | <br> |
---|
| 3394 | |
---|
| 3395 | <span class="codefrag">hello 2</span> |
---|
| 3396 | <br> |
---|
| 3397 | |
---|
| 3398 | <span class="codefrag">world 2</span> |
---|
| 3399 | <br> |
---|
| 3400 | |
---|
| 3401 | </p> |
---|
| 3402 | <a name="N115F6"></a><a name="%E7%A8%8B%E5%BA%8F%E8%A6%81%E7%82%B9"></a> |
---|
| 3403 | <h3 class="h4">程序要点</h3> |
---|
| 3404 | <p> |
---|
| 3405 | 通过使用一些Map/Reduce框架提供的功能，<span class="codefrag">WordCount</span>的第二个版本在原始版本基础上有了如下的改进： |
---|
| 3406 | </p> |
---|
| 3407 | <ul> |
---|
| 3408 | |
---|
| 3409 | <li> |
---|
| 3410 | 展示了应用程序如何在<span class="codefrag">Mapper</span> (和<span class="codefrag">Reducer</span>)中通过<span class="codefrag">configure</span>方法 |
---|
| 3411 | 修改配置参数(28-43行)。 |
---|
| 3412 | </li> |
---|
| 3413 | |
---|
| 3414 | <li> |
---|
| 3415 | 展示了作业如何使用<span class="codefrag">DistributedCache</span> 来分发只读数据。 |
---|
| 3416 | 这里允许用户指定单词的模式，在计数时忽略那些符合模式的单词(104行)。 |
---|
| 3417 | </li> |
---|
| 3418 | |
---|
| 3419 | <li> |
---|
| 3420 | 展示<span class="codefrag">Tool</span>接口和<span class="codefrag">GenericOptionsParser</span>处理Hadoop命令行选项的功能 |
---|
| 3421 | (87-116, 119行)。 |
---|
| 3422 | </li> |
---|
| 3423 | |
---|
| 3424 | <li> |
---|
| 3425 | 展示了应用程序如何使用<span class="codefrag">Counters</span>(68行)，如何通过传递给<span class="codefrag">map</span>（和<span class="codefrag">reduce</span>） |
---|
| 3426 | 方法的<span class="codefrag">Reporter</span>实例来设置应用程序的状态信息(72行)。 |
---|
| 3427 | </li> |
---|
| 3428 | |
---|
| 3429 | </ul> |
---|
| 3430 | </div> |
---|
| 3431 | |
---|
| 3432 | |
---|
| 3433 | <p> |
---|
| 3434 | |
---|
| 3435 | <em>Java和JNI是Sun Microsystems, Inc.在美国和其它国家的注册商标。</em> |
---|
| 3436 | |
---|
| 3437 | </p> |
---|
| 3438 | |
---|
| 3439 | |
---|
| 3440 | </div> |
---|
| 3441 | <!--+ |
---|
| 3442 | |end content |
---|
| 3443 | +--> |
---|
| 3444 | <div class="clearboth"> </div> |
---|
| 3445 | </div> |
---|
| 3446 | <div id="footer"> |
---|
| 3447 | <!--+ |
---|
| 3448 | |start bottomstrip |
---|
| 3449 | +--> |
---|
| 3450 | <div class="lastmodified"> |
---|
| 3451 | <script type="text/javascript"><!-- |
---|
| 3452 | document.write("Last Published: " + document.lastModified); |
---|
| 3453 | // --></script> |
---|
| 3454 | </div> |
---|
| 3455 | <div class="copyright"> |
---|
| 3456 | Copyright © |
---|
| 3457 | 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> |
---|
| 3458 | </div> |
---|
| 3459 | <!--+ |
---|
| 3460 | |end bottomstrip |
---|
| 3461 | +--> |
---|
| 3462 | </div> |
---|
| 3463 | </body> |
---|
| 3464 | </html> |
---|