[120] | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
---|
| 2 | <html> |
---|
| 3 | <head> |
---|
| 4 | <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
---|
| 5 | <meta content="Apache Forrest" name="Generator"> |
---|
| 6 | <meta name="Forrest-version" content="0.8"> |
---|
| 7 | <meta name="Forrest-skin-name" content="pelt"> |
---|
| 8 | <meta name="http-equiv" content="Content-Type"> |
---|
| 9 | <meta name="content" content="text/html;"> |
---|
| 10 | <meta name="charset" content="utf-8"> |
---|
| 11 | <title>Hadoop Streaming</title> |
---|
| 12 | <link type="text/css" href="skin/basic.css" rel="stylesheet"> |
---|
| 13 | <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet"> |
---|
| 14 | <link media="print" type="text/css" href="skin/print.css" rel="stylesheet"> |
---|
| 15 | <link type="text/css" href="skin/profile.css" rel="stylesheet"> |
---|
| 16 | <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script> |
---|
| 17 | <link rel="shortcut icon" href="images/favicon.ico"> |
---|
| 18 | </head> |
---|
| 19 | <body onload="init()"> |
---|
| 20 | <script type="text/javascript">ndeSetTextSize();</script> |
---|
| 21 | <div id="top"> |
---|
| 22 | <!--+ |
---|
| 23 | |breadtrail |
---|
| 24 | +--> |
---|
| 25 | <div class="breadtrail"> |
---|
| 26 | <a href="http://www.apache.org/">Apache</a> > <a href="http://hadoop.apache.org/">Hadoop</a> > <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> |
---|
| 27 | </div> |
---|
| 28 | <!--+ |
---|
| 29 | |header |
---|
| 30 | +--> |
---|
| 31 | <div class="header"> |
---|
| 32 | <!--+ |
---|
| 33 | |start group logo |
---|
| 34 | +--> |
---|
| 35 | <div class="grouplogo"> |
---|
| 36 | <a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a> |
---|
| 37 | </div> |
---|
| 38 | <!--+ |
---|
| 39 | |end group logo |
---|
| 40 | +--> |
---|
| 41 | <!--+ |
---|
| 42 | |start Project Logo |
---|
| 43 | +--> |
---|
| 44 | <div class="projectlogo"> |
---|
| 45 | <a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a> |
---|
| 46 | </div> |
---|
| 47 | <!--+ |
---|
| 48 | |end Project Logo |
---|
| 49 | +--> |
---|
| 50 | <!--+ |
---|
| 51 | |start Search |
---|
| 52 | +--> |
---|
| 53 | <div class="searchbox"> |
---|
| 54 | <form action="http://www.google.com/search" method="get" class="roundtopsmall"> |
---|
| 55 | <input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> |
---|
| 56 | <input name="Search" value="Search" type="submit"> |
---|
| 57 | </form> |
---|
| 58 | </div> |
---|
| 59 | <!--+ |
---|
| 60 | |end search |
---|
| 61 | +--> |
---|
| 62 | <!--+ |
---|
| 63 | |start Tabs |
---|
| 64 | +--> |
---|
| 65 | <ul id="tabs"> |
---|
| 66 | <li> |
---|
| 67 | <a class="unselected" href="http://hadoop.apache.org/core/">项目</a> |
---|
| 68 | </li> |
---|
| 69 | <li> |
---|
| 70 | <a class="unselected" href="http://wiki.apache.org/hadoop">维基</a> |
---|
| 71 | </li> |
---|
| 72 | <li class="current"> |
---|
| 73 | <a class="selected" href="index.html">Hadoop 0.18文档</a> |
---|
| 74 | </li> |
---|
| 75 | </ul> |
---|
| 76 | <!--+ |
---|
| 77 | |end Tabs |
---|
| 78 | +--> |
---|
| 79 | </div> |
---|
| 80 | </div> |
---|
| 81 | <div id="main"> |
---|
| 82 | <div id="publishedStrip"> |
---|
| 83 | <!--+ |
---|
| 84 | |start Subtabs |
---|
| 85 | +--> |
---|
| 86 | <div id="level2tabs"></div> |
---|
| 87 | <!--+ |
---|
| 88 | |end Endtabs |
---|
| 89 | +--> |
---|
| 90 | <script type="text/javascript"><!-- |
---|
| 91 | document.write("Last Published: " + document.lastModified); |
---|
| 92 | // --></script> |
---|
| 93 | </div> |
---|
| 94 | <!--+ |
---|
| 95 | |breadtrail |
---|
| 96 | +--> |
---|
| 97 | <div class="breadtrail"> |
---|
| 98 | |
---|
| 99 | |
---|
| 100 | </div> |
---|
| 101 | <!--+ |
---|
| 102 | |start Menu, mainarea |
---|
| 103 | +--> |
---|
| 104 | <!--+ |
---|
| 105 | |start Menu |
---|
| 106 | +--> |
---|
| 107 | <div id="menu"> |
---|
| 108 | <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div> |
---|
| 109 | <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> |
---|
| 110 | <div class="menuitem"> |
---|
| 111 | <a href="index.html">概述</a> |
---|
| 112 | </div> |
---|
| 113 | <div class="menuitem"> |
---|
| 114 | <a href="quickstart.html">快速入门</a> |
---|
| 115 | </div> |
---|
| 116 | <div class="menuitem"> |
---|
| 117 | <a href="cluster_setup.html">集群搭建</a> |
---|
| 118 | </div> |
---|
| 119 | <div class="menuitem"> |
---|
| 120 | <a href="hdfs_design.html">HDFS构架设计</a> |
---|
| 121 | </div> |
---|
| 122 | <div class="menuitem"> |
---|
| 123 | <a href="hdfs_user_guide.html">HDFS使用指南</a> |
---|
| 124 | </div> |
---|
| 125 | <div class="menuitem"> |
---|
| 126 | <a href="hdfs_permissions_guide.html">HDFS权限指南</a> |
---|
| 127 | </div> |
---|
| 128 | <div class="menuitem"> |
---|
| 129 | <a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a> |
---|
| 130 | </div> |
---|
| 131 | <div class="menuitem"> |
---|
| 132 | <a href="commands_manual.html">命令手册</a> |
---|
| 133 | </div> |
---|
| 134 | <div class="menuitem"> |
---|
| 135 | <a href="hdfs_shell.html">FS Shell使用指南</a> |
---|
| 136 | </div> |
---|
| 137 | <div class="menuitem"> |
---|
| 138 | <a href="distcp.html">DistCp使用指南</a> |
---|
| 139 | </div> |
---|
| 140 | <div class="menuitem"> |
---|
| 141 | <a href="mapred_tutorial.html">Map-Reduce教程</a> |
---|
| 142 | </div> |
---|
| 143 | <div class="menuitem"> |
---|
| 144 | <a href="native_libraries.html">Hadoop本地库</a> |
---|
| 145 | </div> |
---|
| 146 | <div class="menupage"> |
---|
| 147 | <div class="menupagetitle">Streaming</div> |
---|
| 148 | </div> |
---|
| 149 | <div class="menuitem"> |
---|
| 150 | <a href="hadoop_archives.html">Hadoop Archives</a> |
---|
| 151 | </div> |
---|
| 152 | <div class="menuitem"> |
---|
| 153 | <a href="hod.html">Hadoop On Demand</a> |
---|
| 154 | </div> |
---|
| 155 | <div class="menuitem"> |
---|
| 156 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a> |
---|
| 157 | </div> |
---|
| 158 | <div class="menuitem"> |
---|
| 159 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a> |
---|
| 160 | </div> |
---|
| 161 | <div class="menuitem"> |
---|
| 162 | <a href="http://wiki.apache.org/hadoop/">维基</a> |
---|
| 163 | </div> |
---|
| 164 | <div class="menuitem"> |
---|
| 165 | <a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a> |
---|
| 166 | </div> |
---|
| 167 | <div class="menuitem"> |
---|
| 168 | <a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a> |
---|
| 169 | </div> |
---|
| 170 | <div class="menuitem"> |
---|
| 171 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a> |
---|
| 172 | </div> |
---|
| 173 | <div class="menuitem"> |
---|
| 174 | <a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a> |
---|
| 175 | </div> |
---|
| 176 | </div> |
---|
| 177 | <div id="credit"></div> |
---|
| 178 | <div id="roundbottom"> |
---|
| 179 | <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> |
---|
| 180 | <!--+ |
---|
| 181 | |alternative credits |
---|
| 182 | +--> |
---|
| 183 | <div id="credit2"></div> |
---|
| 184 | </div> |
---|
| 185 | <!--+ |
---|
| 186 | |end Menu |
---|
| 187 | +--> |
---|
| 188 | <!--+ |
---|
| 189 | |start content |
---|
| 190 | +--> |
---|
| 191 | <div id="content"> |
---|
| 192 | <div title="Portable Document Format" class="pdflink"> |
---|
| 193 | <a class="dida" href="streaming.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> |
---|
| 194 | PDF</a> |
---|
| 195 | </div> |
---|
| 196 | <h1>Hadoop Streaming</h1> |
---|
| 197 | <div id="minitoc-area"> |
---|
| 198 | <ul class="minitoc"> |
---|
| 199 | <li> |
---|
| 200 | <a href="#Hadoop+Streaming">Hadoop Streaming</a> |
---|
| 201 | </li> |
---|
| 202 | <li> |
---|
| 203 | <a href="#Streaming%E5%B7%A5%E4%BD%9C%E5%8E%9F%E7%90%86">Streaming工作原理</a> |
---|
| 204 | </li> |
---|
| 205 | <li> |
---|
| 206 | <a href="#%E5%B0%86%E6%96%87%E4%BB%B6%E6%89%93%E5%8C%85%E5%88%B0%E6%8F%90%E4%BA%A4%E7%9A%84%E4%BD%9C%E4%B8%9A%E4%B8%AD">将文件打包到提交的作业中</a> |
---|
| 207 | </li> |
---|
| 208 | <li> |
---|
| 209 | <a href="#Streaming%E9%80%89%E9%A1%B9%E4%B8%8E%E7%94%A8%E6%B3%95">Streaming选项与用法</a> |
---|
| 210 | <ul class="minitoc"> |
---|
| 211 | <li> |
---|
| 212 | <a href="#%E5%8F%AA%E4%BD%BF%E7%94%A8Mapper%E7%9A%84%E4%BD%9C%E4%B8%9A">只使用Mapper的作业</a> |
---|
| 213 | </li> |
---|
| 214 | <li> |
---|
| 215 | <a href="#%E4%B8%BA%E4%BD%9C%E4%B8%9A%E6%8C%87%E5%AE%9A%E5%85%B6%E4%BB%96%E6%8F%92%E4%BB%B6">为作业指定其他插件</a> |
---|
| 216 | </li> |
---|
| 217 | <li> |
---|
| 218 | <a href="#Hadoop+Streaming%E4%B8%AD%E7%9A%84%E5%A4%A7%E6%96%87%E4%BB%B6%E5%92%8C%E6%A1%A3%E6%A1%88">Hadoop Streaming中的大文件和档案</a> |
---|
| 219 | </li> |
---|
| 220 | <li> |
---|
| 221 | <a href="#%E4%B8%BA%E4%BD%9C%E4%B8%9A%E6%8C%87%E5%AE%9A%E9%99%84%E5%8A%A0%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0">为作业指定附加配置参数</a> |
---|
| 222 | </li> |
---|
| 223 | <li> |
---|
| 224 | <a href="#%E5%85%B6%E4%BB%96%E9%80%89%E9%A1%B9">其他选项</a> |
---|
| 225 | </li> |
---|
| 226 | </ul> |
---|
| 227 | </li> |
---|
| 228 | <li> |
---|
| 229 | <a href="#%E5%85%B6%E4%BB%96%E4%BE%8B%E5%AD%90">其他例子</a> |
---|
| 230 | <ul class="minitoc"> |
---|
| 231 | <li> |
---|
| 232 | <a href="#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E6%96%B9%E6%B3%95%E5%88%87%E5%88%86%E8%A1%8C%E6%9D%A5%E5%BD%A2%E6%88%90Key%2FValue%E5%AF%B9">使用自定义的方法切分行来形成Key/Value对</a> |
---|
| 233 | </li> |
---|
| 234 | <li> |
---|
| 235 | <a href="#%E4%B8%80%E4%B8%AA%E5%AE%9E%E7%94%A8%E7%9A%84Partitioner%E7%B1%BB">一个实用的Partitioner类 (二次排序,-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner 选项) </a> |
---|
| 236 | </li> |
---|
| 237 | <li> |
---|
| 238 | <a href="#Hadoop%E8%81%9A%E5%90%88%E5%8A%9F%E8%83%BD%E5%8C%85%E7%9A%84%E4%BD%BF%E7%94%A8%EF%BC%88-reduce+aggregate+%E9%80%89%E9%A1%B9%EF%BC%89">Hadoop聚合功能包的使用(-reduce aggregate 选项)</a> |
---|
| 239 | </li> |
---|
| 240 | <li> |
---|
| 241 | <a href="#%E5%AD%97%E6%AE%B5%E7%9A%84%E9%80%89%E5%8F%96%EF%BC%88%E7%B1%BB%E4%BC%BC%E4%BA%8Eunix%E4%B8%AD%E7%9A%84+%27cut%27+%E5%91%BD%E4%BB%A4%EF%BC%89">字段的选取(类似于unix中的 'cut' 命令) </a> |
---|
| 242 | </li> |
---|
| 243 | </ul> |
---|
| 244 | </li> |
---|
| 245 | <li> |
---|
| 246 | <a href="#%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98">常见问题</a> |
---|
| 247 | <ul class="minitoc"> |
---|
| 248 | <li> |
---|
| 249 | <a href="#%E6%88%91%E8%AF%A5%E6%80%8E%E6%A0%B7%E4%BD%BF%E7%94%A8Hadoop+Streaming%E8%BF%90%E8%A1%8C%E4%B8%80%E7%BB%84%E7%8B%AC%E7%AB%8B%EF%BC%88%E7%9B%B8%E5%85%B3%EF%BC%89%E7%9A%84%E4%BB%BB%E5%8A%A1%E5%91%A2%EF%BC%9F">æ该ææ ·äœ¿çšHadoop Streamingè¿è¡äžç»ç¬ç«ïŒçžå
³ïŒçä»»å¡å¢ïŒ</a> |
---|
| 250 | </li> |
---|
| 251 | <li> |
---|
| 252 | <a href="#%E5%A6%82%E4%BD%95%E5%A4%84%E7%90%86%E5%A4%9A%E4%B8%AA%E6%96%87%E4%BB%B6%EF%BC%8C%E5%85%B6%E4%B8%AD%E6%AF%8F%E4%B8%AA%E6%96%87%E4%BB%B6%E4%B8%80%E4%B8%AAmap%EF%BC%9F">åŠäœå€çå€äžªæ件ïŒå
¶äžæ¯äžªæ件äžäžªmapïŒ</a> |
---|
| 253 | </li> |
---|
| 254 | <li> |
---|
| 255 | <a href="#%E5%BA%94%E8%AF%A5%E4%BD%BF%E7%94%A8%E5%A4%9A%E5%B0%91%E4%B8%AAreducer%EF%BC%9F">åºè¯¥äœ¿çšå€å°äžªreducerïŒ</a> |
---|
| 256 | </li> |
---|
| 257 | <li> |
---|
| 258 | <a href="#%E5%A6%82%E6%9E%9C%E5%9C%A8Shell%E8%84%9A%E6%9C%AC%E9%87%8C%E8%AE%BE%E7%BD%AE%E4%B8%80%E4%B8%AA%E5%88%AB%E5%90%8D%EF%BC%8C%E5%B9%B6%E6%94%BE%E5%9C%A8-mapper%E4%B9%8B%E5%90%8E%EF%BC%8CStreaming%E4%BC%9A%E6%AD%A3%E5%B8%B8%E8%BF%90%E8%A1%8C%E5%90%97%EF%BC%9F%0A%E4%BE%8B%E5%A6%82%EF%BC%8Calias+cl%3D%27cut+-fl%27%EF%BC%8C-mapper+%22cl%22%E4%BC%9A%E8%BF%90%E8%A1%8C%E6%AD%A3%E5%B8%B8%E5%90%97%EF%BC%9F"> |
---|
| 259 | åŠæåšShellèæ¬é讟眮äžäžªå«åïŒå¹¶æŸåš-mapperä¹åïŒStreamingäŒæ£åžžè¿è¡åïŒ |
---|
| 260 | äŸåŠïŒalias cl='cut -fl'ïŒ-mapper "cl"äŒè¿è¡æ£åžžåïŒ |
---|
| 261 | </a> |
---|
| 262 | </li> |
---|
| 263 | <li> |
---|
| 264 | <a href="#%E6%88%91%E5%8F%AF%E4%BB%A5%E4%BD%BF%E7%94%A8UNIX+pipes%E5%90%97%EF%BC%9F%E4%BE%8B%E5%A6%82+%E2%80%93mapper+%22cut+%E2%80%93fl+%7C+set+s%2Ffoo%2Fbar%2Fg%22%E7%AE%A1%E7%94%A8%E4%B9%88%EF%BC%9F"> |
---|
| 265 | æå¯ä»¥äœ¿çšUNIX pipesåïŒäŸåŠ –mapper "cut –fl | set s/foo/bar/g"管çšä¹ïŒ |
---|
| 266 | </a> |
---|
| 267 | </li> |
---|
| 268 | <li> |
---|
| 269 | <a href="#%E5%9C%A8streaming%E4%BD%9C%E4%B8%9A%E4%B8%AD%E7%94%A8-file%E9%80%89%E9%A1%B9%E8%BF%90%E8%A1%8C%E4%B8%80%E4%B8%AA">åšstreamingäœäžäžçš-fileé项è¿è¡äžäžªååžåŒçè¶
倧å¯æ§è¡æ件ïŒäŸåŠïŒ3.6GïŒæ¶ïŒ |
---|
| 270 | æåŸå°äºäžäžªé误信毓No space left on device”ãåŠäœè§£å³ïŒ |
---|
| 271 | </a> |
---|
| 272 | </li> |
---|
| 273 | <li> |
---|
| 274 | <a href="#%E5%A6%82%E4%BD%95%E8%AE%BE%E7%BD%AE%E5%A4%9A%E4%B8%AA%E8%BE%93%E5%85%A5%E7%9B%AE%E5%BD%95%EF%BC%9F">åŠäœè®Ÿçœ®å€äžªèŸå
¥ç®åœïŒ</a> |
---|
| 275 | </li> |
---|
| 276 | <li> |
---|
| 277 | <a href="#%E5%A6%82%E4%BD%95%E7%94%9F%E6%88%90gzip%E6%A0%BC%E5%BC%8F%E7%9A%84%E8%BE%93%E5%87%BA%E6%96%87%E4%BB%B6%EF%BC%9F">åŠäœçægzipæ ŒåŒçèŸåºæ件ïŒ</a> |
---|
| 278 | </li> |
---|
| 279 | <li> |
---|
| 280 | <a href="#Streaming%E4%B8%AD%E5%A6%82%E4%BD%95%E8%87%AA%E5%AE%9A%E4%B9%89input%2Foutput+format%EF%BC%9F">StreamingäžåŠäœèªå®ä¹input/output formatïŒ</a> |
---|
| 281 | </li> |
---|
| 282 | <li> |
---|
| 283 | <a href="#Streaming%E5%A6%82%E4%BD%95%E8%A7%A3%E6%9E%90XML%E6%96%87%E6%A1%A3%EF%BC%9F">StreamingåŠäœè§£æXMLææ¡£ïŒ</a> |
---|
| 284 | </li> |
---|
| 285 | <li> |
---|
| 286 | <a href="#%E5%9C%A8streaming%E5%BA%94%E7%94%A8%E7%A8%8B%E5%BA%8F%E4%B8%AD%E5%A6%82%E4%BD%95%E6%9B%B4%E6%96%B0%E8%AE%A1%E6%95%B0%E5%99%A8%EF%BC%9F">åšstreamingåºçšçšåºäžåŠäœæŽæ°è®¡æ°åšïŒ</a> |
---|
| 287 | </li> |
---|
| 288 | <li> |
---|
| 289 | <a href="#%E5%A6%82%E4%BD%95%E6%9B%B4%E6%96%B0streaming%E5%BA%94%E7%94%A8%E7%A8%8B%E5%BA%8F%E7%9A%84%E7%8A%B6%E6%80%81%EF%BC%9F">åŠäœæŽæ°streamingåºçšçšåºçç¶æïŒ</a> |
---|
| 290 | </li> |
---|
| 291 | </ul> |
---|
| 292 | </li> |
---|
| 293 | </ul> |
---|
| 294 | </div> |
---|
| 295 | |
---|
| 296 | <a name="N10019"></a><a name="Hadoop+Streaming"></a> |
---|
| 297 | <h2 class="h3">Hadoop Streaming</h2> |
---|
| 298 | <div class="section"> |
---|
| 299 | <p> |
---|
| 300 | Hadoop streaming是Hadoop的一个工具, |
---|
| 301 | 它帮助用户创建和运行一类特殊的map/reduce作业, |
---|
| 302 | 这些特殊的map/reduce作业是由一些可执行文件或脚本文件充当mapper或者reducer。例如: |
---|
| 303 | </p> |
---|
| 304 | <pre class="code"> |
---|
| 305 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 306 | -input myInputDirs \ |
---|
| 307 | -output myOutputDir \ |
---|
| 308 | -mapper /bin/cat \ |
---|
| 309 | -reducer /bin/wc |
---|
| 310 | </pre> |
---|
| 311 | </div> |
---|
| 312 | |
---|
| 313 | |
---|
| 314 | <a name="N10027"></a><a name="Streaming%E5%B7%A5%E4%BD%9C%E5%8E%9F%E7%90%86"></a> |
---|
| 315 | <h2 class="h3">Streaming工作原理</h2> |
---|
| 316 | <div class="section"> |
---|
| 317 | <p> |
---|
| 318 | åšäžé¢çäŸåéïŒmapperåreduceréœæ¯å¯æ§è¡æ件ïŒå®ä»¬ä»æ åèŸå
¥è¯»å
¥æ°æ®ïŒäžè¡äžè¡è¯»ïŒïŒ |
---|
| 319 | 并æ计ç®ç»æåç»æ åèŸåºãStreamingå·¥å
·äŒå建äžäžªMap/ReduceäœäžïŒ |
---|
| 320 | 并æå®åéç»åéçé矀ïŒåæ¶çè§è¿äžªäœäžçæŽäžªæ§è¡è¿çšã |
---|
| 321 | </p> |
---|
| 322 | <p> |
---|
| 323 | åŠæäžäžªå¯æ§è¡æ件被çšäºmapperïŒååšmapperåå§åæ¶ïŒ |
---|
| 324 | æ¯äžäžªmapperä»»å¡äŒæè¿äžªå¯æ§è¡æ件äœäžºäžäžªåç¬çè¿çšå¯åšã |
---|
| 325 | mapperä»»å¡è¿è¡æ¶ïŒå®æèŸå
¥ååæè¡å¹¶ææ¯äžè¡æäŸç»å¯æ§è¡æ件è¿çšçæ åèŸå
¥ã |
---|
| 326 | åæ¶ïŒmapperæ¶éå¯æ§è¡æ件è¿çšæ åèŸåºçå
容ïŒå¹¶ææ¶å°çæ¯äžè¡å
容蜬åækey/value对ïŒäœäžºmapperçèŸåºã |
---|
| 327 | é»è®€æ
åµäžïŒäžè¡äžç¬¬äžäžªtabä¹åçéšåäœäžº<strong>key</strong>ïŒä¹åçïŒäžå
æ¬tabïŒäœäžº<strong>value</strong>ã |
---|
| 328 | åŠæ没ætabïŒæŽè¡äœäžºkeyåŒïŒvalueåŒäžºnullãäžè¿ïŒè¿å¯ä»¥å®å¶ïŒåšäžæäžå°äŒè®šè®ºåŠäœèªå®ä¹keyåvalueçååæ¹åŒã |
---|
| 329 | </p> |
---|
| 330 | <p>åŠæäžäžªå¯æ§è¡æ件被çšäºreducerïŒæ¯äžªreducerä»»å¡äŒæè¿äžªå¯æ§è¡æ件äœäžºäžäžªåç¬çè¿çšå¯åšã |
---|
| 331 | Reducerä»»å¡è¿è¡æ¶ïŒå®æèŸå
¥ååæè¡å¹¶ææ¯äžè¡æäŸç»å¯æ§è¡æ件è¿çšçæ åèŸå
¥ã |
---|
| 332 | åæ¶ïŒreduceræ¶éå¯æ§è¡æ件è¿çšæ åèŸåºçå
容ïŒå¹¶ææ¯äžè¡å
容蜬åækey/value对ïŒäœäžºreducerçèŸåºã |
---|
| 333 | é»è®€æ
åµäžïŒäžè¡äžç¬¬äžäžªtabä¹åçéšåäœäžºkeyïŒä¹åçïŒäžå
æ¬tabïŒäœäžºvalueãåšäžæäžå°äŒè®šè®ºåŠäœèªå®ä¹keyåvalueçååæ¹åŒã |
---|
| 334 | </p> |
---|
| 335 | <p> |
---|
| 336 | è¿æ¯Map/Reduceæ¡æ¶åstreaming mapper/reducerä¹éŽçåºæ¬éä¿¡åè®®ã |
---|
| 337 | </p> |
---|
| 338 | <p> |
---|
| 339 | çšæ·ä¹å¯ä»¥äœ¿çšjavaç±»äœäžºmapperæè
reducerãäžé¢çäŸåäžè¿éç代ç çä»·ïŒ |
---|
| 340 | </p> |
---|
| 341 | <pre class="code"> |
---|
| 342 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 343 | -input myInputDirs \ |
---|
| 344 | -output myOutputDir \ |
---|
| 345 | -mapper org.apache.hadoop.mapred.lib.IdentityMapper \ |
---|
| 346 | -reducer /bin/wc |
---|
| 347 | </pre> |
---|
| 348 | <p>çšæ·å¯ä»¥è®Ÿå®<span class="codefrag">stream.non.zero.exit.is.failure</span> |
---|
| 349 | <span class="codefrag">true</span> æ<span class="codefrag">false</span> æ¥è¡šæstreaming taskçè¿ååŒéé¶æ¶æ¯ |
---|
| 350 | <span class="codefrag">Failure</span> |
---|
| 351 | è¿æ¯<span class="codefrag">Success</span>ãé»è®€æ
åµïŒstreaming taskè¿åéé¶æ¶è¡šç€ºå€±èŽ¥ã |
---|
| 352 | </p> |
---|
| 353 | </div> |
---|
| 354 | |
---|
| 355 | |
---|
| 356 | <a name="N10056"></a><a name="%E5%B0%86%E6%96%87%E4%BB%B6%E6%89%93%E5%8C%85%E5%88%B0%E6%8F%90%E4%BA%A4%E7%9A%84%E4%BD%9C%E4%B8%9A%E4%B8%AD"></a> |
---|
| 357 | <h2 class="h3">将文件打包到提交的作业中</h2> |
---|
| 358 | <div class="section"> |
---|
| 359 | <p> |
---|
| 360 | ä»»äœå¯æ§è¡æ件éœå¯ä»¥è¢«æå®äžºmapper/reducerãè¿äºå¯æ§è¡æ件äžéèŠäºå
åæŸåšé矀äžïŒ |
---|
| 361 | åŠæåšé矀äžè¿æ²¡æïŒåéèŠçš-fileé项让frameworkæå¯æ§è¡æ件äœäžºäœäžçäžéšåïŒäžèµ·æå
æ亀ãäŸåŠïŒ |
---|
| 362 | </p> |
---|
| 363 | <pre class="code"> |
---|
| 364 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 365 | -input myInputDirs \ |
---|
| 366 | -output myOutputDir \ |
---|
| 367 | -mapper myPythonScript.py \ |
---|
| 368 | -reducer /bin/wc \ |
---|
| 369 | -file myPythonScript.py |
---|
| 370 | </pre> |
---|
| 371 | <p> |
---|
| 372 | äžé¢çäŸåæè¿°äºäžäžªçšæ·æå¯æ§è¡pythonæ件äœäžºmapperã |
---|
| 373 | 其中的选项“-file myPythonScript.py”使可执行python文件作为作业提交的一部分被上传到集群的机器上。 |
---|
| 374 | </p> |
---|
| 375 | <p> |
---|
| 376 | é€äºå¯æ§è¡æ件å€ïŒå
¶ä»mapperæreduceréèŠçšå°çèŸ
å©æ件ïŒæ¯åŠåå
žïŒé
眮æ件çïŒä¹å¯ä»¥çšè¿ç§æ¹åŒæå
äžäŒ ãäŸåŠïŒ |
---|
| 377 | </p> |
---|
| 378 | <pre class="code"> |
---|
| 379 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 380 | -input myInputDirs \ |
---|
| 381 | -output myOutputDir \ |
---|
| 382 | -mapper myPythonScript.py \ |
---|
| 383 | -reducer /bin/wc \ |
---|
| 384 | -file myPythonScript.py \ |
---|
| 385 | -file myDictionary.txt |
---|
| 386 | </pre> |
---|
| 387 | </div> |
---|
| 388 | |
---|
| 389 | |
---|
| 390 | <a name="N1006E"></a><a name="Streaming%E9%80%89%E9%A1%B9%E4%B8%8E%E7%94%A8%E6%B3%95"></a> |
---|
| 391 | <h2 class="h3">Streaming选项与用法</h2> |
---|
| 392 | <div class="section"> |
---|
| 393 | <a name="N10074"></a><a name="%E5%8F%AA%E4%BD%BF%E7%94%A8Mapper%E7%9A%84%E4%BD%9C%E4%B8%9A"></a> |
---|
| 394 | <h3 class="h4">只使用Mapper的作业</h3> |
---|
| 395 | <p> |
---|
| 396 | ææ¶åªéèŠmapåœæ°å€çèŸå
¥æ°æ®ãè¿æ¶åªéæmapred.reduce.tasks讟眮䞺é¶ïŒMap/reduceæ¡æ¶å°±äžäŒå建reducerä»»å¡ïŒmapperä»»å¡çèŸåºå°±æ¯æŽäžªäœäžçæç»èŸåºã |
---|
| 397 | </p> |
---|
| 398 | <p> |
---|
| 399 | 䞺äºåå°åäžå
Œå®¹ïŒHadoop Streamingä¹æ¯æ“-reduce None”é项ïŒå®äž“-jobconf mapred.reduce.tasks=0”çä»·ã |
---|
| 400 | </p> |
---|
| 401 | <a name="N10080"></a><a name="%E4%B8%BA%E4%BD%9C%E4%B8%9A%E6%8C%87%E5%AE%9A%E5%85%B6%E4%BB%96%E6%8F%92%E4%BB%B6"></a> |
---|
| 402 | <h3 class="h4">为作业指定其他插件</h3> |
---|
| 403 | <p> |
---|
| 404 | åå
¶ä»æ®éçMap/Reduceäœäžäžæ ·ïŒçšæ·å¯ä»¥äžºstreamingäœäžæå®å
¶ä»æä»¶ïŒ |
---|
| 405 | </p> |
---|
| 406 | <pre class="code"> |
---|
| 407 | -inputformat JavaClassName |
---|
| 408 | -outputformat JavaClassName |
---|
| 409 | -partitioner JavaClassName |
---|
| 410 | -combiner JavaClassName |
---|
| 411 | </pre> |
---|
| 412 | <p>çšäºå€çèŸå
¥æ ŒåŒçç±»èŠèœè¿åTextç±»åçkey/value对ãåŠæäžæå®èŸå
¥æ ŒåŒïŒåé»è®€äŒäœ¿çšTextInputFormatã |
---|
| 413 | å 䞺TextInputFormatåŸå°çkeyåŒæ¯LongWritableç±»åçïŒå
¶å®keyåŒå¹¶äžæ¯èŸå
¥æ件äžçå
容ïŒèæ¯valueå移éïŒïŒ |
---|
| 414 | æ以keyäŒè¢«äž¢åŒïŒåªævalueçšç®¡éæ¹åŒåç»mapperã |
---|
| 415 | </p> |
---|
| 416 | <p> |
---|
| 417 | çšæ·æäŸçå®ä¹èŸåºæ ŒåŒçç±»éèŠèœå€å€çTextç±»åçkey/value对ãåŠæäžæå®èŸåºæ ŒåŒïŒåé»è®€äŒäœ¿çšTextOutputFormatç±»ã |
---|
| 418 | </p> |
---|
| 419 | <a name="N10093"></a><a name="Hadoop+Streaming%E4%B8%AD%E7%9A%84%E5%A4%A7%E6%96%87%E4%BB%B6%E5%92%8C%E6%A1%A3%E6%A1%88"></a> |
---|
| 420 | <h3 class="h4">Hadoop Streaming中的大文件和档案</h3> |
---|
| 421 | <p>ä»»å¡äœ¿çš-cacheFileå-cacheArchiveé项åšé矀äžååæ件åæ¡£æ¡ïŒé项çåæ°æ¯çšæ·å·²äžäŒ è³HDFSçæ件ææ¡£æ¡çURIãè¿äºæ件åæ¡£æ¡åšäžåçäœäžéŽçŒåãçšæ·å¯ä»¥éè¿fs.default.name.configé
眮åæ°çåŒåŸå°æ件æåšçhoståfs_portã |
---|
| 422 | </p> |
---|
| 423 | <p> |
---|
| 424 | è¿äžªæ¯äœ¿çš-cacheFileé项çäŸåïŒ |
---|
| 425 | </p> |
---|
| 426 | <pre class="code"> |
---|
| 427 | -cacheFile hdfs://host:fs_port/user/testfile.txt#testlink |
---|
| 428 | </pre> |
---|
| 429 | <p>åšäžé¢çäŸåéïŒurläž#åé¢çéšåæ¯å»ºç«åšä»»å¡åœåå·¥äœç®åœäžç笊å·éŸæ¥çååãè¿éçä»»å¡çåœåå·¥äœç®åœäžæäžäžª“testlink”笊å·éŸæ¥ïŒå®æåtestfile.txtæ件åšæ¬å°çæ·èŽãåŠææå€äžªæ件ïŒé项å¯ä»¥åæïŒ |
---|
| 430 | </p> |
---|
| 431 | <pre class="code"> |
---|
| 432 | -cacheFile hdfs://host:fs_port/user/testfile1.txt#testlink1 -cacheFile hdfs://host:fs_port/user/testfile2.txt#testlink2 |
---|
| 433 | </pre> |
---|
| 434 | <p> |
---|
| 435 | -cacheArchiveé项çšäºæjaræ件æ·èŽå°ä»»å¡åœåå·¥äœç®åœå¹¶èªåšæjaræ件解å猩ãäŸåŠïŒ |
---|
| 436 | </p> |
---|
| 437 | <pre class="code"> |
---|
| 438 | -cacheArchive hdfs://host:fs_port/user/testfile.jar#testlink3 |
---|
| 439 | </pre> |
---|
| 440 | <p> |
---|
| 441 | åšäžé¢çäŸåäžïŒtestlink3æ¯åœåå·¥äœç®åœäžç笊å·éŸæ¥ïŒå®æåtestfile.jar解ååçç®åœã |
---|
| 442 | </p> |
---|
| 443 | <p> |
---|
| 444 | äžé¢æ¯äœ¿çš-cacheArchiveé项çåŠäžäžªäŸåãå
¶äžïŒinput.txtæ件æ䞀è¡å
容ïŒåå«æ¯äž€äžªæ件çååïŒtestlink/cache.txtåtestlink/cache2.txtã“testlink”æ¯æåæ¡£æ¡ç®åœïŒjaræ件解ååçç®åœïŒç笊å·éŸæ¥ïŒè¿äžªç®åœäžæ“cache.txt”å“cache2.txt”䞀䞪æ件ã |
---|
| 445 | </p> |
---|
| 446 | <pre class="code"> |
---|
| 447 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 448 | -input "/user/me/samples/cachefile/input.txt" \ |
---|
| 449 | -mapper "xargs cat" \ |
---|
| 450 | -reducer "cat" \ |
---|
| 451 | -output "/user/me/samples/cachefile/out" \ |
---|
| 452 | -cacheArchive 'hdfs://hadoop-nn1.example.com/user/me/samples/cachefile/cachedir.jar#testlink' \ |
---|
| 453 | -jobconf mapred.map.tasks=1 \ |
---|
| 454 | -jobconf mapred.reduce.tasks=1 \ |
---|
| 455 | -jobconf mapred.job.name="Experiment" |
---|
| 456 | |
---|
| 457 | $ ls test_jar/ |
---|
| 458 | cache.txt cache2.txt |
---|
| 459 | |
---|
| 460 | $ jar cvf cachedir.jar -C test_jar/ . |
---|
| 461 | added manifest |
---|
| 462 | adding: cache.txt(in = 30) (out= 29)(deflated 3%) |
---|
| 463 | adding: cache2.txt(in = 37) (out= 35)(deflated 5%) |
---|
| 464 | |
---|
| 465 | $ hadoop dfs -put cachedir.jar samples/cachefile |
---|
| 466 | |
---|
| 467 | $ hadoop dfs -cat /user/me/samples/cachefile/input.txt |
---|
| 468 | testlink/cache.txt |
---|
| 469 | testlink/cache2.txt |
---|
| 470 | |
---|
| 471 | $ cat test_jar/cache.txt |
---|
| 472 | This is just the cache string |
---|
| 473 | |
---|
| 474 | $ cat test_jar/cache2.txt |
---|
| 475 | This is just the second cache string |
---|
| 476 | |
---|
| 477 | $ hadoop dfs -ls /user/me/samples/cachefile/out |
---|
| 478 | Found 1 items |
---|
| 479 | /user/me/samples/cachefile/out/part-00000 <r 3> 69 |
---|
| 480 | |
---|
| 481 | $ hadoop dfs -cat /user/me/samples/cachefile/out/part-00000 |
---|
| 482 | This is just the cache string |
---|
| 483 | This is just the second cache string |
---|
| 484 | |
---|
| 485 | </pre> |
---|
| 486 | <a name="N100BC"></a><a name="%E4%B8%BA%E4%BD%9C%E4%B8%9A%E6%8C%87%E5%AE%9A%E9%99%84%E5%8A%A0%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0"></a> |
---|
| 487 | <h3 class="h4">为作业指定附加配置参数</h3> |
---|
| 488 | <p> |
---|
| 489 | çšæ·å¯ä»¥äœ¿çš“-jobconf <n>=<v>”å¢å äžäºé
眮åéãäŸåŠïŒ |
---|
| 490 | </p> |
---|
| 491 | <pre class="code"> |
---|
| 492 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 493 | -input myInputDirs \ |
---|
| 494 | -output myOutputDir \ |
---|
| 495 | -mapper org.apache.hadoop.mapred.lib.IdentityMapper \ |
---|
| 496 | -reducer /bin/wc \ |
---|
| 497 | -jobconf mapred.reduce.tasks=2 |
---|
| 498 | </pre> |
---|
| 499 | <p> |
---|
| 500 | äžé¢çäŸåäžïŒ-jobconf mapred.reduce.tasks=2è¡šæçšäž€äžªreducerå®æäœäžã |
---|
| 501 | </p> |
---|
| 502 | <p> |
---|
| 503 | å
³äºjobconfåæ°çæŽå€ç»èå¯ä»¥åèïŒ<a href="http://hadoop.apache.org/core/docs/current/hadoop-default.html">hadoop-default.html</a> |
---|
| 504 | </p> |
---|
| 505 | <a name="N100D3"></a><a name="%E5%85%B6%E4%BB%96%E9%80%89%E9%A1%B9"></a> |
---|
| 506 | <h3 class="h4">其他选项</h3> |
---|
| 507 | <p> |
---|
| 508 | Streaming äœäžçå
¶ä»é项åŠäžè¡šïŒ |
---|
| 509 | </p> |
---|
| 510 | <table class="ForrestTable" cellspacing="1" cellpadding="4"> |
---|
| 511 | |
---|
| 512 | <tr> |
---|
| 513 | <th colspan="1" rowspan="1">é项</th><th colspan="1" rowspan="1">å¯é/å¿
é¡»</th><th colspan="1" rowspan="1">æè¿°</th> |
---|
| 514 | </tr> |
---|
| 515 | |
---|
| 516 | <tr> |
---|
| 517 | <td colspan="1" rowspan="1"> -cluster name </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> åšæ¬å°Hadoopé矀äžäžäžªæå€äžªè¿çšé矀éŽåæ¢</td> |
---|
| 518 | </tr> |
---|
| 519 | |
---|
| 520 | |
---|
| 521 | <tr> |
---|
| 522 | <td colspan="1" rowspan="1"> -dfs host:port or local </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> èŠçäœäžçHDFSé
眮</td> |
---|
| 523 | </tr> |
---|
| 524 | |
---|
| 525 | <tr> |
---|
| 526 | <td colspan="1" rowspan="1"> -jt host:port or local </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> èŠçäœäžçJobTrackeré
眮</td> |
---|
| 527 | </tr> |
---|
| 528 | |
---|
| 529 | <tr> |
---|
| 530 | <td colspan="1" rowspan="1"> -additionalconfspec specfile </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> çšäžäžªç±»äŒŒäºhadoop-site.xmlçXMLæ件ä¿åææé
眮ïŒä»èäžéèŠçšå€äžª"-jobconf name=value"ç±»åçé项åç¬äžºæ¯äžªé
眮åéèµåŒ</td> |
---|
| 531 | </tr> |
---|
| 532 | |
---|
| 533 | <tr> |
---|
| 534 | <td colspan="1" rowspan="1"> -cmdenv name=value </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> äŒ éç¯å¢åéç»streamingåœä»€</td> |
---|
| 535 | </tr> |
---|
| 536 | |
---|
| 537 | <tr> |
---|
| 538 | <td colspan="1" rowspan="1"> -cacheFile fileNameURI </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> æå®äžäžªäžäŒ å°HDFSçæ件</td> |
---|
| 539 | </tr> |
---|
| 540 | |
---|
| 541 | <tr> |
---|
| 542 | <td colspan="1" rowspan="1"> -cacheArchive fileNameURI </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> æå®äžäžªäžäŒ å°HDFSçjaræ件ïŒè¿äžªjaræ件äŒè¢«èªåšè§£å猩å°åœåå·¥äœç®åœäž</td> |
---|
| 543 | </tr> |
---|
| 544 | |
---|
| 545 | |
---|
| 546 | <tr> |
---|
| 547 | <td colspan="1" rowspan="1"> -inputreader JavaClassName </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> 䞺äºåäžå
Œå®¹ïŒæå®äžäžªrecord readerç±»ïŒèäžæ¯input formatç±»ïŒ</td> |
---|
| 548 | </tr> |
---|
| 549 | |
---|
| 550 | <tr> |
---|
| 551 | <td colspan="1" rowspan="1"> -verbose </td><td colspan="1" rowspan="1"> å¯é </td><td colspan="1" rowspan="1"> 诊ç»èŸåº </td> |
---|
| 552 | </tr> |
---|
| 553 | |
---|
| 554 | </table> |
---|
| 555 | <p> |
---|
| 556 | 䜿çš-cluster <name>å®ç°“æ¬å°”Hadoopåäžäžªæå€äžªè¿çšHadoopé矀éŽåæ¢ãé»è®€æ
åµäžïŒäœ¿çšhadoop-default.xmlåhadoop-site.xmlïŒåœäœ¿çš-cluster <name>é项æ¶ïŒäŒäœ¿çš$HADOOP_HOME/conf/hadoop-<name>.xmlã |
---|
| 557 | </p> |
---|
| 558 | <p> |
---|
| 559 | äžé¢çé项æ¹åtempç®åœïŒ |
---|
| 560 | </p> |
---|
| 561 | <pre class="code"> |
---|
| 562 | -jobconf dfs.data.dir=/tmp |
---|
| 563 | </pre> |
---|
| 564 | <p> |
---|
| 565 | äžé¢çé项æå®å
¶ä»æ¬å°tempç®åœïŒ |
---|
| 566 | </p> |
---|
| 567 | <pre class="code"> |
---|
| 568 | -jobconf mapred.local.dir=/tmp/local |
---|
| 569 | -jobconf mapred.system.dir=/tmp/system |
---|
| 570 | -jobconf mapred.temp.dir=/tmp/temp |
---|
| 571 | </pre> |
---|
| 572 | <p> |
---|
| 573 | æŽå€æå
³jobconfçç»è请åèïŒ<a href="http://wiki.apache.org/hadoop/JobConfFile">http://wiki.apache.org/hadoop/JobConfFile</a> |
---|
| 574 | |
---|
| 575 | </p> |
---|
| 576 | <p> |
---|
| 577 | åšstreamingåœä»€äžè®Ÿçœ®ç¯å¢åéïŒ |
---|
| 578 | </p> |
---|
| 579 | <pre class="code"> |
---|
| 580 | -cmdenv EXAMPLE_DIR=/home/example/dictionaries/ |
---|
| 581 | </pre> |
---|
| 582 | </div> |
---|
| 583 | |
---|
| 584 | |
---|
| 585 | <a name="N1018B"></a><a name="%E5%85%B6%E4%BB%96%E4%BE%8B%E5%AD%90"></a> |
---|
| 586 | <h2 class="h3">其他例子</h2> |
---|
| 587 | <div class="section"> |
---|
| 588 | <a name="N10191"></a><a name="%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E6%96%B9%E6%B3%95%E5%88%87%E5%88%86%E8%A1%8C%E6%9D%A5%E5%BD%A2%E6%88%90Key%2FValue%E5%AF%B9"></a> |
---|
| 589 | <h3 class="h4">使用自定义的方法切分行来形成Key/Value对</h3> |
---|
| 590 | <p> |
---|
| 591 | ä¹åå·²ç»æå°ïŒåœMap/Reduceæ¡æ¶ä»mapperçæ åèŸå
¥è¯»åäžè¡æ¶ïŒå®æè¿äžè¡åå䞺key/value对ã |
---|
| 592 | åšé»è®€æ
åµäžïŒæ¯è¡ç¬¬äžäžªtab笊ä¹åçéšåäœäžºkeyïŒä¹åçéšåäœäžºvalueïŒäžå
æ¬tab笊ïŒã |
---|
| 593 | </p> |
---|
| 594 | <p> |
---|
| 595 | äœæ¯ïŒçšæ·å¯ä»¥èªå®ä¹ïŒå¯ä»¥æå®åé笊æ¯å
¶ä»å笊èäžæ¯é»è®€çtab笊ïŒæè
æå®åšç¬¬nïŒn>=1ïŒäžªåå²ç¬Šå€åå²èäžæ¯é»è®€ç第äžäžªãäŸåŠïŒ |
---|
| 596 | </p> |
---|
| 597 | <pre class="code"> |
---|
| 598 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 599 | -input myInputDirs \ |
---|
| 600 | -output myOutputDir \ |
---|
| 601 | -mapper org.apache.hadoop.mapred.lib.IdentityMapper \ |
---|
| 602 | -reducer org.apache.hadoop.mapred.lib.IdentityReducer \ |
---|
| 603 | -jobconf stream.map.output.field.separator=. \ |
---|
| 604 | -jobconf stream.num.map.output.key.fields=4 |
---|
| 605 | </pre> |
---|
| 606 | <p> |
---|
| 607 | åšäžé¢çäŸå-jobconf stream.map.output.field.separator=.”æ宓.”äœäžºmapèŸåºå
容çåé笊ïŒå¹¶äžä»åšç¬¬å䞪“.”ä¹åçéšåäœäžºkeyïŒä¹åçéšåäœäžºvalueïŒäžå
æ¬è¿ç¬¬å䞪“.”ïŒã |
---|
| 608 | åŠæäžè¡äžç“.”å°äºå䞪ïŒåæŽè¡çå
容äœäžºkeyïŒvalue讟䞺空çText对象ïŒå°±åè¿æ ·å建äºäžäžªTextïŒnew Text("")ïŒã |
---|
| 609 | </p> |
---|
| 610 | <p> |
---|
| 611 | åæ ·ïŒçšæ·å¯ä»¥äœ¿çš“-jobconf stream.reduce.output.field.separator=SEP”å“-jobconf stream.num.reduce.output.fields=NUM”æ¥æå®reduceèŸåºçè¡äžïŒç¬¬å 䞪åé笊å€åå²keyåvalueã |
---|
| 612 | </p> |
---|
| 613 | <a name="N101A7"></a><a name="%E4%B8%80%E4%B8%AA%E5%AE%9E%E7%94%A8%E7%9A%84Partitioner%E7%B1%BB"></a> |
---|
| 614 | <h3 class="h4">一个实用的Partitioner类 (二次排序,-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner 选项) </h3> |
---|
| 615 | <p> |
---|
| 616 | Hadoopæäžäžªå·¥å
·ç±»org.apache.hadoop.mapred.lib.KeyFieldBasedPartitionerïŒ |
---|
| 617 | å®åšåºçšçšåºäžåŸæçšãMap/reduceæ¡æ¶çšè¿äžªç±»ååmapçèŸåºïŒ |
---|
| 618 | ååæ¯åºäºkeyåŒçåçŒïŒèäžæ¯æŽäžªkeyãäŸåŠïŒ |
---|
| 619 | </p> |
---|
| 620 | <pre class="code"> |
---|
| 621 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 622 | -input myInputDirs \ |
---|
| 623 | -output myOutputDir \ |
---|
| 624 | -mapper org.apache.hadoop.mapred.lib.IdentityMapper \ |
---|
| 625 | -reducer org.apache.hadoop.mapred.lib.IdentityReducer \ |
---|
| 626 | -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \ |
---|
| 627 | -jobconf stream.map.output.field.separator=. \ |
---|
| 628 | -jobconf stream.num.map.output.key.fields=4 \ |
---|
| 629 | -jobconf map.output.key.field.separator=. \ |
---|
| 630 | -jobconf num.key.fields.for.partition=2 \ |
---|
| 631 | -jobconf mapred.reduce.tasks=12 |
---|
| 632 | </pre> |
---|
| 633 | <p> |
---|
| 634 | å
¶äžïŒ<em>-jobconf stream.map.output.field.separator=.</em> å<em>-jobconf stream.num.map.output.key.fields=4</em>æ¯åæäžçäŸåãStreamingçšè¿äž€äžªåéæ¥åŸå°mapperçkey/value对ã |
---|
| 635 | </p> |
---|
| 636 | <p> |
---|
| 637 | äžé¢çMap/Reduce äœäžäžmapèŸåºçkeyäžè¬æ¯ç±“.”åå²æçååãäœæ¯å 䞺䜿çšäº |
---|
| 638 | <em>-jobconf num.key.fields.for.partition=2</em> |
---|
| 639 | é项ïŒæ以Map/Reduceæ¡æ¶äœ¿çškeyçå䞀åæ¥ååmapçèŸåºãå
¶äžïŒ |
---|
| 640 | <em>-jobconf map.output.key.field.separator=.</em> |
---|
| 641 | æå®äºè¿æ¬¡åå䜿çšçkeyçåé笊ãè¿æ ·å¯ä»¥ä¿è¯åšæækey/value对äžïŒ |
---|
| 642 | keyåŒå䞀䞪ååŒçžåçæækey被åå°äžç»ïŒåé
ç»äžäžªreducerã |
---|
| 643 | </p> |
---|
| 644 | <p> |
---|
| 645 | |
---|
| 646 | <em>è¿ç§é«æçæ¹æ³çä»·äºæå®å䞀åäœäžºäž»é®ïŒå䞀åäœäžºå¯é®ã |
---|
| 647 | äž»é®çšäºåååïŒäž»é®åå¯é®çç»åçšäºæåºã</em>äžäžªç®åç瀺äŸåŠäžïŒ |
---|
| 648 | </p> |
---|
| 649 | <p> |
---|
| 650 | MapçèŸåºïŒkeyïŒ</p> |
---|
| 651 | <pre class="code"> |
---|
| 652 | 11.12.1.2 |
---|
| 653 | 11.14.2.3 |
---|
| 654 | 11.11.4.1 |
---|
| 655 | 11.12.1.1 |
---|
| 656 | 11.14.2.2 |
---|
| 657 | |
---|
| 658 | </pre> |
---|
| 659 | <p> |
---|
| 660 | ååç»3䞪reducerïŒå䞀åçåŒçšäºååïŒ</p> |
---|
| 661 | <pre class="code"> |
---|
| 662 | 11.11.4.1 |
---|
| 663 | ----------- |
---|
| 664 | 11.12.1.2 |
---|
| 665 | 11.12.1.1 |
---|
| 666 | ----------- |
---|
| 667 | 11.14.2.3 |
---|
| 668 | 11.14.2.2 |
---|
| 669 | </pre> |
---|
| 670 | <p> |
---|
| 671 | åšæ¯äžªåååçç»å
æåºïŒå䞪åçåŒéœçšäºæåºïŒ |
---|
| 672 | </p> |
---|
| 673 | <pre class="code"> |
---|
| 674 | 11.11.4.1 |
---|
| 675 | ----------- |
---|
| 676 | 11.12.1.1 |
---|
| 677 | 11.12.1.2 |
---|
| 678 | ----------- |
---|
| 679 | 11.14.2.2 |
---|
| 680 | 11.14.2.3 |
---|
| 681 | </pre> |
---|
| 682 | <a name="N101DF"></a><a name="Hadoop%E8%81%9A%E5%90%88%E5%8A%9F%E8%83%BD%E5%8C%85%E7%9A%84%E4%BD%BF%E7%94%A8%EF%BC%88-reduce+aggregate+%E9%80%89%E9%A1%B9%EF%BC%89"></a> |
---|
| 683 | <h3 class="h4">Hadoop聚合功能包的使用（-reduce aggregate 选项）</h3> |
---|
| 684 | <p> |
---|
| 685 | Hadoopæäžäžªå·¥å
·å
“Aggregate”ïŒ |
---|
| 686 | <a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate">https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate</a>ïŒã |
---|
| 687 | “Aggregate”æäŸäžäžªç¹æ®çreducerç±»åäžäžªç¹æ®çcombinerç±»ïŒ |
---|
| 688 | 并äžæäžç³»åç“èå嚔aggregator”ïŒïŒäŸåŠ“sum”max”min”çïŒçšäºèåäžç»valueçåºåã |
---|
| 689 | çšæ·å¯ä»¥äœ¿çšAggregateå®ä¹äžäžªmapperæä»¶ç±»ïŒ |
---|
| 690 | è¿äžªç±»çšäºäžºmapperèŸå
¥çæ¯äžªkey/value对产ç“å¯èå项”ã |
---|
| 691 | combiner/reducerå©çšéåœçèååšèåè¿äºå¯èå项ã |
---|
| 692 | </p> |
---|
| 693 | <p> |
---|
| 694 | èŠäœ¿çšAggregateïŒåªéæ宓-reducer aggregate”ïŒ</p> |
---|
| 695 | <pre class="code"> |
---|
| 696 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 697 | -input myInputDirs \ |
---|
| 698 | -output myOutputDir \ |
---|
| 699 | -mapper myAggregatorForKeyCount.py \ |
---|
| 700 | -reducer aggregate \ |
---|
| 701 | -file myAggregatorForKeyCount.py \ |
---|
| 702 | -jobconf mapred.reduce.tasks=12 |
---|
| 703 | </pre> |
---|
| 704 | <p> |
---|
| 705 | pythonçšåºmyAggregatorForKeyCount.pyäŸåïŒ |
---|
| 706 | </p> |
---|
| 707 | <pre class="code"> |
---|
| 708 | #!/usr/bin/python |
---|
| 709 | |
---|
| 710 | import sys; |
---|
| 711 | |
---|
| 712 | def generateLongCountToken(id): |
---|
| 713 | return "LongValueSum:" + id + "\t" + "1" |
---|
| 714 | |
---|
| 715 | def main(argv): |
---|
| 716 | line = sys.stdin.readline(); |
---|
| 717 | try: |
---|
| 718 | while line: |
---|
| 719 | line = line[:-1]; |
---|
| 720 | fields = line.split("\t"); |
---|
| 721 | print generateLongCountToken(fields[0]); |
---|
| 722 | line = sys.stdin.readline(); |
---|
| 723 | except "end of file": |
---|
| 724 | return None |
---|
| 725 | if __name__ == "__main__": |
---|
| 726 | main(sys.argv) |
---|
| 727 | </pre> |
---|
| 728 | <a name="N101FA"></a><a name="%E5%AD%97%E6%AE%B5%E7%9A%84%E9%80%89%E5%8F%96%EF%BC%88%E7%B1%BB%E4%BC%BC%E4%BA%8Eunix%E4%B8%AD%E7%9A%84+%27cut%27+%E5%91%BD%E4%BB%A4%EF%BC%89"></a> |
---|
| 729 | <h3 class="h4">字段的选取（类似于unix中的 'cut' 命令） </h3> |
---|
| 730 | <p> |
---|
| 731 | Hadoopçå·¥å
·ç±»org.apache.hadoop.mapred.lib.FieldSelectionMapReduceåž®å©çšæ·é«æå€çææ¬æ°æ®ïŒ |
---|
| 732 | å°±åunixäžç“cut”å·¥å
ዌᴌ
·ç±»äžçmapåœæ°æèŸå
¥çkey/value对çäœå段çåè¡šã |
---|
| 733 | çšæ·å¯ä»¥æå®å段çåé笊ïŒé»è®€æ¯tabïŒïŒ |
---|
| 734 | å¯ä»¥éæ©å段åè¡šäžä»»æäžæ®µïŒç±åè¡šäžäžäžªæå€äžªå段ç»æïŒäœäžºmapèŸåºçkeyæè
valueã |
---|
| 735 | åæ ·ïŒå·¥å
·ç±»äžçreduceåœæ°ä¹æèŸå
¥çkey/value对çäœå段çåè¡šïŒçšæ·å¯ä»¥éåä»»æäžæ®µäœäžºreduceèŸåºçkeyævalueãäŸåŠïŒ |
---|
| 736 | </p> |
---|
| 737 | <pre class="code"> |
---|
| 738 | $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 739 | -input myInputDirs \ |
---|
| 740 | -output myOutputDir \ |
---|
| 741 | -mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce \ |
---|
| 742 | -reducer org.apache.hadoop.mapred.lib.FieldSelectionMapReduce \ |
---|
| 743 | -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \ |
---|
| 744 | -jobconf map.output.key.field.separator=. \ |
---|
| 745 | -jobconf num.key.fields.for.partition=2 \ |
---|
| 746 | -jobconf mapred.data.field.separator=. \ |
---|
| 747 | -jobconf map.output.key.value.fields.spec=6,5,1-3:0- \ |
---|
| 748 | -jobconf reduce.output.key.value.fields.spec=0-2:5- \ |
---|
| 749 | -jobconf mapred.reduce.tasks=12 |
---|
| 750 | </pre> |
---|
| 751 | <p> |
---|
| 752 | é项“-jobconf map.output.key.value.fields.spec=6,5,1-3:0-”æå®äºåŠäœäžºmapçèŸåºéåkeyåvalueãKeyéåè§ååvalueéåè§å籓:”åå²ã |
---|
| 753 | åšè¿äžªäŸåäžïŒmapèŸåºçkeyç±å段6ïŒ5ïŒ1ïŒ2å3ç»æãèŸåºçvalueç±ææå段ç»æ0-”æå段0以åä¹åææå段ïŒã |
---|
| 754 | </p> |
---|
| 755 | <p> |
---|
| 756 | é项“-jobconf reduce.output.key.value.fields.spec=0-2:0-”ïŒè¯è
泚ïŒæ€å€åºäžº”0-2:5-“ïŒæå®åŠäœäžºreduceçèŸåºéåvalueã |
---|
| 757 | æ¬äŸäžïŒreduceçèŸåºçkeyå°å
å«å段0ïŒ1ïŒ2ïŒå¯¹åºäºåå§çå段6ïŒ5ïŒ1ïŒã |
---|
| 758 | reduceèŸåºçvalueå°å
å«èµ·èªå段5çææå段ïŒå¯¹åºäºææçåå§å段ïŒã |
---|
| 759 | </p> |
---|
| 760 | </div> |
---|
| 761 | |
---|
| 762 | |
---|
| 763 | <a name="N1020F"></a><a name="%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98"></a> |
---|
| 764 | <h2 class="h3">常见问题</h2> |
---|
| 765 | <div class="section"> |
---|
| 766 | <a name="N10215"></a><a name="%E6%88%91%E8%AF%A5%E6%80%8E%E6%A0%B7%E4%BD%BF%E7%94%A8Hadoop+Streaming%E8%BF%90%E8%A1%8C%E4%B8%80%E7%BB%84%E7%8B%AC%E7%AB%8B%EF%BC%88%E7%9B%B8%E5%85%B3%EF%BC%89%E7%9A%84%E4%BB%BB%E5%8A%A1%E5%91%A2%EF%BC%9F"></a> |
---|
| 767 | <h3 class="h4">我该怎样使用Hadoop Streaming运行一组独立（相关）的任务呢？</h3> |
---|
| 768 | <p> |
---|
| 769 | å€æ°æ
åµäžïŒäœ äžéèŠMap Reduceçå
šéšåèœïŒ |
---|
| 770 | èåªéèŠè¿è¡åäžçšåºçå€äžªå®äŸïŒæè
䜿çšäžåæ°æ®ïŒæè
åšçžåæ°æ®äžäœ¿çšäžåçåæ°ã |
---|
| 771 | äœ å¯ä»¥éè¿Hadoop Streamingæ¥å®ç°ã</p> |
---|
| 772 | <a name="N1021F"></a><a name="%E5%A6%82%E4%BD%95%E5%A4%84%E7%90%86%E5%A4%9A%E4%B8%AA%E6%96%87%E4%BB%B6%EF%BC%8C%E5%85%B6%E4%B8%AD%E6%AF%8F%E4%B8%AA%E6%96%87%E4%BB%B6%E4%B8%80%E4%B8%AAmap%EF%BC%9F"></a> |
---|
| 773 | <h3 class="h4">如何处理多个文件，其中每个文件一个map？</h3> |
---|
| 774 | <p> |
---|
| 775 | äŸåŠè¿æ ·äžäžªé®é¢ïŒåšé矀äžå猩ïŒzippingïŒäžäºæ件ïŒäœ å¯ä»¥äœ¿çšä»¥äžå ç§æ¹æ³ïŒ</p> |
---|
| 776 | <ol> |
---|
| 777 | |
---|
| 778 | <li>䜿çšHadoop Streamingåçšæ·çŒåçmapperèæ¬çšåºïŒ<ul> |
---|
| 779 | |
---|
| 780 | <li> çæäžäžªæ件ïŒæ件äžå
å«ææèŠå猩çæ件åšHDFSäžçå®æŽè·¯åŸãæ¯äžªmap ä»»å¡è·åŸäžäžªè·¯åŸåäœäžºèŸå
¥ã</li> |
---|
| 781 | |
---|
| 782 | <li> å建äžäžªmapperèæ¬çšåºïŒå®ç°åŠäžåèœïŒè·åŸæ件åïŒæ该æ件æ·èŽå°æ¬å°ïŒå猩该æ件并æå®åå°ææçèŸåºç®åœã</li> |
---|
| 783 | |
---|
| 784 | </ul> |
---|
| 785 | </li> |
---|
| 786 | |
---|
| 787 | <li>䜿çšç°æçHadoopæ¡æ¶ïŒ<ul> |
---|
| 788 | |
---|
| 789 | <li>åšmainåœæ°äžæ·»å åŠäžåœä»€ïŒ |
---|
| 790 | <pre class="code"> |
---|
| 791 | FileOutputFormat.setCompressOutput(conf, true); |
---|
| 792 | FileOutputFormat.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.GzipCodec.class); |
---|
| 793 | conf.setInputFormat(NonSplitableTextInputFormat.class); |
---|
| 794 | conf.setNumReduceTasks(0); |
---|
| 795 | </pre> |
---|
| 796 | </li> |
---|
| 797 | |
---|
| 798 | <li>çŒåmapåœæ°ïŒ |
---|
| 799 | <pre class="code"> |
---|
| 800 | |
---|
| 801 | public void map(WritableComparable key, Writable value, |
---|
| 802 | OutputCollector output, |
---|
| 803 | Reporter reporter) throws IOException { |
---|
| 804 | output.collect((Text)value, null); |
---|
| 805 | } |
---|
| 806 | </pre> |
---|
| 807 | </li> |
---|
| 808 | |
---|
| 809 | <li>泚æèŸåºçæ件åååæ件åäžå</li> |
---|
| 810 | |
---|
| 811 | </ul> |
---|
| 812 | </li> |
---|
| 813 | |
---|
| 814 | </ol> |
---|
| 815 | <a name="N1024A"></a><a name="%E5%BA%94%E8%AF%A5%E4%BD%BF%E7%94%A8%E5%A4%9A%E5%B0%91%E4%B8%AAreducer%EF%BC%9F"></a> |
---|
| 816 | <h3 class="h4">应该使用多少个reducer？</h3> |
---|
| 817 | <p> |
---|
| 818 | 请åèHadoop WikiïŒ<a href="mapred_tutorial.html#Reducer">Reducer</a> |
---|
| 819 | |
---|
| 820 | </p> |
---|
| 821 | <a name="N10258"></a><a name="%E5%A6%82%E6%9E%9C%E5%9C%A8Shell%E8%84%9A%E6%9C%AC%E9%87%8C%E8%AE%BE%E7%BD%AE%E4%B8%80%E4%B8%AA%E5%88%AB%E5%90%8D%EF%BC%8C%E5%B9%B6%E6%94%BE%E5%9C%A8-mapper%E4%B9%8B%E5%90%8E%EF%BC%8CStreaming%E4%BC%9A%E6%AD%A3%E5%B8%B8%E8%BF%90%E8%A1%8C%E5%90%97%EF%BC%9F%0A%E4%BE%8B%E5%A6%82%EF%BC%8Calias+cl%3D%27cut+-fl%27%EF%BC%8C-mapper+%22cl%22%E4%BC%9A%E8%BF%90%E8%A1%8C%E6%AD%A3%E5%B8%B8%E5%90%97%EF%BC%9F"></a> |
---|
| 822 | <h3 class="h4"> |
---|
| 823 | 如果在Shell脚本里设置一个别名，并放在-mapper之后，Streaming会正常运行吗？ |
---|
| 824 | 例如，alias c1='cut -f1'，-mapper "c1"会运行正常吗？ |
---|
| 825 | </h3> |
---|
| 826 | <p> |
---|
| 827 | èæ¬éæ æ³äœ¿çšå«åïŒäœæ¯å
讞åéæ¿æ¢ïŒäŸåŠïŒ |
---|
| 828 | </p> |
---|
| 829 | <pre class="code"> |
---|
| 830 | $ hadoop dfs -cat samples/student_marks |
---|
| 831 | alice 50 |
---|
| 832 | bruce 70 |
---|
| 833 | charlie 80 |
---|
| 834 | dan 75 |
---|
| 835 | |
---|
| 836 | $ c2='cut -f2'; $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \ |
---|
| 837 | -input /user/me/samples/student_marks \ |
---|
| 838 | -mapper \"$c2\" -reducer 'cat' \ |
---|
| 839 | -output /user/me/samples/student_out \ |
---|
| 840 | -jobconf mapred.job.name='Experiment' |
---|
| 841 | |
---|
| 842 | $ hadoop dfs -ls samples/student_out |
---|
| 843 | Found 1 items/user/me/samples/student_out/part-00000 &lt;r 3&gt; 16 |
---|
| 844 | |
---|
| 845 | $ hadoop dfs -cat samples/student_out/part-00000 |
---|
| 846 | 50 |
---|
| 847 | 70 |
---|
| 848 | 75 |
---|
| 849 | 80 |
---|
| 850 | </pre> |
---|
| 851 | <a name="N10266"></a><a name="%E6%88%91%E5%8F%AF%E4%BB%A5%E4%BD%BF%E7%94%A8UNIX+pipes%E5%90%97%EF%BC%9F%E4%BE%8B%E5%A6%82+%E2%80%93mapper+%22cut+%E2%80%93fl+%7C+set+s%2Ffoo%2Fbar%2Fg%22%E7%AE%A1%E7%94%A8%E4%B9%88%EF%BC%9F"></a> |
---|
| 852 | <h3 class="h4"> |
---|
| 853 | 我可以使用UNIX pipes吗？例如 -mapper "cut -f1 | sed s/foo/bar/g"管用么？ |
---|
| 854 | </h3> |
---|
| 855 | <p> |
---|
| 856 | ç°åšäžæ¯æïŒèäžäŒç»åºé误信毓java.io.IOException: Broken pipe”ãè¿æ讞æ¯äžäžªbugïŒéèŠè¿äžæ¥ç 究ã |
---|
| 857 | </p> |
---|
| 858 | <a name="N10270"></a><a name="%E5%9C%A8streaming%E4%BD%9C%E4%B8%9A%E4%B8%AD%E7%94%A8-file%E9%80%89%E9%A1%B9%E8%BF%90%E8%A1%8C%E4%B8%80%E4%B8%AA"></a> |
---|
| 859 | <h3 class="h4">在streaming作业中用-file选项运行一个分布式的超大可执行文件（例如，3.6G）时， |
---|
| 860 | 我得到了一个错误信息“No space left on device”。如何解决？ |
---|
| 861 | </h3> |
---|
| 862 | <p> |
---|
| 863 | é
眮åéstream.tmpdiræå®äºäžäžªç®åœïŒåšè¿äžªç®åœäžèŠè¿è¡æjarå
çæäœãstream.tmpdirçé»è®€åŒæ¯/tmpïŒäœ éèŠå°è¿äžªåŒè®Ÿçœ®äžºäžäžªææŽå€§ç©ºéŽçç®åœïŒ |
---|
| 864 | </p> |
---|
| 865 | <pre class="code"> |
---|
| 866 | -jobconf stream.tmpdir=/export/bigspace/... |
---|
| 867 | </pre> |
---|
| 868 | <a name="N10281"></a><a name="%E5%A6%82%E4%BD%95%E8%AE%BE%E7%BD%AE%E5%A4%9A%E4%B8%AA%E8%BE%93%E5%85%A5%E7%9B%AE%E5%BD%95%EF%BC%9F"></a> |
---|
| 869 | <h3 class="h4">如何设置多个输入目录？</h3> |
---|
| 870 | <p> |
---|
| 871 | å¯ä»¥äœ¿çšå€äžª-inputé项讟眮å€äžªèŸå
¥ç®åœïŒ |
---|
| 872 | </p> |
---|
| 873 | <pre class="code"> |
---|
| 874 | hadoop jar hadoop-streaming.jar -input '/user/foo/dir1' -input '/user/foo/dir2' |
---|
| 875 | </pre> |
---|
| 876 | <a name="N1028E"></a><a name="%E5%A6%82%E4%BD%95%E7%94%9F%E6%88%90gzip%E6%A0%BC%E5%BC%8F%E7%9A%84%E8%BE%93%E5%87%BA%E6%96%87%E4%BB%B6%EF%BC%9F"></a> |
---|
| 877 | <h3 class="h4">如何生成gzip格式的输出文件？</h3> |
---|
| 878 | <p> |
---|
| 879 | 除了纯文本格式的输出，你还可以生成gzip文件格式的输出，你只需设置streaming作业中的选项‘-jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec’。 |
---|
| 880 | </p> |
---|
| 881 | <a name="N10298"></a><a name="Streaming%E4%B8%AD%E5%A6%82%E4%BD%95%E8%87%AA%E5%AE%9A%E4%B9%89input%2Foutput+format%EF%BC%9F"></a> |
---|
| 882 | <h3 class="h4">Streaming中如何自定义input/output format？</h3> |
---|
| 883 | <p> |
---|
| 884 | è³å°åšHadoop 0.14çæ¬ä»¥åïŒäžæ¯æå€äžªjaræ件ãæ以åœæå®èªå®ä¹çç±»æ¶ïŒäœ èŠæä»ä»¬ååæçstreaming jaræå
åšäžèµ·ïŒå¹¶çšè¿äžªèªå®ä¹çjarå
æ¿æ¢é»è®€çhadoop streaming jarå
ã |
---|
| 885 | </p> |
---|
| 886 | <a name="N102A2"></a><a name="Streaming%E5%A6%82%E4%BD%95%E8%A7%A3%E6%9E%90XML%E6%96%87%E6%A1%A3%EF%BC%9F"></a> |
---|
| 887 | <h3 class="h4">Streaming如何解析XML文档？</h3> |
---|
| 888 | <p> |
---|
| 889 | äœ å¯ä»¥äœ¿çšStreamXmlRecordReaderæ¥è§£æXMLææ¡£ã |
---|
| 890 | </p> |
---|
| 891 | <pre class="code"> |
---|
| 892 | hadoop jar hadoop-streaming.jar -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" ..... (rest of the command) |
---|
| 893 | </pre> |
---|
| 894 | <p> |
---|
| 895 | Mapä»»å¡äŒæBEGIN_STRINGåEND_STRINGä¹éŽçéšåçäœäžæ¡è®°åœã |
---|
| 896 | </p> |
---|
| 897 | <a name="N102B3"></a><a name="%E5%9C%A8streaming%E5%BA%94%E7%94%A8%E7%A8%8B%E5%BA%8F%E4%B8%AD%E5%A6%82%E4%BD%95%E6%9B%B4%E6%96%B0%E8%AE%A1%E6%95%B0%E5%99%A8%EF%BC%9F"></a> |
---|
| 898 | <h3 class="h4">在streaming应用程序中如何更新计数器？</h3> |
---|
| 899 | <p> |
---|
| 900 | streamingè¿çšèœå€äœ¿çšstderrååºè®¡æ°åšä¿¡æ¯ã |
---|
| 901 | <span class="codefrag">reporter:counter:&lt;group&gt;,&lt;counter&gt;,&lt;amount&gt;</span> |
---|
| 902 | åºè¯¥è¢«åéå°stderræ¥æŽæ°è®¡æ°åšã |
---|
| 903 | </p> |
---|
| 904 | <a name="N102C0"></a><a name="%E5%A6%82%E4%BD%95%E6%9B%B4%E6%96%B0streaming%E5%BA%94%E7%94%A8%E7%A8%8B%E5%BA%8F%E7%9A%84%E7%8A%B6%E6%80%81%EF%BC%9F"></a> |
---|
| 905 | <h3 class="h4">如何更新streaming应用程序的状态？</h3> |
---|
| 906 | <p> |
---|
| 907 | streamingè¿çšèœå€äœ¿çšstderrååºç¶æä¿¡æ¯ã |
---|
| 908 | <span class="codefrag">reporter:status:&lt;message&gt;</span> 要被发送到stderr来设置状态。 |
---|
| 909 | </p> |
---|
| 910 | </div> |
---|
| 911 | |
---|
| 912 | </div> |
---|
| 913 | <!--+ |
---|
| 914 | |end content |
---|
| 915 | +--> |
---|
| 916 | <div class="clearboth"> </div> |
---|
| 917 | </div> |
---|
| 918 | <div id="footer"> |
---|
| 919 | <!--+ |
---|
| 920 | |start bottomstrip |
---|
| 921 | +--> |
---|
| 922 | <div class="lastmodified"> |
---|
| 923 | <script type="text/javascript"><!-- |
---|
| 924 | document.write("Last Published: " + document.lastModified); |
---|
| 925 | // --></script> |
---|
| 926 | </div> |
---|
| 927 | <div class="copyright"> |
---|
| 928 | Copyright © |
---|
| 929 | 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> |
---|
| 930 | </div> |
---|
| 931 | <!--+ |
---|
| 932 | |end bottomstrip |
---|
| 933 | +--> |
---|
| 934 | </div> |
---|
| 935 | </body> |
---|
| 936 | </html> |
---|