[120] | 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
---|
| 2 | <!--NewPage--> |
---|
| 3 | <HTML> |
---|
| 4 | <HEAD> |
---|
| 5 | <!-- Generated by javadoc (build 1.6.0_07) on Tue Sep 01 20:57:00 UTC 2009 --> |
---|
| 6 | <TITLE> |
---|
| 7 | org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API) |
---|
| 8 | </TITLE> |
---|
| 9 | |
---|
| 10 | <META NAME="date" CONTENT="2009-09-01"> |
---|
| 11 | |
---|
| 12 | <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../../../stylesheet.css" TITLE="Style"> |
---|
| 13 | |
---|
| 14 | <SCRIPT type="text/javascript"> |
---|
| 15 | function windowTitle() |
---|
| 16 | { |
---|
| 17 | if (location.href.indexOf('is-external=true') == -1) { |
---|
| 18 | parent.document.title="org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API)"; |
---|
| 19 | } |
---|
| 20 | } |
---|
| 21 | </SCRIPT> |
---|
| 22 | <NOSCRIPT> |
---|
| 23 | </NOSCRIPT> |
---|
| 24 | |
---|
| 25 | </HEAD> |
---|
| 26 | |
---|
| 27 | <BODY BGCOLOR="white" onload="windowTitle();"> |
---|
| 28 | <HR> |
---|
| 29 | |
---|
| 30 | |
---|
| 31 | <!-- ========= START OF TOP NAVBAR ======= --> |
---|
| 32 | <A NAME="navbar_top"><!-- --></A> |
---|
| 33 | <A HREF="#skip-navbar_top" title="Skip navigation links"></A> |
---|
| 34 | <TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY=""> |
---|
| 35 | <TR> |
---|
| 36 | <TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> |
---|
| 37 | <A NAME="navbar_top_firstrow"><!-- --></A> |
---|
| 38 | <TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY=""> |
---|
| 39 | <TR ALIGN="center" VALIGN="top"> |
---|
| 40 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD> |
---|
| 41 | <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT> </TD> |
---|
| 42 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <FONT CLASS="NavBarFont1">Class</FONT> </TD> |
---|
| 43 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD> |
---|
| 44 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD> |
---|
| 45 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD> |
---|
| 46 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD> |
---|
| 47 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD> |
---|
| 48 | </TR> |
---|
| 49 | </TABLE> |
---|
| 50 | </TD> |
---|
| 51 | <TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM> |
---|
| 52 | </EM> |
---|
| 53 | </TD> |
---|
| 54 | </TR> |
---|
| 55 | |
---|
| 56 | <TR> |
---|
| 57 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
| 58 | <A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A> |
---|
| 59 | <A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD> |
---|
| 60 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
| 61 | <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A> |
---|
| 62 | <A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A> |
---|
| 63 | <SCRIPT type="text/javascript"> |
---|
| 64 | <!-- |
---|
| 65 | if(window==top) { |
---|
| 66 | document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>'); |
---|
| 67 | } |
---|
| 68 | //--> |
---|
| 69 | </SCRIPT> |
---|
| 70 | <NOSCRIPT> |
---|
| 71 | <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A> |
---|
| 72 | </NOSCRIPT> |
---|
| 73 | |
---|
| 74 | |
---|
| 75 | </FONT></TD> |
---|
| 76 | </TR> |
---|
| 77 | </TABLE> |
---|
| 78 | <A NAME="skip-navbar_top"></A> |
---|
| 79 | <!-- ========= END OF TOP NAVBAR ========= --> |
---|
| 80 | |
---|
| 81 | <HR> |
---|
| 82 | <H2> |
---|
| 83 | Package org.apache.hadoop.mapred.pipes |
---|
| 84 | </H2> |
---|
| 85 | Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce. |
---|
| 86 | <P> |
---|
| 87 | <B>See:</B> |
---|
| 88 | <BR> |
---|
| 89 | <A HREF="#package_description"><B>Description</B></A> |
---|
| 90 | <P> |
---|
| 91 | |
---|
| 92 | <TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> |
---|
| 93 | <TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"> |
---|
| 94 | <TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"> |
---|
| 95 | <B>Class Summary</B></FONT></TH> |
---|
| 96 | </TR> |
---|
| 97 | <TR BGCOLOR="white" CLASS="TableRowColor"> |
---|
| 98 | <TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/pipes/Submitter.html" title="class in org.apache.hadoop.mapred.pipes">Submitter</A></B></TD> |
---|
| 99 | <TD>The main entry point and job submitter.</TD> |
---|
| 100 | </TR> |
---|
| 101 | </TABLE> |
---|
| 102 | |
---|
| 103 | |
---|
| 104 | <P> |
---|
| 105 | <A NAME="package_description"><!-- --></A><H2> |
---|
| 106 | Package org.apache.hadoop.mapred.pipes Description |
---|
| 107 | </H2> |
---|
| 108 | |
---|
| 109 | <P> |
---|
| 110 | Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce. The |
---|
| 111 | primary approach is to split the C++ code into a separate process that |
---|
| 112 | does the application specific code. In many ways, the approach will be |
---|
| 113 | similar to Hadoop streaming, but using Writable serialization to |
---|
| 114 | convert the types into bytes that are sent to the process via a |
---|
| 115 | socket. |
---|
| 116 | |
---|
| 117 | <p> |
---|
| 118 | |
---|
| 119 | The class org.apache.hadoop.mapred.pipes.Submitter has a public static |
---|
| 120 | method to submit a job as a JobConf and a main method that takes an |
---|
| 121 | application and optional configuration file, input directories, and |
---|
| 122 | output directory. The CLI for the main method looks like: |
---|
| 123 | |
---|
| 124 | <pre> |
---|
| 125 | bin/hadoop pipes \ |
---|
| 126 | [-input <i>inputDir</i>] \ |
---|
| 127 | [-output <i>outputDir</i>] \ |
---|
| 128 | [-jar <i>applicationJarFile</i>] \ |
---|
| 129 | [-inputformat <i>class</i>] \ |
---|
| 130 | [-map <i>class</i>] \ |
---|
| 131 | [-partitioner <i>class</i>] \ |
---|
| 132 | [-reduce <i>class</i>] \ |
---|
| 133 | [-writer <i>class</i>] \ |
---|
| 134 | [-program <i>program url</i>] \ |
---|
| 135 | [-conf <i>configuration file</i>] \ |
---|
| 136 | [-D <i>property=value</i>] \ |
---|
| 137 | [-fs <i>local|namenode:port</i>] \ |
---|
| 138 | [-jt <i>local|jobtracker:port</i>] \ |
---|
| 139 | [-files <i>comma separated list of files</i>] \ |
---|
| 140 | [-libjars <i>comma separated list of jars</i>] \ |
---|
| 141 | [-archives <i>comma separated list of archives</i>] |
---|
| 142 | </pre> |
---|
| 143 | |
---|
| 144 | |
---|
| 145 | <p> |
---|
| 146 | |
---|
| 147 | The application programs link against a thin C++ wrapper library that |
---|
| 148 | handles the communication with the rest of the Hadoop system. The C++ |
---|
| 149 | interface is "swigable" so that interfaces can be generated for python |
---|
| 150 | and other scripting languages. All of the C++ functions and classes |
---|
| 151 | are in the HadoopPipes namespace. The job may consist of any |
---|
| 152 | combination of Java and C++ RecordReaders, Mappers, Partitioner, |
---|
| 153 | Combiner, Reducer, and RecordWriter. |
---|
| 154 | |
---|
| 155 | <p> |
---|
| 156 | |
---|
| 157 | Hadoop Pipes has generic Java classes for handling the mapper and |
---|
| 158 | reducer (PipesMapRunner and PipesReducer). They fork off the |
---|
| 159 | application program and communicate with it over a socket. The |
---|
| 160 | communication is handled by the C++ wrapper library and the |
---|
| 161 | PipesMapRunner and PipesReducer. |
---|
| 162 | |
---|
| 163 | <p> |
---|
| 164 | |
---|
| 165 | The application program passes in a factory object that can create |
---|
| 166 | the various objects needed by the framework to the runTask |
---|
| 167 | function. The framework creates the Mapper or Reducer as |
---|
| 168 | appropriate and calls the map or reduce method to invoke the |
---|
| 169 | application's code. The JobConf is available to the application. |
---|
| 170 | |
---|
| 171 | <p> |
---|
| 172 | |
---|
| 173 | The Mapper and Reducer objects get all of their inputs, outputs, and |
---|
| 174 | context via context objects. The advantage of using the context |
---|
| 175 | objects is that their interface can be extended with additional |
---|
| 176 | methods without breaking clients. Although this interface is different |
---|
| 177 | from the current Java interface, the plan is to migrate the Java |
---|
| 178 | interface in this direction. |
---|
| 179 | |
---|
| 180 | <p> |
---|
| 181 | |
---|
| 182 | Although the Java implementation is typed, the C++ interface for keys |
---|
| 183 | and values is just a byte buffer. Since STL strings provide precisely |
---|
| 184 | the right functionality and are standard, they will be used. The |
---|
| 185 | decision to not use stronger types was to simplify the interface. |
---|
| 186 | |
---|
| 187 | <p> |
---|
| 188 | |
---|
| 189 | The application can also define combiner functions. The combiner will |
---|
| 190 | be run locally by the framework in the application process to avoid |
---|
| 191 | the round trip to the Java process and back. Because the compare |
---|
| 192 | function is not available in C++, the combiner will use memcmp to |
---|
| 193 | sort the inputs to the combiner. This is not as general as the Java |
---|
| 194 | equivalent, which uses the user's comparator, but should cover the |
---|
| 195 | majority of the use cases. As the map function outputs key/value |
---|
| 196 | pairs, they will be buffered. When the buffer is full, it will be |
---|
| 197 | sorted and passed to the combiner. The output of the combiner will be |
---|
| 198 | sent to the Java process. |
---|
| 199 | |
---|
| 200 | <p> |
---|
| 201 | |
---|
| 202 | The application can also set a partition function to control which key |
---|
| 203 | is given to a particular reduce. If a partition function is not |
---|
| 204 | defined, the Java one will be used. The partition function will be |
---|
| 205 | called by the C++ framework before the key/value pair is sent back to |
---|
| 206 | Java. |
---|
| 207 | |
---|
| 208 | <p> |
---|
| 209 | |
---|
| 210 | The application programs can also register counters with a group and a name |
---|
| 211 | and also increment the counters and get the counter values. A word-count |
---|
| 212 | example illustrating pipes usage with counters is available at |
---|
| 213 | <a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/examples/pipes/impl/wordcount-simple.cc">wordcount-simple.cc</a> |
---|
| 214 | <P> |
---|
| 215 | |
---|
| 216 | <P> |
---|
| 217 | <DL> |
---|
| 218 | </DL> |
---|
| 219 | <HR> |
---|
| 220 | |
---|
| 221 | |
---|
| 222 | <!-- ======= START OF BOTTOM NAVBAR ====== --> |
---|
| 223 | <A NAME="navbar_bottom"><!-- --></A> |
---|
| 224 | <A HREF="#skip-navbar_bottom" title="Skip navigation links"></A> |
---|
| 225 | <TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY=""> |
---|
| 226 | <TR> |
---|
| 227 | <TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> |
---|
| 228 | <A NAME="navbar_bottom_firstrow"><!-- --></A> |
---|
| 229 | <TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY=""> |
---|
| 230 | <TR ALIGN="center" VALIGN="top"> |
---|
| 231 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD> |
---|
| 232 | <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT> </TD> |
---|
| 233 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <FONT CLASS="NavBarFont1">Class</FONT> </TD> |
---|
| 234 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD> |
---|
| 235 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD> |
---|
| 236 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD> |
---|
| 237 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD> |
---|
| 238 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD> |
---|
| 239 | </TR> |
---|
| 240 | </TABLE> |
---|
| 241 | </TD> |
---|
| 242 | <TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM> |
---|
| 243 | </EM> |
---|
| 244 | </TD> |
---|
| 245 | </TR> |
---|
| 246 | |
---|
| 247 | <TR> |
---|
| 248 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
| 249 | <A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A> |
---|
| 250 | <A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD> |
---|
| 251 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
| 252 | <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A> |
---|
| 253 | <A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A> |
---|
| 254 | <SCRIPT type="text/javascript"> |
---|
| 255 | <!-- |
---|
| 256 | if(window==top) { |
---|
| 257 | document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>'); |
---|
| 258 | } |
---|
| 259 | //--> |
---|
| 260 | </SCRIPT> |
---|
| 261 | <NOSCRIPT> |
---|
| 262 | <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A> |
---|
| 263 | </NOSCRIPT> |
---|
| 264 | |
---|
| 265 | |
---|
| 266 | </FONT></TD> |
---|
| 267 | </TR> |
---|
| 268 | </TABLE> |
---|
| 269 | <A NAME="skip-navbar_bottom"></A> |
---|
| 270 | <!-- ======== END OF BOTTOM NAVBAR ======= --> |
---|
| 271 | |
---|
| 272 | <HR> |
---|
| 273 | Copyright © 2009 The Apache Software Foundation |
---|
| 274 | </BODY> |
---|
| 275 | </HTML> |
---|