1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
---|
2 | <!--NewPage--> |
---|
3 | <HTML> |
---|
4 | <HEAD> |
---|
5 | <!-- Generated by javadoc (build 1.6.0_07) on Tue Sep 01 20:57:00 UTC 2009 --> |
---|
6 | <TITLE> |
---|
7 | org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API) |
---|
8 | </TITLE> |
---|
9 | |
---|
10 | <META NAME="date" CONTENT="2009-09-01"> |
---|
11 | |
---|
12 | <LINK REL ="stylesheet" TYPE="text/css" HREF="../../../../../stylesheet.css" TITLE="Style"> |
---|
13 | |
---|
14 | <SCRIPT type="text/javascript"> |
---|
15 | function windowTitle() /* Invoked from the BODY onload handler: copies this page's title onto the parent window's document. */ |
---|
16 | { |
---|
17 | if (location.href.indexOf('is-external=true') == -1) { /* only when the URL does not contain the 'is-external=true' marker */ |
---|
18 | parent.document.title="org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API)"; |
---|
19 | } |
---|
20 | } |
---|
21 | </SCRIPT> |
---|
22 | <NOSCRIPT> |
---|
23 | </NOSCRIPT> |
---|
24 | |
---|
25 | </HEAD> |
---|
26 | |
---|
27 | <BODY BGCOLOR="white" onload="windowTitle();"> |
---|
28 | <HR> |
---|
29 | |
---|
30 | |
---|
31 | <!-- ========= START OF TOP NAVBAR ======= --> |
---|
32 | <A NAME="navbar_top"><!-- --></A> |
---|
33 | <A HREF="#skip-navbar_top" title="Skip navigation links"></A> |
---|
34 | <TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY=""> |
---|
35 | <TR> |
---|
36 | <TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> |
---|
37 | <A NAME="navbar_top_firstrow"><!-- --></A> |
---|
38 | <TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY=""> |
---|
39 | <TR ALIGN="center" VALIGN="top"> |
---|
40 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD> |
---|
41 | <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT> </TD> |
---|
42 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <FONT CLASS="NavBarFont1">Class</FONT> </TD> |
---|
43 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD> |
---|
44 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD> |
---|
45 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD> |
---|
46 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD> |
---|
47 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD> |
---|
48 | </TR> |
---|
49 | </TABLE> |
---|
50 | </TD> |
---|
51 | <TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM> |
---|
52 | </EM> |
---|
53 | </TD> |
---|
54 | </TR> |
---|
55 | |
---|
56 | <TR> |
---|
57 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
58 | <A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A> |
---|
59 | <A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD> |
---|
60 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
61 | <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A> |
---|
62 | <A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A> |
---|
63 | <SCRIPT type="text/javascript"> |
---|
64 | <!-- |
---|
65 | if(window==top) { |
---|
66 | document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>'); |
---|
67 | } |
---|
68 | //--> |
---|
69 | </SCRIPT> |
---|
70 | <NOSCRIPT> |
---|
71 | <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A> |
---|
72 | </NOSCRIPT> |
---|
73 | |
---|
74 | |
---|
75 | </FONT></TD> |
---|
76 | </TR> |
---|
77 | </TABLE> |
---|
78 | <A NAME="skip-navbar_top"></A> |
---|
79 | <!-- ========= END OF TOP NAVBAR ========= --> |
---|
80 | |
---|
81 | <HR> |
---|
82 | <H2> |
---|
83 | Package org.apache.hadoop.mapred.pipes |
---|
84 | </H2> |
---|
85 | Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce. |
---|
86 | <P> |
---|
87 | <B>See:</B> |
---|
88 | <BR> |
---|
89 | <A HREF="#package_description"><B>Description</B></A> |
---|
90 | <P> |
---|
91 | |
---|
92 | <TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> |
---|
93 | <TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor"> |
---|
94 | <TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2"> |
---|
95 | <B>Class Summary</B></FONT></TH> |
---|
96 | </TR> |
---|
97 | <TR BGCOLOR="white" CLASS="TableRowColor"> |
---|
98 | <TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/pipes/Submitter.html" title="class in org.apache.hadoop.mapred.pipes">Submitter</A></B></TD> |
---|
99 | <TD>The main entry point and job submitter.</TD> |
---|
100 | </TR> |
---|
101 | </TABLE> |
---|
102 | |
---|
103 | |
---|
104 | <P> |
---|
105 | <A NAME="package_description"><!-- --></A><H2> |
---|
106 | Package org.apache.hadoop.mapred.pipes Description |
---|
107 | </H2> |
---|
108 | |
---|
109 | <P> |
---|
110 | Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce. The |
---|
111 | primary approach is to split the C++ code into a separate process that |
---|
112 | does the application specific code. In many ways, the approach will be |
---|
113 | similar to Hadoop streaming, but using Writable serialization to |
---|
114 | convert the types into bytes that are sent to the process via a |
---|
115 | socket. |
---|
116 | |
---|
117 | <p> |
---|
118 | |
---|
119 | The class org.apache.hadoop.mapred.pipes.Submitter has a public static |
---|
120 | method to submit a job as a JobConf and a main method that takes an |
---|
121 | application and optional configuration file, input directories, and |
---|
122 | output directory. The CLI for the main method looks like: |
---|
123 | |
---|
124 | <pre> |
---|
125 | bin/hadoop pipes \ |
---|
126 | [-input <i>inputDir</i>] \ |
---|
127 | [-output <i>outputDir</i>] \ |
---|
128 | [-jar <i>applicationJarFile</i>] \ |
---|
129 | [-inputformat <i>class</i>] \ |
---|
130 | [-map <i>class</i>] \ |
---|
131 | [-partitioner <i>class</i>] \ |
---|
132 | [-reduce <i>class</i>] \ |
---|
133 | [-writer <i>class</i>] \ |
---|
134 | [-program <i>program url</i>] \ |
---|
135 | [-conf <i>configuration file</i>] \ |
---|
136 | [-D <i>property=value</i>] \ |
---|
137 | [-fs <i>local|namenode:port</i>] \ |
---|
138 | [-jt <i>local|jobtracker:port</i>] \ |
---|
139 | [-files <i>comma separated list of files</i>] \ |
---|
140 | [-libjars <i>comma separated list of jars</i>] \ |
---|
141 | [-archives <i>comma separated list of archives</i>] |
---|
142 | </pre> |
---|
143 | |
---|
144 | |
---|
145 | <p> |
---|
146 | |
---|
147 | The application programs link against a thin C++ wrapper library that |
---|
148 | handles the communication with the rest of the Hadoop system. The C++ |
---|
149 | interface is "swigable" so that interfaces can be generated for python |
---|
150 | and other scripting languages. All of the C++ functions and classes |
---|
151 | are in the HadoopPipes namespace. The job may consist of any |
---|
152 | combination of Java and C++ RecordReaders, Mappers, Partitioner, |
---|
153 | Combiner, Reducer, and RecordWriter. |
---|
154 | |
---|
155 | <p> |
---|
156 | |
---|
157 | Hadoop Pipes has generic Java classes for handling the mapper and |
---|
158 | reducer (PipesMapRunner and PipesReducer). They fork off the |
---|
159 | application program and communicate with it over a socket. The |
---|
160 | communication is handled by the C++ wrapper library and the |
---|
161 | PipesMapRunner and PipesReducer. |
---|
162 | |
---|
163 | <p> |
---|
164 | |
---|
165 | The application program passes in a factory object that can create |
---|
166 | the various objects needed by the framework to the runTask |
---|
167 | function. The framework creates the Mapper or Reducer as |
---|
168 | appropriate and calls the map or reduce method to invoke the |
---|
169 | application's code. The JobConf is available to the application. |
---|
170 | |
---|
171 | <p> |
---|
172 | |
---|
173 | The Mapper and Reducer objects get all of their inputs, outputs, and |
---|
174 | context via context objects. The advantage of using the context |
---|
175 | objects is that their interface can be extended with additional |
---|
176 | methods without breaking clients. Although this interface is different |
---|
177 | from the current Java interface, the plan is to migrate the Java |
---|
178 | interface in this direction. |
---|
179 | |
---|
180 | <p> |
---|
181 | |
---|
182 | Although the Java implementation is typed, the C++ interface for keys |
---|
183 | and values is just a byte buffer. Since STL strings provide precisely |
---|
184 | the right functionality and are standard, they will be used. The |
---|
185 | decision to not use stronger types was to simplify the interface. |
---|
186 | |
---|
187 | <p> |
---|
188 | |
---|
189 | The application can also define combiner functions. The combiner will |
---|
190 | be run locally by the framework in the application process to avoid |
---|
191 | the round trip to the Java process and back. Because the compare |
---|
192 | function is not available in C++, the combiner will use memcmp to |
---|
193 | sort the inputs to the combiner. This is not as general as the Java |
---|
194 | equivalent, which uses the user's comparator, but should cover the |
---|
195 | majority of the use cases. As the map function outputs key/value |
---|
196 | pairs, they will be buffered. When the buffer is full, it will be |
---|
197 | sorted and passed to the combiner. The output of the combiner will be |
---|
198 | sent to the Java process. |
---|
199 | |
---|
200 | <p> |
---|
201 | |
---|
202 | The application can also set a partition function to control which key |
---|
203 | is given to a particular reduce. If a partition function is not |
---|
204 | defined, the Java one will be used. The partition function will be |
---|
205 | called by the C++ framework before the key/value pair is sent back to |
---|
206 | Java. |
---|
207 | |
---|
208 | <p> |
---|
209 | |
---|
210 | The application programs can also register counters with a group and a name |
---|
211 | and also increment the counters and get the counter values. A word-count |
---|
212 | example illustrating pipes usage with counters is available at |
---|
213 | <a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/examples/pipes/impl/wordcount-simple.cc">wordcount-simple.cc</a>. |
---|
214 | <P> |
---|
215 | |
---|
216 | <P> |
---|
217 | <DL> |
---|
218 | </DL> |
---|
219 | <HR> |
---|
220 | |
---|
221 | |
---|
222 | <!-- ======= START OF BOTTOM NAVBAR ====== --> |
---|
223 | <A NAME="navbar_bottom"><!-- --></A> |
---|
224 | <A HREF="#skip-navbar_bottom" title="Skip navigation links"></A> |
---|
225 | <TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY=""> |
---|
226 | <TR> |
---|
227 | <TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> |
---|
228 | <A NAME="navbar_bottom_firstrow"><!-- --></A> |
---|
229 | <TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY=""> |
---|
230 | <TR ALIGN="center" VALIGN="top"> |
---|
231 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A> </TD> |
---|
232 | <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> <FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT> </TD> |
---|
233 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <FONT CLASS="NavBarFont1">Class</FONT> </TD> |
---|
234 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A> </TD> |
---|
235 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A> </TD> |
---|
236 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A> </TD> |
---|
237 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A> </TD> |
---|
238 | <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1"> <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A> </TD> |
---|
239 | </TR> |
---|
240 | </TABLE> |
---|
241 | </TD> |
---|
242 | <TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM> |
---|
243 | </EM> |
---|
244 | </TD> |
---|
245 | </TR> |
---|
246 | |
---|
247 | <TR> |
---|
248 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
249 | <A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A> |
---|
250 | <A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD> |
---|
251 | <TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2"> |
---|
252 | <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A> |
---|
253 | <A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A> |
---|
254 | <SCRIPT type="text/javascript"> |
---|
255 | <!-- |
---|
256 | if(window==top) { |
---|
257 | document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>'); |
---|
258 | } |
---|
259 | //--> |
---|
260 | </SCRIPT> |
---|
261 | <NOSCRIPT> |
---|
262 | <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A> |
---|
263 | </NOSCRIPT> |
---|
264 | |
---|
265 | |
---|
266 | </FONT></TD> |
---|
267 | </TR> |
---|
268 | </TABLE> |
---|
269 | <A NAME="skip-navbar_bottom"></A> |
---|
270 | <!-- ======== END OF BOTTOM NAVBAR ======= --> |
---|
271 | |
---|
272 | <HR> |
---|
273 | Copyright © 2009 The Apache Software Foundation |
---|
274 | </BODY> |
---|
275 | </HTML> |
---|