source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/api/org/apache/hadoop/mapred/pipes/package-summary.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 10.7 KB
Line 
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<!--NewPage-->
3<HTML>
4<HEAD>
5<!-- Generated by javadoc (build 1.6.0_07) on Tue Sep 01 20:57:00 UTC 2009 -->
6<TITLE>
7org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API)
8</TITLE>
9
10<META NAME="date" CONTENT="2009-09-01">
11
12<LINK REL ="stylesheet" TYPE="text/css" HREF="../../../../../stylesheet.css" TITLE="Style">
13
14<SCRIPT type="text/javascript">
15function windowTitle()
16{
17    if (location.href.indexOf('is-external=true') == -1) {
18        parent.document.title="org.apache.hadoop.mapred.pipes (Hadoop 0.20.1 API)";
19    }
20}
21</SCRIPT>
22<NOSCRIPT>
23</NOSCRIPT>
24
25</HEAD>
26
27<BODY BGCOLOR="white" onload="windowTitle();">
28<HR>
29
30
31<!-- ========= START OF TOP NAVBAR ======= -->
32<A NAME="navbar_top"><!-- --></A>
33<A HREF="#skip-navbar_top" title="Skip navigation links"></A>
34<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
35<TR>
36<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
37<A NAME="navbar_top_firstrow"><!-- --></A>
38<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
39  <TR ALIGN="center" VALIGN="top">
40  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A>&nbsp;</TD>
41  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT>&nbsp;</TD>
42  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
43  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A>&nbsp;</TD>
44  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
45  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
46  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
47  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
48  </TR>
49</TABLE>
50</TD>
51<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
52</EM>
53</TD>
54</TR>
55
56<TR>
57<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
58&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A>&nbsp;
59&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD>
60<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
61  <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
62&nbsp;<A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
63&nbsp;<SCRIPT type="text/javascript">
64  <!--
65  if(window==top) {
66    document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>');
67  }
68  //-->
69</SCRIPT>
70<NOSCRIPT>
71  <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>
72</NOSCRIPT>
73
74
75</FONT></TD>
76</TR>
77</TABLE>
78<A NAME="skip-navbar_top"></A>
79<!-- ========= END OF TOP NAVBAR ========= -->
80
81<HR>
82<H2>
83Package org.apache.hadoop.mapred.pipes
84</H2>
85Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce.
86<P>
87<B>See:</B>
88<BR>
89&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<A HREF="#package_description"><B>Description</B></A>
90<P>
91
92<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
93<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
94<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
95<B>Class Summary</B></FONT></TH>
96</TR>
97<TR BGCOLOR="white" CLASS="TableRowColor">
98<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/pipes/Submitter.html" title="class in org.apache.hadoop.mapred.pipes">Submitter</A></B></TD>
99<TD>The main entry point and job submitter.</TD>
100</TR>
101</TABLE>
102&nbsp;
103
104<P>
105<A NAME="package_description"><!-- --></A><H2>
106Package org.apache.hadoop.mapred.pipes Description
107</H2>
108
109<P>
110Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce. The
111primary approach is to split the C++ code into a separate process that
112does the application specific code. In many ways, the approach will be
113similar to Hadoop streaming, but using Writable serialization to
114convert the types into bytes that are sent to the process via a
115socket.
116
117<p>
118
119The class org.apache.hadoop.mapred.pipes.Submitter has a public static
120method to submit a job as a JobConf and a main method that takes an
121application and optional configuration file, input directories, and
122output directory. The CLI for the main method looks like:
123
124<pre>
125bin/hadoop pipes \
126  [-input <i>inputDir</i>] \
127  [-output <i>outputDir</i>] \
128  [-jar <i>applicationJarFile</i>] \
129  [-inputformat <i>class</i>] \
130  [-map <i>class</i>] \
131  [-partitioner <i>class</i>] \
132  [-reduce <i>class</i>] \
133  [-writer <i>class</i>] \
134  [-program <i>program url</i>] \
135  [-conf <i>configuration file</i>] \
136  [-D <i>property=value</i>] \
137  [-fs <i>local|namenode:port</i>] \
138  [-jt <i>local|jobtracker:port</i>] \
139  [-files <i>comma separated list of files</i>] \
140  [-libjars <i>comma separated list of jars</i>] \
141  [-archives <i>comma separated list of archives</i>]
142</pre>
143
144
145<p>
146
147The application programs link against a thin C++ wrapper library that
148handles the communication with the rest of the Hadoop system.  The C++
149interface is "swigable" so that interfaces can be generated for python
150and other scripting languages. All of the C++ functions and classes
151are in the HadoopPipes namespace. The job may consist of any
152combination of Java and C++ RecordReaders, Mappers, Partitioner,
153Combiner, Reducer, and RecordWriter.
154
155<p>
156
157Hadoop Pipes has generic Java classes for handling the mapper and
158reducer (PipesMapRunner and PipesReducer). They fork off the
159application program and communicate with it over a socket. The
160communication is handled by the C++ wrapper library and the
161PipesMapRunner and PipesReducer.
162
163<p>
164
165The application program passes in a factory object that can create
166the various objects needed by the framework to the runTask
167function. The framework creates the Mapper or Reducer as
168appropriate and calls the map or reduce method to invoke the
169application's code. The JobConf is available to the application.
170
171<p>
172
173The Mapper and Reducer objects get all of their inputs, outputs, and
174context via context objects. The advantage of using the context
175objects is that their interface can be extended with additional
176methods without breaking clients. Although this interface is different
177from the current Java interface, the plan is to migrate the Java
178interface in this direction.
179
180<p>
181
182Although the Java implementation is typed, the C++ interface for keys
183and values is just a byte buffer. Since STL strings provide precisely
184the right functionality and are standard, they will be used. The
185decision to not use stronger types was to simplify the interface.
186
187<p>
188
189The application can also define combiner functions. The combiner will
190be run locally by the framework in the application process to avoid
191the round trip to the Java process and back. Because the compare
192function is not available in C++, the combiner will use memcmp to
193sort the inputs to the combiner. This is not as general as the Java
194equivalent, which uses the user's comparator, but should cover the
195majority of the use cases. As the map function outputs key/value
196pairs, they will be buffered. When the buffer is full, it will be
197sorted and passed to the combiner. The output of the combiner will be
198sent to the Java process.
199
200<p>
201
202The application can also set a partition function to control which key
203is given to a particular reduce. If a partition function is not
204defined, the Java one will be used. The partition function will be
205called by the C++ framework before the key/value pair is sent back to
206Java.
207
208<p>
209
210The application programs can also register counters with a group and a name
211and also increment the counters and get the counter values. A word-count
212example illustrating pipes usage with counters is available at
213<a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/examples/pipes/impl/wordcount-simple.cc">wordcount-simple.cc</a>.
214<P>
215
216<P>
217<DL>
218</DL>
219<HR>
220
221
222<!-- ======= START OF BOTTOM NAVBAR ====== -->
223<A NAME="navbar_bottom"><!-- --></A>
224<A HREF="#skip-navbar_bottom" title="Skip navigation links"></A>
225<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
226<TR>
227<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
228<A NAME="navbar_bottom_firstrow"><!-- --></A>
229<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
230  <TR ALIGN="center" VALIGN="top">
231  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A>&nbsp;</TD>
232  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT>&nbsp;</TD>
233  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
234  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A>&nbsp;</TD>
235  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
236  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
237  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
238  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
239  </TR>
240</TABLE>
241</TD>
242<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
243</EM>
244</TD>
245</TR>
246
247<TR>
248<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
249&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/lib/db/package-summary.html"><B>PREV PACKAGE</B></A>&nbsp;
250&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/tools/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD>
251<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
252  <A HREF="../../../../../index.html?org/apache/hadoop/mapred/pipes/package-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
253&nbsp;<A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
254&nbsp;<SCRIPT type="text/javascript">
255  <!--
256  if(window==top) {
257    document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>');
258  }
259  //-->
260</SCRIPT>
261<NOSCRIPT>
262  <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>
263</NOSCRIPT>
264
265
266</FONT></TD>
267</TR>
268</TABLE>
269<A NAME="skip-navbar_bottom"></A>
270<!-- ======== END OF BOTTOM NAVBAR ======= -->
271
272<HR>
273Copyright &copy; 2009 The Apache Software Foundation
274</BODY>
275</HTML>
Note: See TracBrowser for help on using the repository browser.