source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/api/org/apache/hadoop/mapred/join/package-summary.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 17.6 KB
Line 
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<!--NewPage-->
3<HTML>
4<HEAD>
5<!-- Generated by javadoc (build 1.6.0_07) on Tue Sep 01 20:56:59 UTC 2009 -->
6<TITLE>
7org.apache.hadoop.mapred.join (Hadoop 0.20.1 API)
8</TITLE>
9
10<META NAME="date" CONTENT="2009-09-01">
11
12<LINK REL ="stylesheet" TYPE="text/css" HREF="../../../../../stylesheet.css" TITLE="Style">
13
14<SCRIPT type="text/javascript">
15function windowTitle()  // Invoked from the BODY onload handler: titles the parent window's document with this package's name.
16{
17    if (location.href.indexOf('is-external=true') == -1) {  // do nothing when the URL contains 'is-external=true'
18        parent.document.title="org.apache.hadoop.mapred.join (Hadoop 0.20.1 API)";
19    }
20}
21</SCRIPT>
22<NOSCRIPT>
23</NOSCRIPT>
24
25</HEAD>
26
27<BODY BGCOLOR="white" onload="windowTitle();">
28<HR>
29
30
31<!-- ========= START OF TOP NAVBAR ======= -->
32<A NAME="navbar_top"><!-- --></A>
33<A HREF="#skip-navbar_top" title="Skip navigation links"></A>
34<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
35<TR>
36<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
37<A NAME="navbar_top_firstrow"><!-- --></A>
38<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
39  <TR ALIGN="center" VALIGN="top">
40  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A>&nbsp;</TD>
41  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT>&nbsp;</TD>
42  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
43  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A>&nbsp;</TD>
44  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
45  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
46  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
47  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
48  </TR>
49</TABLE>
50</TD>
51<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
52</EM>
53</TD>
54</TR>
55
56<TR>
57<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
58&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/jobcontrol/package-summary.html"><B>PREV PACKAGE</B></A>&nbsp;
59&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/lib/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD>
60<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
61  <A HREF="../../../../../index.html?org/apache/hadoop/mapred/join/package-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
62&nbsp;<A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
63&nbsp;<SCRIPT type="text/javascript">
64  <!--
65  if(window==top) {  // write the "All Classes" link only when this window is the topmost one (page not shown inside another frame)
66    document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>');
67  }
68  //-->
69</SCRIPT>
70<NOSCRIPT>
71  <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>
72</NOSCRIPT>
73
74
75</FONT></TD>
76</TR>
77</TABLE>
78<A NAME="skip-navbar_top"></A>
79<!-- ========= END OF TOP NAVBAR ========= -->
80
81<HR>
82<H2>
83Package org.apache.hadoop.mapred.join
84</H2>
85Given a set of sorted datasets keyed with the same class and yielding equal
86partitions, it is possible to effect a join of those datasets prior to the map.
87<P>
88<B>See:</B>
89<BR>
90&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<A HREF="#package_description"><B>Description</B></A>
91<P>
92
93<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
94<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
95<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
96<B>Interface Summary</B></FONT></TH>
97</TR>
98<TR BGCOLOR="white" CLASS="TableRowColor">
99<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/ComposableInputFormat.html" title="interface in org.apache.hadoop.mapred.join">ComposableInputFormat&lt;K extends WritableComparable,V extends Writable&gt;</A></B></TD>
100<TD>Refinement of InputFormat requiring implementors to provide
101 ComposableRecordReader instead of RecordReader.</TD>
102</TR>
103<TR BGCOLOR="white" CLASS="TableRowColor">
104<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/ComposableRecordReader.html" title="interface in org.apache.hadoop.mapred.join">ComposableRecordReader&lt;K extends WritableComparable,V extends Writable&gt;</A></B></TD>
105<TD>Additional operations required of a RecordReader to participate in a join.</TD>
106</TR>
107<TR BGCOLOR="white" CLASS="TableRowColor">
108<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/ResetableIterator.html" title="interface in org.apache.hadoop.mapred.join">ResetableIterator&lt;T extends Writable&gt;</A></B></TD>
109<TD>This defines an interface to a stateful Iterator that can replay elements
110 added to it directly.</TD>
111</TR>
112</TABLE>
113&nbsp;
114
115<P>
116
117<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
118<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
119<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
120<B>Class Summary</B></FONT></TH>
121</TR>
122<TR BGCOLOR="white" CLASS="TableRowColor">
123<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/ArrayListBackedIterator.html" title="class in org.apache.hadoop.mapred.join">ArrayListBackedIterator&lt;X extends Writable&gt;</A></B></TD>
124<TD>This class provides an implementation of ResetableIterator.</TD>
125</TR>
126<TR BGCOLOR="white" CLASS="TableRowColor">
127<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/CompositeInputFormat.html" title="class in org.apache.hadoop.mapred.join">CompositeInputFormat&lt;K extends WritableComparable&gt;</A></B></TD>
128<TD>An InputFormat capable of performing joins over a set of data sources sorted
129 and partitioned the same way.</TD>
130</TR>
131<TR BGCOLOR="white" CLASS="TableRowColor">
132<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/CompositeInputSplit.html" title="class in org.apache.hadoop.mapred.join">CompositeInputSplit</A></B></TD>
133<TD>This InputSplit contains a set of child InputSplits.</TD>
134</TR>
135<TR BGCOLOR="white" CLASS="TableRowColor">
136<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/CompositeRecordReader.html" title="class in org.apache.hadoop.mapred.join">CompositeRecordReader&lt;K extends WritableComparable,V extends Writable,X extends Writable&gt;</A></B></TD>
137<TD>A RecordReader that can effect joins of RecordReaders sharing a common key
138 type and partitioning.</TD>
139</TR>
140<TR BGCOLOR="white" CLASS="TableRowColor">
141<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/InnerJoinRecordReader.html" title="class in org.apache.hadoop.mapred.join">InnerJoinRecordReader&lt;K extends WritableComparable&gt;</A></B></TD>
142<TD>Full inner join.</TD>
143</TR>
144<TR BGCOLOR="white" CLASS="TableRowColor">
145<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/JoinRecordReader.html" title="class in org.apache.hadoop.mapred.join">JoinRecordReader&lt;K extends WritableComparable&gt;</A></B></TD>
146<TD>Base class for Composite joins returning Tuples of arbitrary Writables.</TD>
147</TR>
148<TR BGCOLOR="white" CLASS="TableRowColor">
149<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/MultiFilterRecordReader.html" title="class in org.apache.hadoop.mapred.join">MultiFilterRecordReader&lt;K extends WritableComparable,V extends Writable&gt;</A></B></TD>
150<TD>Base class for Composite join returning values derived from multiple
151 sources, but generally not tuples.</TD>
152</TR>
153<TR BGCOLOR="white" CLASS="TableRowColor">
154<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/OuterJoinRecordReader.html" title="class in org.apache.hadoop.mapred.join">OuterJoinRecordReader&lt;K extends WritableComparable&gt;</A></B></TD>
155<TD>Full outer join.</TD>
156</TR>
157<TR BGCOLOR="white" CLASS="TableRowColor">
158<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/OverrideRecordReader.html" title="class in org.apache.hadoop.mapred.join">OverrideRecordReader&lt;K extends WritableComparable,V extends Writable&gt;</A></B></TD>
159<TD>Prefer the &quot;rightmost&quot; data source for this key.</TD>
160</TR>
161<TR BGCOLOR="white" CLASS="TableRowColor">
162<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.html" title="class in org.apache.hadoop.mapred.join">Parser</A></B></TD>
163<TD>Very simple shift-reduce parser for join expressions.</TD>
164</TR>
165<TR BGCOLOR="white" CLASS="TableRowColor">
166<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.Node.html" title="class in org.apache.hadoop.mapred.join">Parser.Node</A></B></TD>
167<TD>&nbsp;</TD>
168</TR>
169<TR BGCOLOR="white" CLASS="TableRowColor">
170<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.NodeToken.html" title="class in org.apache.hadoop.mapred.join">Parser.NodeToken</A></B></TD>
171<TD>&nbsp;</TD>
172</TR>
173<TR BGCOLOR="white" CLASS="TableRowColor">
174<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.NumToken.html" title="class in org.apache.hadoop.mapred.join">Parser.NumToken</A></B></TD>
175<TD>&nbsp;</TD>
176</TR>
177<TR BGCOLOR="white" CLASS="TableRowColor">
178<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.StrToken.html" title="class in org.apache.hadoop.mapred.join">Parser.StrToken</A></B></TD>
179<TD>&nbsp;</TD>
180</TR>
181<TR BGCOLOR="white" CLASS="TableRowColor">
182<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.Token.html" title="class in org.apache.hadoop.mapred.join">Parser.Token</A></B></TD>
183<TD>Tagged-union type for tokens from the join expression.</TD>
184</TR>
185<TR BGCOLOR="white" CLASS="TableRowColor">
186<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/ResetableIterator.EMPTY.html" title="class in org.apache.hadoop.mapred.join">ResetableIterator.EMPTY&lt;U extends Writable&gt;</A></B></TD>
187<TD>&nbsp;</TD>
188</TR>
189<TR BGCOLOR="white" CLASS="TableRowColor">
190<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/StreamBackedIterator.html" title="class in org.apache.hadoop.mapred.join">StreamBackedIterator&lt;X extends Writable&gt;</A></B></TD>
191<TD>This class provides an implementation of ResetableIterator.</TD>
192</TR>
193<TR BGCOLOR="white" CLASS="TableRowColor">
194<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/TupleWritable.html" title="class in org.apache.hadoop.mapred.join">TupleWritable</A></B></TD>
195<TD>Writable type storing multiple <A HREF="../../../../../org/apache/hadoop/io/Writable.html" title="interface in org.apache.hadoop.io"><CODE>Writable</CODE></A>s.</TD>
196</TR>
197<TR BGCOLOR="white" CLASS="TableRowColor">
198<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/WrappedRecordReader.html" title="class in org.apache.hadoop.mapred.join">WrappedRecordReader&lt;K extends WritableComparable,U extends Writable&gt;</A></B></TD>
199<TD>Proxy class for a RecordReader participating in the join framework.</TD>
200</TR>
201</TABLE>
202&nbsp;
203
204<P>
205
206<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
207<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
208<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
209<B>Enum Summary</B></FONT></TH>
210</TR>
211<TR BGCOLOR="white" CLASS="TableRowColor">
212<TD WIDTH="15%"><B><A HREF="../../../../../org/apache/hadoop/mapred/join/Parser.TType.html" title="enum in org.apache.hadoop.mapred.join">Parser.TType</A></B></TD>
213<TD>&nbsp;</TD>
214</TR>
215</TABLE>
216&nbsp;
217
218<P>
219<A NAME="package_description"><!-- --></A><H2>
220Package org.apache.hadoop.mapred.join Description
221</H2>
222
223<P>
224<p>Given a set of sorted datasets keyed with the same class and yielding equal
225partitions, it is possible to effect a join of those datasets prior to the map.
226This could save costs in re-partitioning, sorting, shuffling, and writing out
227data required in the general case.</p>
228
229<h3><a name="Interface"></a>Interface</h3>
230
231<p>The attached code offers the following interface to users of these
232classes.</p>
233
234<table>
235<tr><th>property</th><th>required</th><th>value</th></tr>
236<tr><td>mapred.join.expr</td><td>yes</td>
237    <td>Join expression to effect over input data</td></tr>
238<tr><td>mapred.join.keycomparator</td><td>no</td>
239    <td><tt>WritableComparator</tt> class to use for comparing keys</td></tr>
240<tr><td>mapred.join.define.&lt;ident&gt;</td><td>no</td>
241    <td>Class mapped to identifier in join expression</td></tr>
242</table>
243
244<p>The join expression understands the following grammar:</p>
245
246<pre>func ::= &lt;ident&gt;([&lt;func&gt;,]*&lt;func&gt;)
247func ::= tbl(&lt;class&gt;,"&lt;path&gt;");
248
249</pre>
250
251<p>Operations included in this patch are partitioned into one of two types:
252join operations emitting tuples and "multi-filter" operations emitting a
253single value from (but not necessarily included in) a set of input values.
254For a given key, each operation will consider the cross product of all
255values for all sources at that node.</p>
256
257<p>Identifiers supported by default:</p>
258
259<table>
260<tr><th>identifier</th><th>type</th><th>description</th></tr>
261<tr><td>inner</td><td>Join</td><td>Full inner join</td></tr>
262<tr><td>outer</td><td>Join</td><td>Full outer join</td></tr>
263<tr><td>override</td><td>MultiFilter</td>
264    <td>For a given key, prefer values from the rightmost source</td></tr>
265</table>
266
267<p>A user of this class must set the <tt>InputFormat</tt> for the job to
268<tt>CompositeInputFormat</tt> and define a join expression accepted by the
269preceding grammar. For example, both of the following are acceptable:</p>
270
271<pre>inner(tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
272          "hdfs://host:8020/foo/bar"),
273      tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
274          "hdfs://host:8020/foo/baz"))
275
276outer(override(tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
277                   "hdfs://host:8020/foo/bar"),
278               tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
279                   "hdfs://host:8020/foo/baz")),
280      tbl(org.apache.hadoop.mapred.SequenceFileInputFormat.class,
281          "hdfs://host:8020/foo/rab"))
282</pre>
283
284<p><tt>CompositeInputFormat</tt> includes a handful of convenience methods to
285aid construction of these verbose statements.</p>
286
287<p>As in the second example, joins may be nested. Users may provide a
288comparator class in the <tt>mapred.join.keycomparator</tt> property to specify
289the ordering of their keys, or accept the default comparator as returned by
290<tt>WritableComparator.get(keyclass)</tt>.</p>
291
292<p>Users can specify their own join operations, typically by extending
293<tt>JoinRecordReader</tt> or <tt>MultiFilterRecordReader</tt> and mapping that
294class to an identifier in the join expression using the
295<tt>mapred.join.define.<em>ident</em></tt> property, where <em>ident</em> is
296the identifier appearing in the join expression. Users may elect to emit or
297modify values passing through their join operation. Consulting the existing
298operations for guidance is recommended. Adding arguments is considerably more
299complex (and only partially supported), as one must also add a <tt>Node</tt>
300type to the parse tree. One is probably better off extending
301<tt>RecordReader</tt> in most cases.</p>
302
303<a href="http://issues.apache.org/jira/browse/HADOOP-2085">JIRA</a>
304<P>
305
306<P>
307<DL>
308</DL>
309<HR>
310
311
312<!-- ======= START OF BOTTOM NAVBAR ====== -->
313<A NAME="navbar_bottom"><!-- --></A>
314<A HREF="#skip-navbar_bottom" title="Skip navigation links"></A>
315<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
316<TR>
317<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
318<A NAME="navbar_bottom_firstrow"><!-- --></A>
319<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
320  <TR ALIGN="center" VALIGN="top">
321  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../overview-summary.html"><FONT CLASS="NavBarFont1"><B>Overview</B></FONT></A>&nbsp;</TD>
322  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Package</B></FONT>&nbsp;</TD>
323  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
324  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-use.html"><FONT CLASS="NavBarFont1"><B>Use</B></FONT></A>&nbsp;</TD>
325  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="package-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
326  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
327  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
328  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="../../../../../help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
329  </TR>
330</TABLE>
331</TD>
332<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
333</EM>
334</TD>
335</TR>
336
337<TR>
338<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
339&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/jobcontrol/package-summary.html"><B>PREV PACKAGE</B></A>&nbsp;
340&nbsp;<A HREF="../../../../../org/apache/hadoop/mapred/lib/package-summary.html"><B>NEXT PACKAGE</B></A></FONT></TD>
341<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
342  <A HREF="../../../../../index.html?org/apache/hadoop/mapred/join/package-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
343&nbsp;<A HREF="package-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
344&nbsp;<SCRIPT type="text/javascript">
345  <!--
346  if(window==top) {  // write the "All Classes" link only when this window is the topmost one (page not shown inside another frame)
347    document.writeln('<A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>');
348  }
349  //-->
350</SCRIPT>
351<NOSCRIPT>
352  <A HREF="../../../../../allclasses-noframe.html"><B>All Classes</B></A>
353</NOSCRIPT>
354
355
356</FONT></TD>
357</TR>
358</TABLE>
359<A NAME="skip-navbar_bottom"></A>
360<!-- ======== END OF BOTTOM NAVBAR ======= -->
361
362<HR>
363Copyright &copy; 2009 The Apache Software Foundation
364</BODY>
365</HTML>
Note: See TracBrowser for help on using the repository browser.