<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!--NewPage-->
<HTML>
<HEAD>
<!-- Generated by javadoc (build 1.6.0_07) on Tue Sep 01 20:57:04 UTC 2009 -->
<TITLE>
Overview (Hadoop 0.20.1 API)
</TITLE>

<META NAME="date" CONTENT="2009-09-01">

<LINK REL ="stylesheet" TYPE="text/css" HREF="stylesheet.css" TITLE="Style">

<SCRIPT type="text/javascript">
function windowTitle()
{
    if (location.href.indexOf('is-external=true') == -1) {
        parent.document.title="Overview (Hadoop 0.20.1 API)";
    }
}
</SCRIPT>
<NOSCRIPT>
</NOSCRIPT>

</HEAD>

<BODY BGCOLOR="white" onload="windowTitle();">
<HR>


<!-- ========= START OF TOP NAVBAR ======= -->
<A NAME="navbar_top"><!-- --></A>
<A HREF="#skip-navbar_top" title="Skip navigation links"></A>
<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
<TR>
<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
<A NAME="navbar_top_firstrow"><!-- --></A>
<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
  <TR ALIGN="center" VALIGN="top">
  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Overview</B></FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Package</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Use</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="overview-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
  </TR>
</TABLE>
</TD>
<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
</EM>
</TD>
</TR>

<TR>
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
&nbsp;PREV&nbsp;
&nbsp;NEXT</FONT></TD>
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
  <A HREF="index.html?overview-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
&nbsp;<A HREF="overview-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
&nbsp;<SCRIPT type="text/javascript">
  <!--
  if(window==top) {
    document.writeln('<A HREF="allclasses-noframe.html"><B>All Classes</B></A>');
  }
  //-->
</SCRIPT>
<NOSCRIPT>
  <A HREF="allclasses-noframe.html"><B>All Classes</B></A>
</NOSCRIPT>


</FONT></TD>
</TR>
</TABLE>
<A NAME="skip-navbar_top"></A>
<!-- ========= END OF TOP NAVBAR ========= -->

<HR>
<CENTER>
<H1>
Hadoop 0.20.1 API
</H1>
</CENTER>
Hadoop is a distributed computing platform.
<P>
<B>See:</B>
<BR>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<A HREF="#overview_description"><B>Description</B></A>
<P>

<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>Core</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/package-summary.html">org.apache.hadoop</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/conf/package-summary.html">org.apache.hadoop.conf</A></B></TD>
<TD>Configuration of system parameters.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/filecache/package-summary.html">org.apache.hadoop.filecache</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/package-summary.html">org.apache.hadoop.fs</A></B></TD>
<TD>An abstract file system API.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/ftp/package-summary.html">org.apache.hadoop.fs.ftp</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/kfs/package-summary.html">org.apache.hadoop.fs.kfs</A></B></TD>
<TD>A client for the Kosmos filesystem (KFS).</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/permission/package-summary.html">org.apache.hadoop.fs.permission</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/s3/package-summary.html">org.apache.hadoop.fs.s3</A></B></TD>
<TD>A distributed, block-based implementation of <A HREF="org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><CODE>FileSystem</CODE></A> that uses <a href="http://aws.amazon.com/s3">Amazon S3</a>
as a backing store.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/s3native/package-summary.html">org.apache.hadoop.fs.s3native</A></B></TD>
<TD>
A distributed implementation of <A HREF="org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><CODE>FileSystem</CODE></A> for reading and writing files on
<a href="http://aws.amazon.com/s3">Amazon S3</a>.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/fs/shell/package-summary.html">org.apache.hadoop.fs.shell</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/http/package-summary.html">org.apache.hadoop.http</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/package-summary.html">org.apache.hadoop.io</A></B></TD>
<TD>Generic i/o code for use when reading and writing data to the network,
to databases, and to files.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/compress/package-summary.html">org.apache.hadoop.io.compress</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/compress/bzip2/package-summary.html">org.apache.hadoop.io.compress.bzip2</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/compress/zlib/package-summary.html">org.apache.hadoop.io.compress.zlib</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/file/tfile/package-summary.html">org.apache.hadoop.io.file.tfile</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/retry/package-summary.html">org.apache.hadoop.io.retry</A></B></TD>
<TD>
A mechanism for selectively retrying methods that throw exceptions under certain circumstances.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/io/serializer/package-summary.html">org.apache.hadoop.io.serializer</A></B></TD>
<TD>
This package provides a mechanism for using different serialization frameworks
in Hadoop.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/ipc/package-summary.html">org.apache.hadoop.ipc</A></B></TD>
<TD>Tools to help define network clients and servers.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/ipc/metrics/package-summary.html">org.apache.hadoop.ipc.metrics</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/log/package-summary.html">org.apache.hadoop.log</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/package-summary.html">org.apache.hadoop.mapred</A></B></TD>
<TD>A software framework for easily writing applications that process vast
amounts of data (multi-terabyte data-sets) in parallel on large clusters
(thousands of nodes) of commodity hardware in a reliable, fault-tolerant
manner.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/jobcontrol/package-summary.html">org.apache.hadoop.mapred.jobcontrol</A></B></TD>
<TD>Utilities for managing dependent jobs.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/join/package-summary.html">org.apache.hadoop.mapred.join</A></B></TD>
<TD>Given a set of sorted datasets keyed with the same class and yielding equal
partitions, it is possible to effect a join of those datasets prior to the map.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/lib/package-summary.html">org.apache.hadoop.mapred.lib</A></B></TD>
<TD>Library of generally useful mappers, reducers, and partitioners.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/lib/aggregate/package-summary.html">org.apache.hadoop.mapred.lib.aggregate</A></B></TD>
<TD>Classes for performing various counting and aggregations.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/lib/db/package-summary.html">org.apache.hadoop.mapred.lib.db</A></B></TD>
<TD>Input and output formats for reading records from and writing records to relational databases.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/pipes/package-summary.html">org.apache.hadoop.mapred.pipes</A></B></TD>
<TD>Hadoop Pipes allows C++ code to use Hadoop DFS and map/reduce.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapred/tools/package-summary.html">org.apache.hadoop.mapred.tools</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/package-summary.html">org.apache.hadoop.mapreduce</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/lib/input/package-summary.html">org.apache.hadoop.mapreduce.lib.input</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/lib/map/package-summary.html">org.apache.hadoop.mapreduce.lib.map</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/lib/output/package-summary.html">org.apache.hadoop.mapreduce.lib.output</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/lib/partition/package-summary.html">org.apache.hadoop.mapreduce.lib.partition</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/mapreduce/lib/reduce/package-summary.html">org.apache.hadoop.mapreduce.lib.reduce</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/package-summary.html">org.apache.hadoop.metrics</A></B></TD>
<TD>This package defines an API for reporting performance metric information.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/file/package-summary.html">org.apache.hadoop.metrics.file</A></B></TD>
<TD>Implementation of the metrics package that writes the metrics to a file.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/ganglia/package-summary.html">org.apache.hadoop.metrics.ganglia</A></B></TD>
<TD>Implementation of the metrics package that sends metric data to
<a href="http://ganglia.sourceforge.net/">Ganglia</a>.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/jvm/package-summary.html">org.apache.hadoop.metrics.jvm</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/spi/package-summary.html">org.apache.hadoop.metrics.spi</A></B></TD>
<TD>The Service Provider Interface for the Metrics API.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/metrics/util/package-summary.html">org.apache.hadoop.metrics.util</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/net/package-summary.html">org.apache.hadoop.net</A></B></TD>
<TD>Network-related classes.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/record/package-summary.html">org.apache.hadoop.record</A></B></TD>
<TD>Hadoop record I/O contains classes and a record description language
  translator for simplifying serialization and deserialization of records in a
  language-neutral manner.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/record/compiler/package-summary.html">org.apache.hadoop.record.compiler</A></B></TD>
<TD>This package contains classes needed for code generation
  from the hadoop record compiler.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/record/compiler/ant/package-summary.html">org.apache.hadoop.record.compiler.ant</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/record/compiler/generated/package-summary.html">org.apache.hadoop.record.compiler.generated</A></B></TD>
<TD>This package contains code generated by JavaCC from the
  Hadoop record syntax file rcc.jj.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/record/meta/package-summary.html">org.apache.hadoop.record.meta</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/security/package-summary.html">org.apache.hadoop.security</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/security/authorize/package-summary.html">org.apache.hadoop.security.authorize</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/util/package-summary.html">org.apache.hadoop.util</A></B></TD>
<TD>Common utilities.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/util/bloom/package-summary.html">org.apache.hadoop.util.bloom</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/util/hash/package-summary.html">org.apache.hadoop.util.hash</A></B></TD>
<TD>&nbsp;</TD>
</TR>
</TABLE>

<P>
&nbsp;
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>Examples</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/examples/package-summary.html">org.apache.hadoop.examples</A></B></TD>
<TD>Hadoop example code.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/examples/dancing/package-summary.html">org.apache.hadoop.examples.dancing</A></B></TD>
<TD>This package is a distributed implementation of Knuth's <a
href="http://en.wikipedia.org/wiki/Dancing_Links">dancing links</a>
algorithm that can run under Hadoop.</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/examples/terasort/package-summary.html">org.apache.hadoop.examples.terasort</A></B></TD>
<TD>This package consists of 3 map/reduce applications for Hadoop to
compete in the annual <a
href="http://www.hpl.hp.com/hosted/sortbenchmark" target="_top">terabyte sort</a>
competition.</TD>
</TR>
</TABLE>

<P>
&nbsp;
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>contrib: Streaming</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/streaming/package-summary.html">org.apache.hadoop.streaming</A></B></TD>
<TD><tt>Hadoop Streaming</tt> is a utility which allows users to create and run
Map-Reduce jobs with any executables (e.g. shell utilities) as the mapper
and/or the reducer.</TD>
</TR>
</TABLE>

<P>
&nbsp;
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>contrib: DataJoin</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/utils/join/package-summary.html">org.apache.hadoop.contrib.utils.join</A></B></TD>
<TD>&nbsp;</TD>
</TR>
</TABLE>

<P>
&nbsp;
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>contrib: Index</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/index/example/package-summary.html">org.apache.hadoop.contrib.index.example</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/index/lucene/package-summary.html">org.apache.hadoop.contrib.index.lucene</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/index/main/package-summary.html">org.apache.hadoop.contrib.index.main</A></B></TD>
<TD>&nbsp;</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/index/mapred/package-summary.html">org.apache.hadoop.contrib.index.mapred</A></B></TD>
<TD>&nbsp;</TD>
</TR>
</TABLE>

<P>
&nbsp;
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
<B>contrib: FailMon</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD WIDTH="20%"><B><A HREF="org/apache/hadoop/contrib/failmon/package-summary.html">org.apache.hadoop.contrib.failmon</A></B></TD>
<TD>&nbsp;</TD>
</TR>
</TABLE>

<P>
&nbsp;<A NAME="overview_description"><!-- --></A>
<P>
Hadoop is a distributed computing platform.

<p>Hadoop primarily consists of the <a
href="org/apache/hadoop/hdfs/package-summary.html">Hadoop Distributed FileSystem
(HDFS)</a> and an
implementation of the <a href="org/apache/hadoop/mapred/package-summary.html">
Map-Reduce</a> programming paradigm.</p>


<p>Hadoop is a software framework that lets one easily write and run applications
that process vast amounts of data. Here's what makes Hadoop especially useful:</p>
<ul>
  <li>
    <b>Scalable</b>: Hadoop can reliably store and process petabytes.
  </li>
  <li>
    <b>Economical</b>: It distributes the data and processing across clusters
    of commonly available computers. These clusters can number into the thousands
    of nodes.
  </li>
  <li>
    <b>Efficient</b>: By distributing the data, Hadoop can process it in parallel
    on the nodes where the data is located. This makes processing extremely fast.
  </li>
  <li>
    <b>Reliable</b>: Hadoop automatically maintains multiple copies of data and
    automatically redeploys computing tasks based on failures.
  </li>
</ul>

<h2>Requirements</h2>

<h3>Platforms</h3>

<ul>
  <li>
    Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes.
  </li>
  <li>
    Win32 is supported as a <i>development</i> platform. Distributed operation
    has not been well tested on Win32, so this is not a <i>production</i>
    platform.
  </li>
</ul>

<h3>Requisite Software</h3>

<ol>
  <li>
    Java 1.6.x, preferably from
    <a href="http://java.sun.com/javase/downloads/">Sun</a>.
    Set <tt>JAVA_HOME</tt> to the root of your Java installation.
  </li>
  <li>
    ssh must be installed and sshd must be running to use Hadoop's
    scripts to manage remote Hadoop daemons.
  </li>
  <li>
    rsync may be installed to use Hadoop's scripts to manage remote
    Hadoop installations.
  </li>
</ol>
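
<p>You can verify that the requisite software is present with a few standard
commands (a quick sanity check, not part of the original requirements list):</p>
<blockquote><pre>
$ java -version      # should report a 1.6.x JVM
$ ssh -V             # confirms the ssh client is installed
$ rsync --version    # optional, for managing remote installations
</pre></blockquote>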

<h4>Additional requirements for Windows</h4>

<ol>
  <li>
    <a href="http://www.cygwin.com/">Cygwin</a> - Required for shell support in
    addition to the required software above.
  </li>
</ol>

<h3>Installing Required Software</h3>

<p>If your platform does not have the required software listed above, you
will have to install it.</p>

<p>For example on Ubuntu Linux:</p>
<blockquote><pre>
$ sudo apt-get install ssh
$ sudo apt-get install rsync
</pre></blockquote>

<p>On Windows, if you did not install the required software when you
installed cygwin, start the cygwin installer and select the packages:</p>
<ul>
  <li>openssh - the "Net" category</li>
  <li>rsync - the "Net" category</li>
</ul>

<h2>Getting Started</h2>

<p>First, you need to get a copy of the Hadoop code.</p>

<p>Edit the file <tt>conf/hadoop-env.sh</tt> to define at least
<tt>JAVA_HOME</tt>.</p>
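
<p>For example, assuming the JDK lives under <tt>/usr/lib/jvm/java-6-sun</tt>
(an illustrative path; adjust it for your system), the relevant line in
<tt>conf/hadoop-env.sh</tt> would be:</p>
<blockquote><pre>
# Root of the Java installation (the path below is only an example)
export JAVA_HOME=/usr/lib/jvm/java-6-sun
</pre></blockquote>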

<p>Try the following command:</p>
<p><tt>bin/hadoop</tt></p>
<p>This will display the documentation for the Hadoop command script.</p>

<h2>Standalone operation</h2>

<p>By default, Hadoop is configured to run things in a non-distributed
mode, as a single Java process.  This is useful for debugging, and can
be demonstrated as follows:</p>
<tt>
mkdir input<br>
cp conf/*.xml input<br>
bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'<br>
cat output/*
</tt>
<p>This will display counts for each match of the <a
href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">
regular expression</a>.</p>

<p>Note that input is specified as a <em>directory</em> containing input
files and that output is also specified as a directory where parts are
written.</p>

<h2>Distributed operation</h2>

To configure Hadoop for distributed operation you must specify the
following:

<ol>

<li>The NameNode (Distributed Filesystem master) host.  This is
specified with the configuration property <tt><a
 href="../core-default.html#fs.default.name">fs.default.name</a></tt>.
</li>

<li>The <A HREF="org/apache/hadoop/mapred/JobTracker.html" title="class in org.apache.hadoop.mapred"><CODE>JobTracker</CODE></A> (MapReduce master)
host and port.  This is specified with the configuration property
<tt><a
href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>.
</li>

<li>A <em>slaves</em> file that lists the names of all the hosts in
the cluster.  The default slaves file is <tt>conf/slaves</tt>; a short
sample appears just below this list.</li>

</ol>
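
<p>For illustration, a <tt>conf/slaves</tt> file for a three-worker cluster
might look like the following (the hostnames are placeholders, not
prescribed names):</p>
<blockquote><pre>
slave1.example.com
slave2.example.com
slave3.example.com
</pre></blockquote>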

<h3>Pseudo-distributed configuration</h3>

You can in fact run everything on a single host.  To run things this
way, put the following in each of the configuration files named below.
<br/>
<br/>
conf/core-site.xml:
<xmp><configuration>

  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost/</value>
  </property>

</configuration></xmp>

conf/hdfs-site.xml:
<xmp><configuration>

  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>

</configuration></xmp>

conf/mapred-site.xml:
<xmp><configuration>

  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:9001</value>
  </property>

</configuration></xmp>

<p>(We also set the HDFS replication level to 1 in order to
reduce warnings when running on a single node.)</p>

<p>Now check that the command <br><tt>ssh localhost</tt><br> does not
require a password.  If it does, execute the following commands:</p>

<p><tt>ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa<br>
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
</tt></p>

<h3>Bootstrapping</h3>

<p>A new distributed filesystem must be formatted with the following
command, run on the master node:</p>

<p><tt>bin/hadoop namenode -format</tt></p>

<p>The Hadoop daemons are started with the following command:</p>

<p><tt>bin/start-all.sh</tt></p>

<p>Daemon log output is written to the <tt>logs/</tt> directory.</p>

<p>Input files are copied into the distributed filesystem as follows:</p>

<p><tt>bin/hadoop fs -put input input</tt></p>
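
<p>To confirm the copy succeeded (an optional sanity check, not part of the
original walkthrough), list the directory in the distributed filesystem:</p>

<p><tt>bin/hadoop fs -ls input</tt></p>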

<h3>Distributed execution</h3>

<p>Things are run as before, but output must be copied locally to
examine it:</p>

<tt>
bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'<br>
bin/hadoop fs -get output output<br>
cat output/*
</tt>

<p>When you're done, stop the daemons with:</p>

<p><tt>bin/stop-all.sh</tt></p>

<h3>Fully-distributed operation</h3>

<p>Fully distributed operation is just like the pseudo-distributed operation
described above, except that you must specify the following (a sample
configuration sketch follows this list):</p>

<ol>

<li>The hostname or IP address of your master server in the value
for <tt><a
href="../core-default.html#fs.default.name">fs.default.name</a></tt>,
  as <tt><em>hdfs://master.example.com/</em></tt> in <tt>conf/core-site.xml</tt>.</li>

<li>The host and port of your master server in the value
of <tt><a href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker</a></tt>
as <tt><em>master.example.com</em>:<em>port</em></tt> in <tt>conf/mapred-site.xml</tt>.</li>

<li>Directories for <tt><a
href="../hdfs-default.html#dfs.name.dir">dfs.name.dir</a></tt> and
<tt><a href="../hdfs-default.html#dfs.data.dir">dfs.data.dir</a></tt>
in <tt>conf/hdfs-site.xml</tt>.  These are local directories used to hold distributed filesystem
data on the master node and slave nodes respectively.  Note
that <tt>dfs.data.dir</tt> may contain a space- or comma-separated
list of directory names, so that data may be stored on multiple local
devices.</li>

<li><tt><a href="../mapred-default.html#mapred.local.dir">mapred.local.dir</a></tt>
  in <tt>conf/mapred-site.xml</tt>, the local directory where temporary
  MapReduce data is stored.  It also may be a list of directories.</li>

<li><tt><a
href="../mapred-default.html#mapred.map.tasks">mapred.map.tasks</a></tt>
and <tt><a
href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks</a></tt>
in <tt>conf/mapred-site.xml</tt>.
As a rule of thumb, use 10x the
number of slave processors for <tt>mapred.map.tasks</tt>, and 2x the
number of slave processors for <tt>mapred.reduce.tasks</tt>.</li>

</ol>
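
<p>Putting these items together, a minimal <tt>conf/hdfs-site.xml</tt> for a
fully-distributed cluster might look like the sketch below.  The directory
paths and hostnames are illustrative placeholders, not prescribed values.</p>

conf/hdfs-site.xml:
<xmp><configuration>

  <property>
    <name>dfs.name.dir</name>
    <value>/home/hadoop/dfs/name</value>
  </property>

  <property>
    <name>dfs.data.dir</name>
    <value>/home/hadoop/dfs/data</value>
  </property>

</configuration></xmp>

<p>The corresponding <tt>conf/mapred-site.xml</tt>, sketched here for a
hypothetical cluster of ten dual-processor slaves (20 slave processors, so
roughly 10x and 2x that count per the rule of thumb above):</p>

conf/mapred-site.xml:
<xmp><configuration>

  <property>
    <name>mapred.job.tracker</name>
    <value>master.example.com:9001</value>
  </property>

  <property>
    <name>mapred.local.dir</name>
    <value>/home/hadoop/mapred/local</value>
  </property>

  <property>
    <name>mapred.map.tasks</name>
    <value>200</value>
  </property>

  <property>
    <name>mapred.reduce.tasks</name>
    <value>40</value>
  </property>

</configuration></xmp>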

<p>Finally, list all slave hostnames or IP addresses in your
<tt>conf/slaves</tt> file, one per line.  Then format your filesystem
and start your cluster on your master node, as above.</p>

<P>
<HR>


<!-- ======= START OF BOTTOM NAVBAR ====== -->
<A NAME="navbar_bottom"><!-- --></A>
<A HREF="#skip-navbar_bottom" title="Skip navigation links"></A>
<TABLE BORDER="0" WIDTH="100%" CELLPADDING="1" CELLSPACING="0" SUMMARY="">
<TR>
<TD COLSPAN=2 BGCOLOR="#EEEEFF" CLASS="NavBarCell1">
<A NAME="navbar_bottom_firstrow"><!-- --></A>
<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="3" SUMMARY="">
  <TR ALIGN="center" VALIGN="top">
  <TD BGCOLOR="#FFFFFF" CLASS="NavBarCell1Rev"> &nbsp;<FONT CLASS="NavBarFont1Rev"><B>Overview</B></FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Package</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Class</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <FONT CLASS="NavBarFont1">Use</FONT>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="overview-tree.html"><FONT CLASS="NavBarFont1"><B>Tree</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="deprecated-list.html"><FONT CLASS="NavBarFont1"><B>Deprecated</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="index-all.html"><FONT CLASS="NavBarFont1"><B>Index</B></FONT></A>&nbsp;</TD>
  <TD BGCOLOR="#EEEEFF" CLASS="NavBarCell1">    <A HREF="help-doc.html"><FONT CLASS="NavBarFont1"><B>Help</B></FONT></A>&nbsp;</TD>
  </TR>
</TABLE>
</TD>
<TD ALIGN="right" VALIGN="top" ROWSPAN=3><EM>
</EM>
</TD>
</TR>

<TR>
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
&nbsp;PREV&nbsp;
&nbsp;NEXT</FONT></TD>
<TD BGCOLOR="white" CLASS="NavBarCell2"><FONT SIZE="-2">
  <A HREF="index.html?overview-summary.html" target="_top"><B>FRAMES</B></A>  &nbsp;
&nbsp;<A HREF="overview-summary.html" target="_top"><B>NO FRAMES</B></A>  &nbsp;
&nbsp;<SCRIPT type="text/javascript">
  <!--
  if(window==top) {
    document.writeln('<A HREF="allclasses-noframe.html"><B>All Classes</B></A>');
  }
  //-->
</SCRIPT>
<NOSCRIPT>
  <A HREF="allclasses-noframe.html"><B>All Classes</B></A>
</NOSCRIPT>


</FONT></TD>
</TR>
</TABLE>
<A NAME="skip-navbar_bottom"></A>
<!-- ======== END OF BOTTOM NAVBAR ======= -->

<HR>
Copyright &copy; 2009 The Apache Software Foundation
</BODY>
</HTML>