source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/quickstart.html @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the main files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 17.9 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html>
3<head>
4<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5<meta content="Apache Forrest" name="Generator">
6<meta name="Forrest-version" content="0.8">
7<meta name="Forrest-skin-name" content="pelt">
8<title>Quick Start</title>
9<link type="text/css" href="skin/basic.css" rel="stylesheet">
10<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12<link type="text/css" href="skin/profile.css" rel="stylesheet">
13<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14<link rel="shortcut icon" href="images/favicon.ico">
15</head>
16<body onload="init()">
17<script type="text/javascript">ndeSetTextSize();</script>
18<div id="top">
19<!--+
20    |breadtrail
21    +-->
22<div class="breadtrail">
23<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24</div>
25<!--+
26    |header
27    +-->
28<div class="header">
29<!--+
30    |start group logo
31    +-->
32<div class="grouplogo">
33<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34</div>
35<!--+
36    |end group logo
37    +-->
38<!--+
39    |start Project Logo
40    +-->
41<div class="projectlogo">
42<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
43</div>
44<!--+
45    |end Project Logo
46    +-->
47<!--+
48    |start Search
49    +-->
50<div class="searchbox">
51<form action="http://www.google.com/search" method="get" class="roundtopsmall">
52<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
53                    <input name="Search" value="Search" type="submit">
54</form>
55</div>
56<!--+
57    |end search
58    +-->
59<!--+
60    |start Tabs
61    +-->
62<ul id="tabs">
63<li>
64<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
65</li>
66<li>
67<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
68</li>
69<li class="current">
70<a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
71</li>
72</ul>
73<!--+
74    |end Tabs
75    +-->
76</div>
77</div>
78<div id="main">
79<div id="publishedStrip">
80<!--+
81    |start Subtabs
82    +-->
83<div id="level2tabs"></div>
84<!--+
85    |end Endtabs
86    +-->
87<script type="text/javascript"><!--
88document.write("Last Published: " + document.lastModified);
89//  --></script>
90</div>
91<!--+
92    |breadtrail
93    +-->
94<div class="breadtrail">
95
96             &nbsp;
97           </div>
98<!--+
99    |start Menu, mainarea
100    +-->
101<!--+
102    |start Menu
103    +-->
104<div id="menu">
105<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Getting Started</div>
106<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
107<div class="menuitem">
108<a href="index.html">Overview</a>
109</div>
110<div class="menupage">
111<div class="menupagetitle">Quick Start</div>
112</div>
113<div class="menuitem">
114<a href="cluster_setup.html">Cluster Setup</a>
115</div>
116<div class="menuitem">
117<a href="mapred_tutorial.html">Map/Reduce Tutorial</a>
118</div>
119</div>
120<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Programming Guides</div>
121<div id="menu_1.2" class="menuitemgroup">
122<div class="menuitem">
123<a href="commands_manual.html">Commands</a>
124</div>
125<div class="menuitem">
126<a href="distcp.html">DistCp</a>
127</div>
128<div class="menuitem">
129<a href="native_libraries.html">Native Libraries</a>
130</div>
131<div class="menuitem">
132<a href="streaming.html">Streaming</a>
133</div>
134<div class="menuitem">
135<a href="fair_scheduler.html">Fair Scheduler</a>
136</div>
137<div class="menuitem">
138<a href="capacity_scheduler.html">Capacity Scheduler</a>
139</div>
140<div class="menuitem">
141<a href="service_level_auth.html">Service Level Authorization</a>
142</div>
143<div class="menuitem">
144<a href="vaidya.html">Vaidya</a>
145</div>
146<div class="menuitem">
147<a href="hadoop_archives.html">Archives</a>
148</div>
149</div>
150<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">HDFS</div>
151<div id="menu_1.3" class="menuitemgroup">
152<div class="menuitem">
153<a href="hdfs_user_guide.html">User Guide</a>
154</div>
155<div class="menuitem">
156<a href="hdfs_design.html">Architecture</a>
157</div>
158<div class="menuitem">
159<a href="hdfs_shell.html">File System Shell Guide</a>
160</div>
161<div class="menuitem">
162<a href="hdfs_permissions_guide.html">Permissions Guide</a>
163</div>
164<div class="menuitem">
165<a href="hdfs_quota_admin_guide.html">Quotas Guide</a>
166</div>
167<div class="menuitem">
168<a href="SLG_user_guide.html">Synthetic Load Generator Guide</a>
169</div>
170<div class="menuitem">
171<a href="libhdfs.html">C API libhdfs</a>
172</div>
173</div>
174<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div>
175<div id="menu_1.4" class="menuitemgroup">
176<div class="menuitem">
177<a href="hod_user_guide.html">User Guide</a>
178</div>
179<div class="menuitem">
180<a href="hod_admin_guide.html">Admin Guide</a>
181</div>
182<div class="menuitem">
183<a href="hod_config_guide.html">Config Guide</a>
184</div>
185</div>
186<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
187<div id="menu_1.5" class="menuitemgroup">
188<div class="menuitem">
189<a href="api/index.html">API Docs</a>
190</div>
191<div class="menuitem">
192<a href="jdiff/changes.html">API Changes</a>
193</div>
194<div class="menuitem">
195<a href="http://wiki.apache.org/hadoop/">Wiki</a>
196</div>
197<div class="menuitem">
198<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
199</div>
200<div class="menuitem">
201<a href="releasenotes.html">Release Notes</a>
202</div>
203<div class="menuitem">
204<a href="changes.html">Change Log</a>
205</div>
206</div>
207<div id="credit"></div>
208<div id="roundbottom">
209<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
210<!--+
211  |alternative credits
212  +-->
213<div id="credit2"></div>
214</div>
215<!--+
216    |end Menu
217    +-->
218<!--+
219    |start content
220    +-->
221<div id="content">
222<div title="Portable Document Format" class="pdflink">
223<a class="dida" href="quickstart.pdf"><img alt="PDF icon" src="skin/images/pdfdoc.gif" class="skin"><br>
224        PDF</a>
225</div>
226<h1>Quick Start</h1>
227<div id="minitoc-area">
228<ul class="minitoc">
229<li>
230<a href="#Purpose">Purpose</a>
231</li>
232<li>
233<a href="#PreReqs">Pre-requisites</a>
234<ul class="minitoc">
235<li>
236<a href="#Supported+Platforms">Supported Platforms</a>
237</li>
238<li>
239<a href="#Required+Software">Required Software</a>
240</li>
241<li>
242<a href="#Installing+Software">Installing Software</a>
243</li>
244</ul>
245</li>
246<li>
247<a href="#Download">Download</a>
248</li>
249<li>
250<a href="#Prepare+to+Start+the+Hadoop+Cluster">Prepare to Start the Hadoop Cluster</a>
251</li>
252<li>
253<a href="#Local">Standalone Operation</a>
254</li>
255<li>
256<a href="#PseudoDistributed">Pseudo-Distributed Operation</a>
257<ul class="minitoc">
258<li>
259<a href="#Configuration">Configuration</a>
260</li>
261<li>
262<a href="#Setup+passphraseless">Setup passphraseless ssh</a>
263</li>
264<li>
265<a href="#Execution">Execution</a>
266</li>
267</ul>
268</li>
269<li>
270<a href="#FullyDistributed">Fully-Distributed Operation</a>
271</li>
272</ul>
273</div>
274 
275   
276<a name="N1000D"></a><a name="Purpose"></a>
277<h2 class="h3">Purpose</h2>
278<div class="section">
279<p>The purpose of this document is to help you get a single-node Hadoop
280      installation up and running very quickly so that you can get a flavour
281      of the Hadoop Distributed File System
282      (see <a href="hdfs_design.html"> <acronym title="Hadoop Distributed File System">HDFS</acronym> Architecture</a>) and
283      the Map/Reduce framework; that is, perform simple operations on HDFS and
284      run example jobs.</p>
285</div>
286   
287   
288<a name="N1001F"></a><a name="PreReqs"></a>
289<h2 class="h3">Pre-requisites</h2>
290<div class="section">
291<a name="N10025"></a><a name="Supported+Platforms"></a>
292<h3 class="h4">Supported Platforms</h3>
293<ul>
294         
295<li>
296            GNU/Linux is supported as a development and production platform.
297            Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes.
298          </li>
299         
300<li>
301            Win32 is supported as a <em>development platform</em>. Distributed
302            operation has not been well tested on Win32, so it is not
303            supported as a <em>production platform</em>.
304          </li>
305       
306</ul>
307<a name="N1003B"></a><a name="Required+Software"></a>
308<h3 class="h4">Required Software</h3>
309<p>Required software for Linux and Windows includes:</p>
310<ol>
311         
312<li>
313            Java<sup>TM</sup> 1.6.x, preferably from Sun, must be installed.
314          </li>
315         
316<li>
317           
318<strong>ssh</strong> must be installed and <strong>sshd</strong> must
319            be running to use the Hadoop scripts that manage remote Hadoop
320            daemons.
321          </li>
322       
323</ol>
324<p>Additional requirements for Windows include:</p>
325<ol>
326         
327<li>
328           
329<a href="http://www.cygwin.com/">Cygwin</a> - Required for shell
330            support in addition to the required software above.
331          </li>
332       
333</ol>
334<a name="N10064"></a><a name="Installing+Software"></a>
335<h3 class="h4">Installing Software</h3>
336<p>If your cluster doesn't have the requisite software you will need to
337        install it.</p>
338<p>For example on Ubuntu Linux:</p>
339<p>
340         
341<span class="codefrag">$ sudo apt-get install ssh</span>
342<br>
343         
344<span class="codefrag">$ sudo apt-get install rsync</span>
345       
346</p>
347<p>On Windows, if you did not install the required software when you
348        installed Cygwin, start the Cygwin installer and select the packages:</p>
349<ul>
350         
351<li>openssh - the <em>Net</em> category</li>
352       
353</ul>
354</div>
355   
356   
357<a name="N10088"></a><a name="Download"></a>
358<h2 class="h3">Download</h2>
359<div class="section">
360<p>
361        To get a Hadoop distribution, download a recent
362        <a href="http://hadoop.apache.org/core/releases.html">stable release</a> from one of the Apache Download
363        Mirrors.
364      </p>
365</div>
366
367   
368<a name="N10096"></a><a name="Prepare+to+Start+the+Hadoop+Cluster"></a>
369<h2 class="h3">Prepare to Start the Hadoop Cluster</h2>
370<div class="section">
371<p>
372        Unpack the downloaded Hadoop distribution. In the distribution, edit the
373        file <span class="codefrag">conf/hadoop-env.sh</span> to define at least
374        <span class="codefrag">JAVA_HOME</span> to be the root of your Java installation.
375      </p>
376<p>
377            Try the following command:<br>
378       
379<span class="codefrag">$ bin/hadoop</span>
380<br>
381        This will display the usage documentation for the <strong>hadoop</strong> 
382        script.
383      </p>
384<p>Now you are ready to start your Hadoop cluster in one of the three supported
385      modes:
386      </p>
387<ul>
388       
389<li>Local (Standalone) Mode</li>
390       
391<li>Pseudo-Distributed Mode</li>
392       
393<li>Fully-Distributed Mode</li>
394     
395</ul>
396</div>
397   
398   
399<a name="N100C1"></a><a name="Local"></a>
400<h2 class="h3">Standalone Operation</h2>
401<div class="section">
402<p>By default, Hadoop is configured to run in a non-distributed
403      mode, as a single Java process. This is useful for debugging.</p>
404<p>
405        The following example copies the unpacked <span class="codefrag">conf</span> directory to
406        use as input and then finds and displays every match of the given regular
407        expression. Output is written to the given <span class="codefrag">output</span> directory.
408        <br>
409       
410<span class="codefrag">$ mkdir input</span>
411<br>
412       
413<span class="codefrag">$ cp conf/*.xml input</span>
414<br>
415       
416<span class="codefrag">
417          $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
418        </span>
419<br>
420       
421<span class="codefrag">$ cat output/*</span>
422     
423</p>
424</div>
425   
426   
427<a name="N100E5"></a><a name="PseudoDistributed"></a>
428<h2 class="h3">Pseudo-Distributed Operation</h2>
429<div class="section">
430<p>Hadoop can also be run on a single-node in a pseudo-distributed mode
431          where each Hadoop daemon runs in a separate Java process.</p>
432<a name="N100EE"></a><a name="Configuration"></a>
433<h3 class="h4">Configuration</h3>
434<p>Use the following:
435        <br>
436       
437<span class="codefrag">conf/core-site.xml</span>:</p>
438<table class="ForrestTable" cellspacing="1" cellpadding="4">
439       
440<tr>
441<td colspan="1" rowspan="1">&lt;configuration&gt;</td>
442</tr>
443
444         
445<tr>
446<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
447</tr>
448           
449<tr>
450<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;fs.default.name&lt;/name&gt;</td>
451</tr>
452           
453<tr>
454<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;hdfs://localhost:9000&lt;/value&gt;</td>
455</tr>
456         
457<tr>
458<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
459</tr>
460
461       
462<tr>
463<td colspan="1" rowspan="1">&lt;/configuration&gt;</td>
464</tr>
465       
466</table>
467<p>
468<br>
469<span class="codefrag">conf/hdfs-site.xml</span>:</p>
470<table class="ForrestTable" cellspacing="1" cellpadding="4">
471       
472<tr>
473<td colspan="1" rowspan="1">&lt;configuration&gt;</td>
474</tr>
475
476         
477<tr>
478<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
479</tr>
480           
481<tr>
482<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;dfs.replication&lt;/name&gt;</td>
483</tr>
484           
485<tr>
486<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</td>
487</tr>
488         
489<tr>
490<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
491</tr>
492
493       
494<tr>
495<td colspan="1" rowspan="1">&lt;/configuration&gt;</td>
496</tr>
497       
498</table>
499<p>
500<br>
501<span class="codefrag">conf/mapred-site.xml</span>:</p>
502<table class="ForrestTable" cellspacing="1" cellpadding="4">
503       
504<tr>
505<td colspan="1" rowspan="1">&lt;configuration&gt;</td>
506</tr>
507
508         
509<tr>
510<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
511</tr>
512           
513<tr>
514<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td>
515</tr>
516           
517<tr>
518<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td>
519</tr>
520         
521<tr>
522<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
523</tr>
524
525       
526<tr>
527<td colspan="1" rowspan="1">&lt;/configuration&gt;</td>
528</tr>
529       
530</table>
531<a name="N1017E"></a><a name="Setup+passphraseless"></a>
532<h3 class="h4">Setup passphraseless ssh</h3>
533<p>
534          Now check that you can ssh to the localhost without a passphrase:<br>
535         
536<span class="codefrag">$ ssh localhost</span>
537       
538</p>
539<p>
540          If you cannot ssh to localhost without a passphrase, execute the
541          following commands:<br>
542                 
543<span class="codefrag">$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa</span>
544<br>
545                 
546<span class="codefrag">$ cat ~/.ssh/id_dsa.pub &gt;&gt; ~/.ssh/authorized_keys</span>
547               
548</p>
549<a name="N1019B"></a><a name="Execution"></a>
550<h3 class="h4">Execution</h3>
551<p>
552          Format a new distributed filesystem:<br>
553         
554<span class="codefrag">$ bin/hadoop namenode -format</span>
555       
556</p>
557<p>
558                  Start the hadoop daemons:<br>
559         
560<span class="codefrag">$ bin/start-all.sh</span>
561       
562</p>
563<p>The hadoop daemon log output is written to the
564        <span class="codefrag">${HADOOP_LOG_DIR}</span> directory (defaults to
565        <span class="codefrag">${HADOOP_HOME}/logs</span>).</p>
566<p>Browse the web interface for the NameNode and the JobTracker; by
567        default they are available at:</p>
568<ul>
569         
570<li>
571           
572<span class="codefrag">NameNode</span> -
573            <a href="http://localhost:50070/">http://localhost:50070/</a>
574         
575</li>
576         
577<li>
578           
579<span class="codefrag">JobTracker</span> -
580            <a href="http://localhost:50030/">http://localhost:50030/</a>
581         
582</li>
583       
584</ul>
585<p>
586          Copy the input files into the distributed filesystem:<br>
587                 
588<span class="codefrag">$ bin/hadoop fs -put conf input</span>
589               
590</p>
591<p>
592          Run some of the examples provided:<br>
593         
594<span class="codefrag">
595            $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
596          </span>
597       
598</p>
599<p>Examine the output files:</p>
600<p>
601          Copy the output files from the distributed filesystem to the local
602          filesystem and examine them:<br>
603         
604<span class="codefrag">$ bin/hadoop fs -get output output</span>
605<br>
606         
607<span class="codefrag">$ cat output/*</span>
608       
609</p>
610<p> or </p>
611<p>
612          View the output files on the distributed filesystem:<br>
613         
614<span class="codefrag">$ bin/hadoop fs -cat output/*</span>
615       
616</p>
617<p>
618                  When you're done, stop the daemons with:<br>
619                 
620<span class="codefrag">$ bin/stop-all.sh</span>
621               
622</p>
623</div>
624   
625   
626<a name="N10208"></a><a name="FullyDistributed"></a>
627<h2 class="h3">Fully-Distributed Operation</h2>
628<div class="section">
629<p>For information on setting up fully-distributed, non-trivial clusters
630          see <a href="cluster_setup.html">Hadoop Cluster Setup</a>.</p>
631</div>
632   
633   
634<p>
635     
636<em>Java and JNI are trademarks or registered trademarks of
637      Sun Microsystems, Inc. in the United States and other countries.</em>
638   
639</p>
640   
641 
642</div>
643<!--+
644    |end content
645    +-->
646<div class="clearboth">&nbsp;</div>
647</div>
648<div id="footer">
649<!--+
650    |start bottomstrip
651    +-->
652<div class="lastmodified">
653<script type="text/javascript"><!--
654document.write("Last Published: " + document.lastModified);
655//  --></script>
656</div>
657<div class="copyright">
658        Copyright &copy;
659         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
660</div>
661<!--+
662    |end bottomstrip
663    +-->
664</div>
665</body>
666</html>
Note: See TracBrowser for help on using the repository browser.