source: proiecte/HadoopJUnit/hadoop-0.20.1/docs/hadoop_archives.html @ 142

Last change on this file since 142 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUNit Project

  • Property svn:executable set to *
File size: 10.4 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html>
3<head>
4<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
5<meta content="Apache Forrest" name="Generator">
6<meta name="Forrest-version" content="0.8">
7<meta name="Forrest-skin-name" content="pelt">
8<title>Archives Guide</title>
9<link type="text/css" href="skin/basic.css" rel="stylesheet">
10<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
11<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
12<link type="text/css" href="skin/profile.css" rel="stylesheet">
13<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
14<link rel="shortcut icon" href="images/favicon.ico">
15</head>
16<body onload="init()">
17<script type="text/javascript">ndeSetTextSize();</script>
18<div id="top">
19<!--+
20    |breadtrail
21    +-->
22<div class="breadtrail">
23<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
24</div>
25<!--+
26    |header
27    +-->
28<div class="header">
29<!--+
30    |start group logo
31    +-->
32<div class="grouplogo">
33<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
34</div>
35<!--+
36    |end group logo
37    +-->
38<!--+
39    |start Project Logo
40    +-->
41<div class="projectlogo">
42<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
43</div>
44<!--+
45    |end Project Logo
46    +-->
47<!--+
48    |start Search
49    +-->
50<div class="searchbox">
51<form action="http://www.google.com/search" method="get" class="roundtopsmall">
52<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
53                    <input name="Search" value="Search" type="submit">
54</form>
55</div>
56<!--+
57    |end search
58    +-->
59<!--+
60    |start Tabs
61    +-->
62<ul id="tabs">
63<li>
64<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
65</li>
66<li>
67<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
68</li>
69<li class="current">
70<a class="selected" href="index.html">Hadoop 0.20 Documentation</a>
71</li>
72</ul>
73<!--+
74    |end Tabs
75    +-->
76</div>
77</div>
78<div id="main">
79<div id="publishedStrip">
80<!--+
81    |start Subtabs
82    +-->
83<div id="level2tabs"></div>
84<!--+
85    |end Subtabs
86    +-->
87<script type="text/javascript"><!--
88document.write("Last Published: " + document.lastModified);
89//  --></script>
90</div>
91<!--+
92    |breadtrail
93    +-->
94<div class="breadtrail">
95
96             &nbsp;
97           </div>
98<!--+
99    |start Menu, mainarea
100    +-->
101<!--+
102    |start Menu
103    +-->
104<div id="menu">
105<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Getting Started</div>
106<div id="menu_1.1" class="menuitemgroup">
107<div class="menuitem">
108<a href="index.html">Overview</a>
109</div>
110<div class="menuitem">
111<a href="quickstart.html">Quick Start</a>
112</div>
113<div class="menuitem">
114<a href="cluster_setup.html">Cluster Setup</a>
115</div>
116<div class="menuitem">
117<a href="mapred_tutorial.html">Map/Reduce Tutorial</a>
118</div>
119</div>
120<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Programming Guides</div>
121<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
122<div class="menuitem">
123<a href="commands_manual.html">Commands</a>
124</div>
125<div class="menuitem">
126<a href="distcp.html">DistCp</a>
127</div>
128<div class="menuitem">
129<a href="native_libraries.html">Native Libraries</a>
130</div>
131<div class="menuitem">
132<a href="streaming.html">Streaming</a>
133</div>
134<div class="menuitem">
135<a href="fair_scheduler.html">Fair Scheduler</a>
136</div>
137<div class="menuitem">
138<a href="capacity_scheduler.html">Capacity Scheduler</a>
139</div>
140<div class="menuitem">
141<a href="service_level_auth.html">Service Level Authorization</a>
142</div>
143<div class="menuitem">
144<a href="vaidya.html">Vaidya</a>
145</div>
146<div class="menupage">
147<div class="menupagetitle">Archives</div>
148</div>
149</div>
150<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">HDFS</div>
151<div id="menu_1.3" class="menuitemgroup">
152<div class="menuitem">
153<a href="hdfs_user_guide.html">User Guide</a>
154</div>
155<div class="menuitem">
156<a href="hdfs_design.html">Architecture</a>
157</div>
158<div class="menuitem">
159<a href="hdfs_shell.html">File System Shell Guide</a>
160</div>
161<div class="menuitem">
162<a href="hdfs_permissions_guide.html">Permissions Guide</a>
163</div>
164<div class="menuitem">
165<a href="hdfs_quota_admin_guide.html">Quotas Guide</a>
166</div>
167<div class="menuitem">
168<a href="SLG_user_guide.html">Synthetic Load Generator Guide</a>
169</div>
170<div class="menuitem">
171<a href="libhdfs.html">C API libhdfs</a>
172</div>
173</div>
174<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">HOD</div>
175<div id="menu_1.4" class="menuitemgroup">
176<div class="menuitem">
177<a href="hod_user_guide.html">User Guide</a>
178</div>
179<div class="menuitem">
180<a href="hod_admin_guide.html">Admin Guide</a>
181</div>
182<div class="menuitem">
183<a href="hod_config_guide.html">Config Guide</a>
184</div>
185</div>
186<div onclick="SwitchMenu('menu_1.5', 'skin/')" id="menu_1.5Title" class="menutitle">Miscellaneous</div>
187<div id="menu_1.5" class="menuitemgroup">
188<div class="menuitem">
189<a href="api/index.html">API Docs</a>
190</div>
191<div class="menuitem">
192<a href="jdiff/changes.html">API Changes</a>
193</div>
194<div class="menuitem">
195<a href="http://wiki.apache.org/hadoop/">Wiki</a>
196</div>
197<div class="menuitem">
198<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
199</div>
200<div class="menuitem">
201<a href="releasenotes.html">Release Notes</a>
202</div>
203<div class="menuitem">
204<a href="changes.html">Change Log</a>
205</div>
206</div>
207<div id="credit"></div>
208<div id="roundbottom">
209<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
210<!--+
211  |alternative credits
212  +-->
213<div id="credit2"></div>
214</div>
215<!--+
216    |end Menu
217    +-->
218<!--+
219    |start content
220    +-->
221<div id="content">
222<div title="Portable Document Format" class="pdflink">
223<a class="dida" href="hadoop_archives.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
224        PDF</a>
225</div>
226<h1>Archives Guide</h1>
227<div id="minitoc-area">
228<ul class="minitoc">
229<li>
230<a href="#What+are+Hadoop+archives%3F"> What are Hadoop archives? </a>
231</li>
232<li>
233<a href="#How+to+create+an+archive%3F"> How to create an archive? </a>
234</li>
235<li>
236<a href="#How+to+look+up+files+in+archives%3F"> How to look up files in archives? </a>
237</li>
238</ul>
239</div>
240       
241<a name="N1000D"></a><a name="What+are+Hadoop+archives%3F"></a>
242<h2 class="h3"> What are Hadoop archives? </h2>
243<div class="section">
244<p>
245        Hadoop archives are special format archives. A Hadoop archive
246        maps to a file system directory. A Hadoop archive always has a *.har
247        extension. A Hadoop archive directory contains metadata (in the form
248        of _index and _masterindex) and data (part-*) files. The _index file contains
249        the names of the files that are part of the archive and their locations
250        within the part files.
251        </p>
252</div>
253       
254<a name="N10017"></a><a name="How+to+create+an+archive%3F"></a>
255<h2 class="h3"> How to create an archive? </h2>
256<div class="section">
257<p>
258       
259<span class="codefrag">Usage: hadoop archive -archiveName name &lt;src&gt;* &lt;dest&gt;</span>
260       
261</p>
262<p>
263        -archiveName is the name of the archive you would like to create.
264        An example would be foo.har. The name should have a *.har extension.
265        The inputs are file system pathnames which work as usual with regular
266        expressions. The destination directory would contain the archive.
267        Note that the archives are created by a Map/Reduce job, so you
268        need a Map/Reduce cluster to run this. The following is an example:</p>
269<p>
270       
271<span class="codefrag">hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/zoo/</span>
272       
273</p>
274<p>
275        In the above example /user/hadoop/dir1 and /user/hadoop/dir2 will be
276        archived in the following file system directory -- /user/zoo/foo.har.
277        The sources are not changed or removed when an archive is created.
278        </p>
279</div>
280       
281<a name="N1002F"></a><a name="How+to+look+up+files+in+archives%3F"></a>
282<h2 class="h3"> How to look up files in archives? </h2>
283<div class="section">
284<p>
285        The archive exposes itself as a file system layer, so all the fs shell
286        commands work on archives, but with a different URI. Also, note that
287        archives are immutable, so renames, deletes and creates return
288        an error. The URI for Hadoop archives is
289        </p>
290<p>
291<span class="codefrag">har://scheme-hostname:port/archivepath/fileinarchive</span>
292</p>
293<p>
294        If no scheme is provided it assumes the underlying filesystem.
295        In that case the URI would look like
296        </p>
297<p>
298<span class="codefrag">
299        har:///archivepath/fileinarchive</span>
300</p>
301<p>
302        Here is an example of an archive. The input to the archive is /dir. The directory dir contains
303        the files filea and fileb. To archive /dir to /user/hadoop/foo.har, the command is
304        </p>
305<p>
306<span class="codefrag">hadoop archive -archiveName foo.har /dir /user/hadoop</span>
307       
308</p>
309<p>
310        To get a file listing for the files in the created archive:
311        </p>
312<p>
313<span class="codefrag">hadoop dfs -lsr har:///user/hadoop/foo.har</span>
314</p>
315<p>To cat filea in the archive:
316        </p>
317<p>
318<span class="codefrag">hadoop dfs -cat har:///user/hadoop/foo.har/dir/filea</span>
319</p>
320</div>
321       
322</div>
323<!--+
324    |end content
325    +-->
326<div class="clearboth">&nbsp;</div>
327</div>
328<div id="footer">
329<!--+
330    |start bottomstrip
331    +-->
332<div class="lastmodified">
333<script type="text/javascript"><!--
334document.write("Last Published: " + document.lastModified);
335//  --></script>
336</div>
337<div class="copyright">
338        Copyright &copy;
339         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
340</div>
341<!--+
342    |end bottomstrip
343    +-->
344</div>
345</body>
346</html>
Note: See TracBrowser for help on using the repository browser.