/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 18 | |
---|
| 19 | package org.apache.hadoop.fs; |
---|
| 20 | |
---|
| 21 | |
---|
| 22 | import java.io.IOException; |
---|
| 23 | import java.util.Iterator; |
---|
| 24 | |
---|
| 25 | import org.apache.hadoop.conf.Configuration; |
---|
| 26 | import org.apache.hadoop.hdfs.MiniDFSCluster; |
---|
| 27 | import org.apache.hadoop.fs.FSDataOutputStream; |
---|
| 28 | import org.apache.hadoop.fs.FileSystem; |
---|
| 29 | import org.apache.hadoop.fs.FsShell; |
---|
| 30 | import org.apache.hadoop.fs.Path; |
---|
| 31 | import org.apache.hadoop.io.LongWritable; |
---|
| 32 | import org.apache.hadoop.io.Text; |
---|
| 33 | import org.apache.hadoop.mapred.FileInputFormat; |
---|
| 34 | import org.apache.hadoop.mapred.FileOutputFormat; |
---|
| 35 | import org.apache.hadoop.mapred.JobClient; |
---|
| 36 | import org.apache.hadoop.mapred.JobConf; |
---|
| 37 | import org.apache.hadoop.mapred.Mapper; |
---|
| 38 | import org.apache.hadoop.mapred.MiniMRCluster; |
---|
| 39 | import org.apache.hadoop.mapred.OutputCollector; |
---|
| 40 | import org.apache.hadoop.mapred.Reducer; |
---|
| 41 | import org.apache.hadoop.mapred.Reporter; |
---|
| 42 | import org.apache.hadoop.mapred.TextInputFormat; |
---|
| 43 | import org.apache.hadoop.mapred.TextOutputFormat; |
---|
| 44 | import org.apache.hadoop.tools.HadoopArchives; |
---|
| 45 | import org.apache.hadoop.util.ToolRunner; |
---|
| 46 | |
---|
| 47 | import junit.framework.TestCase; |
---|
| 48 | |
---|
| 49 | /** |
---|
| 50 | * test the har file system |
---|
| 51 | * create a har filesystem |
---|
| 52 | * run fs commands |
---|
| 53 | * and then run a map reduce job |
---|
| 54 | */ |
---|
| 55 | public class TestHarFileSystem extends TestCase { |
---|
| 56 | private Path inputPath; |
---|
| 57 | private MiniDFSCluster dfscluster; |
---|
| 58 | private MiniMRCluster mapred; |
---|
| 59 | private FileSystem fs; |
---|
| 60 | private Path filea, fileb, filec; |
---|
| 61 | private Path archivePath; |
---|
| 62 | |
---|
| 63 | protected void setUp() throws Exception { |
---|
| 64 | super.setUp(); |
---|
| 65 | dfscluster = new MiniDFSCluster(new JobConf(), 2, true, null); |
---|
| 66 | fs = dfscluster.getFileSystem(); |
---|
| 67 | mapred = new MiniMRCluster(2, fs.getUri().toString(), 1); |
---|
| 68 | inputPath = new Path(fs.getHomeDirectory(), "test"); |
---|
| 69 | filea = new Path(inputPath,"a"); |
---|
| 70 | fileb = new Path(inputPath,"b"); |
---|
| 71 | filec = new Path(inputPath,"c"); |
---|
| 72 | archivePath = new Path(fs.getHomeDirectory(), "tmp"); |
---|
| 73 | } |
---|
| 74 | |
---|
| 75 | protected void tearDown() throws Exception { |
---|
| 76 | try { |
---|
| 77 | if (mapred != null) { |
---|
| 78 | mapred.shutdown(); |
---|
| 79 | } |
---|
| 80 | if (dfscluster != null) { |
---|
| 81 | dfscluster.shutdown(); |
---|
| 82 | } |
---|
| 83 | } catch(Exception e) { |
---|
| 84 | System.err.println(e); |
---|
| 85 | } |
---|
| 86 | super.tearDown(); |
---|
| 87 | } |
---|
| 88 | |
---|
| 89 | static class TextMapperReducer implements Mapper<LongWritable, Text, Text, Text>, |
---|
| 90 | Reducer<Text, Text, Text, Text> { |
---|
| 91 | |
---|
| 92 | public void configure(JobConf conf) { |
---|
| 93 | //do nothing |
---|
| 94 | } |
---|
| 95 | |
---|
| 96 | public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { |
---|
| 97 | output.collect(value, new Text("")); |
---|
| 98 | } |
---|
| 99 | |
---|
| 100 | public void close() throws IOException { |
---|
| 101 | // do nothing |
---|
| 102 | } |
---|
| 103 | |
---|
| 104 | public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { |
---|
| 105 | while(values.hasNext()) { |
---|
| 106 | values.next(); |
---|
| 107 | output.collect(key, null); |
---|
| 108 | } |
---|
| 109 | } |
---|
| 110 | } |
---|
| 111 | |
---|
| 112 | public void testArchives() throws Exception { |
---|
| 113 | fs.mkdirs(inputPath); |
---|
| 114 | |
---|
| 115 | FSDataOutputStream out = fs.create(filea); |
---|
| 116 | out.write("a".getBytes()); |
---|
| 117 | out.close(); |
---|
| 118 | out = fs.create(fileb); |
---|
| 119 | out.write("b".getBytes()); |
---|
| 120 | out.close(); |
---|
| 121 | out = fs.create(filec); |
---|
| 122 | out.write("c".getBytes()); |
---|
| 123 | out.close(); |
---|
| 124 | Configuration conf = mapred.createJobConf(); |
---|
| 125 | HadoopArchives har = new HadoopArchives(conf); |
---|
| 126 | String[] args = new String[3]; |
---|
| 127 | //check for destination not specfied |
---|
| 128 | args[0] = "-archiveName"; |
---|
| 129 | args[1] = "foo.har"; |
---|
| 130 | args[2] = inputPath.toString(); |
---|
| 131 | int ret = ToolRunner.run(har, args); |
---|
| 132 | assertTrue(ret != 0); |
---|
| 133 | args = new String[4]; |
---|
| 134 | //check for wrong archiveName |
---|
| 135 | args[0] = "-archiveName"; |
---|
| 136 | args[1] = "/d/foo.har"; |
---|
| 137 | args[2] = inputPath.toString(); |
---|
| 138 | args[3] = archivePath.toString(); |
---|
| 139 | ret = ToolRunner.run(har, args); |
---|
| 140 | assertTrue(ret != 0); |
---|
| 141 | // se if dest is a file |
---|
| 142 | args[1] = "foo.har"; |
---|
| 143 | args[3] = filec.toString(); |
---|
| 144 | ret = ToolRunner.run(har, args); |
---|
| 145 | assertTrue(ret != 0); |
---|
| 146 | //this is a valid run |
---|
| 147 | args[0] = "-archiveName"; |
---|
| 148 | args[1] = "foo.har"; |
---|
| 149 | args[2] = inputPath.toString(); |
---|
| 150 | args[3] = archivePath.toString(); |
---|
| 151 | ret = ToolRunner.run(har, args); |
---|
| 152 | //checl for the existenece of the archive |
---|
| 153 | assertTrue(ret == 0); |
---|
| 154 | ///try running it again. it should not |
---|
| 155 | // override the directory |
---|
| 156 | ret = ToolRunner.run(har, args); |
---|
| 157 | assertTrue(ret != 0); |
---|
| 158 | Path finalPath = new Path(archivePath, "foo.har"); |
---|
| 159 | Path fsPath = new Path(inputPath.toUri().getPath()); |
---|
| 160 | String relative = fsPath.toString().substring(1); |
---|
| 161 | Path filePath = new Path(finalPath, relative); |
---|
| 162 | //make it a har path |
---|
| 163 | Path harPath = new Path("har://" + filePath.toUri().getPath()); |
---|
| 164 | assertTrue(fs.exists(new Path(finalPath, "_index"))); |
---|
| 165 | assertTrue(fs.exists(new Path(finalPath, "_masterindex"))); |
---|
| 166 | assertTrue(!fs.exists(new Path(finalPath, "_logs"))); |
---|
| 167 | //creation tested |
---|
| 168 | //check if the archive is same |
---|
| 169 | // do ls and cat on all the files |
---|
| 170 | FsShell shell = new FsShell(conf); |
---|
| 171 | args = new String[2]; |
---|
| 172 | args[0] = "-ls"; |
---|
| 173 | args[1] = harPath.toString(); |
---|
| 174 | ret = ToolRunner.run(shell, args); |
---|
| 175 | // ls should work. |
---|
| 176 | assertTrue((ret == 0)); |
---|
| 177 | //now check for contents of filea |
---|
| 178 | // fileb and filec |
---|
| 179 | Path harFilea = new Path(harPath, "a"); |
---|
| 180 | Path harFileb = new Path(harPath, "b"); |
---|
| 181 | Path harFilec = new Path(harPath, "c"); |
---|
| 182 | FileSystem harFs = harFilea.getFileSystem(conf); |
---|
| 183 | FSDataInputStream fin = harFs.open(harFilea); |
---|
| 184 | byte[] b = new byte[4]; |
---|
| 185 | int readBytes = fin.read(b); |
---|
| 186 | fin.close(); |
---|
| 187 | assertTrue("strings are equal ", (b[0] == "a".getBytes()[0])); |
---|
| 188 | fin = harFs.open(harFileb); |
---|
| 189 | fin.read(b); |
---|
| 190 | fin.close(); |
---|
| 191 | assertTrue("strings are equal ", (b[0] == "b".getBytes()[0])); |
---|
| 192 | fin = harFs.open(harFilec); |
---|
| 193 | fin.read(b); |
---|
| 194 | fin.close(); |
---|
| 195 | assertTrue("strings are equal ", (b[0] == "c".getBytes()[0])); |
---|
| 196 | // ok all files match |
---|
| 197 | // run a map reduce job |
---|
| 198 | Path outdir = new Path(fs.getHomeDirectory(), "mapout"); |
---|
| 199 | JobConf jobconf = mapred.createJobConf(); |
---|
| 200 | FileInputFormat.addInputPath(jobconf, harPath); |
---|
| 201 | jobconf.setInputFormat(TextInputFormat.class); |
---|
| 202 | jobconf.setOutputFormat(TextOutputFormat.class); |
---|
| 203 | FileOutputFormat.setOutputPath(jobconf, outdir); |
---|
| 204 | jobconf.setMapperClass(TextMapperReducer.class); |
---|
| 205 | jobconf.setMapOutputKeyClass(Text.class); |
---|
| 206 | jobconf.setMapOutputValueClass(Text.class); |
---|
| 207 | jobconf.setReducerClass(TextMapperReducer.class); |
---|
| 208 | jobconf.setNumReduceTasks(1); |
---|
| 209 | JobClient.runJob(jobconf); |
---|
| 210 | args[1] = outdir.toString(); |
---|
| 211 | ret = ToolRunner.run(shell, args); |
---|
| 212 | |
---|
| 213 | FileStatus[] status = fs.globStatus(new Path(outdir, "part*")); |
---|
| 214 | Path reduceFile = status[0].getPath(); |
---|
| 215 | FSDataInputStream reduceIn = fs.open(reduceFile); |
---|
| 216 | b = new byte[6]; |
---|
| 217 | reduceIn.read(b); |
---|
| 218 | //assuming all the 6 bytes were read. |
---|
| 219 | Text readTxt = new Text(b); |
---|
| 220 | assertTrue("a\nb\nc\n".equals(readTxt.toString())); |
---|
| 221 | assertTrue("number of bytes left should be -1", reduceIn.read(b) == -1); |
---|
| 222 | reduceIn.close(); |
---|
| 223 | } |
---|
| 224 | } |
---|