/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.io.compress;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.RandomDatum;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.ReflectionUtils;

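/**
 * Tests for the compression codec framework: round-trips randomly
 * generated key/value data through each codec, exercises the
 * {@link CodecPool}, and verifies block-compressed SequenceFile I/O.
 */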
public class TestCodec extends TestCase {

  private static final Log LOG =
    LogFactory.getLog(TestCodec.class);

  private Configuration conf = new Configuration();
  private int count = 10000;
  private int seed = new Random().nextInt();

  public void testDefaultCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DefaultCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DefaultCodec");
  }

  public void testGzipCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
  }

  public void testBZip2Codec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.BZip2Codec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.BZip2Codec");
  }

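  /**
   * Round-trips {@code count} randomly generated key/value records through
   * the named codec and verifies that the decompressed stream matches the
   * original data.
   */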
  private static void codecTest(Configuration conf, int seed, int count,
                                String codecClass)
    throws IOException {

    // Create the codec
    CompressionCodec codec = null;
    try {
      codec = (CompressionCodec)
        ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException("Illegal codec: " + codecClass);
    }
    LOG.info("Created a Codec object of type: " + codecClass);

    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();

      key.write(data);
      value.write(data);
    }
    DataInputBuffer originalData = new DataInputBuffer();
    originalData.reset(data.getData(), 0, data.getLength());
    DataInputStream originalIn =
      new DataInputStream(new BufferedInputStream(originalData));

    LOG.info("Generated " + count + " records");

    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter =
      codec.createOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut =
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(data.getData(), 0, data.getLength());
    deflateOut.flush();
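    // finish() flushes any remaining compressed data and the stream trailer
    // into the buffer; without it the compressed output would be truncated.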
    deflateFilter.finish();
    LOG.info("Finished compressing data");

    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
                                 compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter =
      codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn =
      new DataInputStream(new BufferedInputStream(inflateFilter));

    // Check
    for (int i = 0; i < count; ++i) {
      RandomDatum k1 = new RandomDatum();
      RandomDatum v1 = new RandomDatum();
      k1.readFields(originalIn);
      v1.readFields(originalIn);

      RandomDatum k2 = new RandomDatum();
      RandomDatum v2 = new RandomDatum();
      k2.readFields(inflateIn);
      v2.readFields(inflateIn);

      // Verify each decompressed record matches the original; without this
      // assertion the loop reads the data but never actually checks it.
      assertTrue("original and compressed-then-decompressed-output not equal",
                 k1.equals(k2) && v1.equals(v2));
    }
    LOG.info("SUCCESS! Completed checking " + count + " records");
  }

  public void testCodecPoolGzipReuse() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("hadoop.native.lib", true);
    if (!ZlibFactory.isNativeZlibLoaded(conf)) {
      LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
      return;
    }
    GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
    DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
    Compressor c1 = CodecPool.getCompressor(gzc);
    Compressor c2 = CodecPool.getCompressor(dfc);
    CodecPool.returnCompressor(c1);
    CodecPool.returnCompressor(c2);
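    // Both codecs hand out zlib-based compressors when the native library is
    // loaded; the pool must keep them separate, so a GzipCodec request should
    // not receive the compressor that was checked in for DefaultCodec.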
    assertTrue("Got mismatched ZlibCompressor",
               c2 != CodecPool.getCompressor(gzc));
  }

  public void testSequenceFileDefaultCodec() throws IOException, ClassNotFoundException,
      InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.DefaultCodec", 100);
    sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.DefaultCodec", 1000000);
  }

  public void testSequenceFileBZip2Codec() throws IOException, ClassNotFoundException,
      InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 0, "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.BZip2Codec", 1000000);
  }

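  /**
   * Writes {@code lines} key/value pairs to a block-compressed SequenceFile
   * using the given codec and compression block size, then reads the file
   * back and checks every record.
   */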
  private static void sequenceFileCodecTest(Configuration conf, int lines,
                                            String codecClass, int blockSize)
    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {

    Path filePath = new Path("SequenceFileCodecTest." + codecClass);
    // Configure the block size used for block compression
    conf.setInt("io.seqfile.compress.blocksize", blockSize);

    // Create the SequenceFile
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath,
        Text.class, Text.class, CompressionType.BLOCK,
        (CompressionCodec)Class.forName(codecClass).newInstance());

    // Write some data
    LOG.info("Writing to SequenceFile...");
    for (int i = 0; i < lines; i++) {
      Text key = new Text("key" + i);
      Text value = new Text("value" + i);
      writer.append(key, value);
    }
    writer.close();

    // Read the data back and check
    LOG.info("Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);

    Writable key = (Writable)reader.getKeyClass().newInstance();
    Writable value = (Writable)reader.getValueClass().newInstance();

    int lc = 0;
    try {
      while (reader.next(key, value)) {
        assertEquals("key" + lc, key.toString());
        assertEquals("value" + lc, value.toString());
        lc++;
      }
    } finally {
      reader.close();
    }
    assertEquals(lines, lc);

    // Delete the temporary file
    fs.delete(filePath, false);

    LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
  }

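  /**
   * Command-line entry point.
   * Usage: {@code TestCodec [-count N] [-codec <codec class>]}
   */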
  public static void main(String[] args) {
    int count = 10000;
    String codecClass = "org.apache.hadoop.io.compress.DefaultCodec";

    String usage = "TestCodec [-count N] [-codec <codec class>]";
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    try {
      for (int i = 0; i < args.length; ++i) {  // parse command line
        if (args[i] == null) {
          continue;
        } else if (args[i].equals("-count")) {
          count = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-codec")) {
          codecClass = args[++i];
        }
      }

      Configuration conf = new Configuration();
      int seed = 0;
      codecTest(conf, seed, count, codecClass);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      e.printStackTrace();
    }
  }

  public TestCodec(String name) {
    super(name);
  }

}