source: proiecte/HadoopJUnit/hadoop-0.20.1/src/test/org/apache/hadoop/io/compress/TestCodec.java @ 120

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.io.compress;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.RandomDatum;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.ReflectionUtils;
public class TestCodec extends TestCase {

  private static final Log LOG =
    LogFactory.getLog(TestCodec.class);

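  // count: number of key/value record pairs round-tripped through each codec;
  // seed: randomized so each run exercises different data.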
  private Configuration conf = new Configuration();
  private int count = 10000;
  private int seed = new Random().nextInt();

  public void testDefaultCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DefaultCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DefaultCodec");
  }

  public void testGzipCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
  }

  public void testBZip2Codec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.BZip2Codec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.BZip2Codec");
  }

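  /**
   * Round-trips {@code count} random key/value records through the named
   * codec: serialize, compress, decompress, and compare the result against
   * the original bytes.
   */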
  private static void codecTest(Configuration conf, int seed, int count,
                                String codecClass)
    throws IOException {

    // Create the codec
    CompressionCodec codec = null;
    try {
      codec = (CompressionCodec)
        ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException("Illegal codec!");
    }
    LOG.info("Created a Codec object of type: " + codecClass);

    // Generate data
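    // RandomDatum produces pseudo-random, variable-length byte records, so
    // the codec sees a mix of record sizes rather than a fixed pattern.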
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for(int i=0; i < count; ++i) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();

      key.write(data);
      value.write(data);
    }
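    // Keep an uncompressed view of the serialized records; the check loop
    // below re-reads these bytes and compares them with the decompressed
    // stream.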
    DataInputBuffer originalData = new DataInputBuffer();
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    originalData.reset(data.getData(), 0, data.getLength());

    LOG.info("Generated " + count + " records");

    // Compress data
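    // Stream stack for compression: the codec's CompressionOutputStream
    // writes into an in-memory DataOutputBuffer, with a BufferedOutputStream
    // in between to batch small writes.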
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter =
      codec.createOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut =
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(data.getData(), 0, data.getLength());
    deflateOut.flush();
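    // finish() tells the codec to emit any trailing data (e.g. the gzip
    // footer) without closing the underlying stream.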
    deflateFilter.finish();
    LOG.info("Finished compressing data");

    // De-compress data
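    // DataInputBuffer.reset() wraps the compressed byte array in place (no
    // copy); the inflate stack below mirrors the deflate stack above.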
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
                                 compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter =
      codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn =
      new DataInputStream(new BufferedInputStream(inflateFilter));

    // Check that every record survives the compress/decompress round trip
    for(int i=0; i < count; ++i) {
      RandomDatum k1 = new RandomDatum();
      RandomDatum v1 = new RandomDatum();
      k1.readFields(originalIn);
      v1.readFields(originalIn);

      RandomDatum k2 = new RandomDatum();
      RandomDatum v2 = new RandomDatum();
      k2.readFields(inflateIn);
      v2.readFields(inflateIn);

      // Without this assertion the loop would only consume bytes and the
      // test could pass vacuously.
      assertTrue("original and compressed-then-decompressed output don't match",
                 k1.equals(k2) && v1.equals(v2));
    }
    LOG.info("SUCCESS! Completed checking " + count + " records");
  }

  public void testCodecPoolGzipReuse() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("hadoop.native.lib", true);
    if (!ZlibFactory.isNativeZlibLoaded(conf)) {
      LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
      return;
    }
    GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
    DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
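    // Both compressors come from CodecPool. After returning them, a fresh
    // request for a gzip compressor must not hand back the instance pooled
    // for DefaultCodec, even though both are zlib-based.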
    Compressor c1 = CodecPool.getCompressor(gzc);
    Compressor c2 = CodecPool.getCompressor(dfc);
    CodecPool.returnCompressor(c1);
    CodecPool.returnCompressor(c2);
    assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
  }

  public void testSequenceFileDefaultCodec() throws IOException, ClassNotFoundException,
      InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.DefaultCodec", 100);
    sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.DefaultCodec", 1000000);
  }

  public void testSequenceFileBZip2Codec() throws IOException, ClassNotFoundException,
      InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 0, "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.BZip2Codec", 1000000);
  }

  private static void sequenceFileCodecTest(Configuration conf, int lines,
                                String codecClass, int blockSize)
    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {

    Path filePath = new Path("SequenceFileCodecTest." + codecClass);
    // Configuration
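    // io.seqfile.compress.blocksize sets the (approximate) number of buffered
    // bytes at which a BLOCK-compressed SequenceFile compresses and flushes a
    // block; varying it exercises single-block and multi-block files.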
    conf.setInt("io.seqfile.compress.blocksize", blockSize);

    // Create the SequenceFile
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath,
        Text.class, Text.class, CompressionType.BLOCK,
        (CompressionCodec)Class.forName(codecClass).newInstance());

    // Write some data
    LOG.info("Writing to SequenceFile...");
    for (int i=0; i<lines; i++) {
      Text key = new Text("key" + i);
      Text value = new Text("value" + i);
      writer.append(key, value);
    }
    writer.close();

    // Read the data back and check
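    // SequenceFile.Reader.next(key, value) deserializes into caller-supplied
    // Writable instances, so one key and one value object are reused for
    // every record.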
    LOG.info("Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);

    Writable key = (Writable)reader.getKeyClass().newInstance();
    Writable value = (Writable)reader.getValueClass().newInstance();

    int lc = 0;
    try {
      while (reader.next(key, value)) {
        assertEquals("key" + lc, key.toString());
        assertEquals("value" + lc, value.toString());
        lc++;
      }
    } finally {
      reader.close();
    }
    assertEquals(lines, lc);

    // Delete temporary files
    fs.delete(filePath, false);

    LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
  }

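  /**
   * Standalone driver so the round-trip test can be run against an arbitrary
   * codec from the command line. Invocation sketch (classpath setup omitted;
   * the exact launcher is environment-specific):
   *
   *   hadoop org.apache.hadoop.io.compress.TestCodec \
   *     -count 1000 -codec org.apache.hadoop.io.compress.GzipCodec
   */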
  public static void main(String[] args) {
    int count = 10000;
    String codecClass = "org.apache.hadoop.io.compress.DefaultCodec";

    String usage = "TestCodec [-count N] [-codec <codec class>]";
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    try {
      for (int i=0; i < args.length; ++i) {       // parse command line
        if (args[i] == null) {
          continue;
        } else if (args[i].equals("-count")) {
          count = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-codec")) {
          codecClass = args[++i];
        }
      }

      Configuration conf = new Configuration();
      int seed = 0;
      codecTest(conf, seed, count, codecClass);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      e.printStackTrace();
    }
  }

  public TestCodec(String name) {
    super(name);
  }

}