source: proiecte/HadoopJUnit/hadoop-0.20.1/src/test/org/apache/hadoop/mapred/TestMultiFileInputFormat.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the mail files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 5.3 KB
Line 
1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements.  See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership.  The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License.  You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18package org.apache.hadoop.mapred;
19
20import java.io.IOException;
21import java.util.BitSet;
22import java.util.HashMap;
23import java.util.Random;
24
25import junit.framework.TestCase;
26
27import org.apache.commons.logging.Log;
28import org.apache.commons.logging.LogFactory;
29import org.apache.hadoop.fs.FSDataOutputStream;
30import org.apache.hadoop.fs.FileSystem;
31import org.apache.hadoop.fs.Path;
32import org.apache.hadoop.io.Text;
33
34public class TestMultiFileInputFormat extends TestCase{
35
36  private static JobConf job = new JobConf();
37
38  private static final Log LOG = LogFactory.getLog(TestMultiFileInputFormat.class);
39 
40  private static final int MAX_SPLIT_COUNT  = 10000;
41  private static final int SPLIT_COUNT_INCR = 6000;
42  private static final int MAX_BYTES = 1024;
43  private static final int MAX_NUM_FILES = 10000;
44  private static final int NUM_FILES_INCR = 8000;
45 
46  private Random rand = new Random(System.currentTimeMillis());
47  private HashMap<String, Long> lengths = new HashMap<String, Long>();
48 
49  /** Dummy class to extend MultiFileInputFormat*/
50  private class DummyMultiFileInputFormat extends MultiFileInputFormat<Text, Text> {
51    @Override
52    public RecordReader<Text,Text> getRecordReader(InputSplit split, JobConf job
53        , Reporter reporter) throws IOException {
54      return null;
55    }
56  }
57 
58  private Path initFiles(FileSystem fs, int numFiles, int numBytes) throws IOException{
59    Path dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
60    Path multiFileDir = new Path(dir, "test.multifile");
61    fs.delete(multiFileDir, true);
62    fs.mkdirs(multiFileDir);
63    LOG.info("Creating " + numFiles + " file(s) in " + multiFileDir);
64    for(int i=0; i<numFiles ;i++) {
65      Path path = new Path(multiFileDir, "file_" + i);
66       FSDataOutputStream out = fs.create(path);
67       if (numBytes == -1) {
68         numBytes = rand.nextInt(MAX_BYTES);
69       }
70       for(int j=0; j< numBytes; j++) {
71         out.write(rand.nextInt());
72       }
73       out.close();
74       if(LOG.isDebugEnabled()) {
75         LOG.debug("Created file " + path + " with length " + numBytes);
76       }
77       lengths.put(path.getName(), new Long(numBytes));
78    }
79    FileInputFormat.setInputPaths(job, multiFileDir);
80    return multiFileDir;
81  }
82 
83  public void testFormat() throws IOException {
84    if(LOG.isInfoEnabled()) {
85      LOG.info("Test started");
86      LOG.info("Max split count           = " + MAX_SPLIT_COUNT);
87      LOG.info("Split count increment     = " + SPLIT_COUNT_INCR);
88      LOG.info("Max bytes per file        = " + MAX_BYTES);
89      LOG.info("Max number of files       = " + MAX_NUM_FILES);
90      LOG.info("Number of files increment = " + NUM_FILES_INCR);
91    }
92   
93    MultiFileInputFormat<Text,Text> format = new DummyMultiFileInputFormat();
94    FileSystem fs = FileSystem.getLocal(job);
95   
96    for(int numFiles = 1; numFiles< MAX_NUM_FILES ; 
97      numFiles+= (NUM_FILES_INCR / 2) + rand.nextInt(NUM_FILES_INCR / 2)) {
98     
99      Path dir = initFiles(fs, numFiles, -1);
100      BitSet bits = new BitSet(numFiles);
101      for(int i=1;i< MAX_SPLIT_COUNT ;i+= rand.nextInt(SPLIT_COUNT_INCR) + 1) {
102        LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);
103       
104        MultiFileSplit[] splits = (MultiFileSplit[])format.getSplits(job, i);
105        bits.clear();
106       
107        for(MultiFileSplit split : splits) {
108          long splitLength = 0;
109          for(Path p : split.getPaths()) {
110            long length = fs.getContentSummary(p).getLength();
111            assertEquals(length, lengths.get(p.getName()).longValue());
112            splitLength += length;
113            String name = p.getName();
114            int index = Integer.parseInt(
115                name.substring(name.lastIndexOf("file_") + 5));
116            assertFalse(bits.get(index));
117            bits.set(index);
118          }
119          assertEquals(splitLength, split.getLength());
120        }
121      }
122      assertEquals(bits.cardinality(), numFiles);
123      fs.delete(dir, true);
124    }
125    LOG.info("Test Finished");
126  }
127 
128  public void testFormatWithLessPathsThanSplits() throws Exception {
129    MultiFileInputFormat<Text,Text> format = new DummyMultiFileInputFormat();
130    FileSystem fs = FileSystem.getLocal(job);     
131   
132    // Test with no path
133    initFiles(fs, 0, -1);   
134    assertEquals(0, format.getSplits(job, 2).length);
135   
136    // Test with 2 path and 4 splits
137    initFiles(fs, 2, 500);
138    assertEquals(2, format.getSplits(job, 4).length);
139  }
140 
141  public static void main(String[] args) throws Exception{
142    TestMultiFileInputFormat test = new TestMultiFileInputFormat();
143    test.testFormat();
144  }
145}
Note: See TracBrowser for help on using the repository browser.