source: proiecte/HadoopJUnit/hadoop-0.20.1/src/test/org/apache/hadoop/hdfs/TestCrcCorruption.java @ 120

Last change on this file since 120 was 120, checked in by (none), 14 years ago

Added the main files for the Hadoop JUnit Project

  • Property svn:executable set to *
File size: 8.8 KB
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs;

import java.io.*;
import java.nio.channels.FileChannel;
import java.nio.ByteBuffer;
import java.util.Random;
import junit.framework.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.Path;

/**
 * A JUnit test for corrupted file handling.
 * This test creates a number of files/directories with a replication
 * factor of 2 and then verifies that a client can automatically
 * access the remaining valid replica in spite of the following
 * types of simulated errors:
 *
 *  1. Delete the meta file on one replica
 *  2. Truncate the meta file on one replica
 *  3. Corrupt the meta file header on one replica
 *  4. Corrupt a random offset and portion of the meta file
 *  5. Swap two meta files, i.e. the format of the meta files
 *     is valid but their CRCs do not match their corresponding
 *     data blocks
 * The above tests are run for varied values of io.bytes.per.checksum
 * and dfs.block.size, and cover the case where the meta file spans
 * multiple blocks.
 *
 * Another portion of the test is commented out until HADOOP-1557
 * is addressed:
 *  1. Create a file with 2 replicas, corrupt the meta file of one replica,
 *     and decrease the replication factor from 2 to 1. Validate that the
 *     remaining replica is the good one.
 *  2. Create a file with 2 replicas, corrupt the meta file of one replica,
 *     and increase the replication factor of the file to 3. Verify that the
 *     new replica was created from the non-corrupted replica.
 */
public class TestCrcCorruption extends TestCase {

  public TestCrcCorruption(String testName) {
    super(testName);
  }

  protected void setUp() throws Exception {
  }

  protected void tearDown() throws Exception {
  }

  /**
   * check if DFS can handle corrupted CRC blocks
   */
  private void thistest(Configuration conf, DFSTestUtil util) throws Exception {
    MiniDFSCluster cluster = null;
    int numDataNodes = 2;
    short replFactor = 2;
    Random random = new Random();

    try {
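      // Bring up a two-datanode cluster (the 'true' flag formats fresh storage,
      // null means no fixed rack assignment), create the test files with
      // replication 2, and wait until both replicas of every block exist
      // before corrupting anything.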
      cluster = new MiniDFSCluster(conf, numDataNodes, true, null);
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      util.createFiles(fs, "/srcdat", replFactor);
      util.waitReplication(fs, "/srcdat", (short)2);

      // Now deliberately remove/truncate meta blocks from the first
      // directory of the first datanode. The complete absence of a meta
      // file prevents this datanode from sending data to another datanode.
      // However, a client is still allowed access to this block.
      //
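      // (MiniDFSCluster gives each datanode two storage directories under
      // test.build.data/dfs/data; data1 and data2 here both belong to the
      // first datanode.)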
      File data_dir = new File(System.getProperty("test.build.data"),
                               "dfs/data/data1/current");
      assertTrue("data directory does not exist", data_dir.exists());
      File[] blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
      int num = 0;
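      // The loop below cycles through three corruption modes: a meta file whose
      // running count gives num % 3 == 0 is deleted, num % 3 == 1 is truncated,
      // and the rest are overwritten with random bytes (the first of those,
      // num == 2, at its header, offset 0).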
      for (int idx = 0; idx < blocks.length; idx++) {
        if (blocks[idx].getName().startsWith("blk_") &&
            blocks[idx].getName().endsWith(".meta")) {
          num++;
          if (num % 3 == 0) {
            //
            // remove .meta file
            //
            System.out.println("Deliberately removing file " + blocks[idx].getName());
            assertTrue("Cannot remove file.", blocks[idx].delete());
          } else if (num % 3 == 1) {
            //
            // shorten .meta file
            //
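            // (the new size is a random value below half the original, so at
            // least the trailing checksums are lost)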
            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
            FileChannel channel = file.getChannel();
            int newsize = random.nextInt((int)channel.size()/2);
            System.out.println("Deliberately truncating file " +
                               blocks[idx].getName() +
                               " to size " + newsize + " bytes.");
            channel.truncate(newsize);
            file.close();
          } else {
            //
            // corrupt a few bytes of the metafile
            //
            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
            FileChannel channel = file.getChannel();
            long position = 0;
            //
            // The very first time, corrupt the meta header at offset 0
            //
            if (num != 2) {
              position = (long)random.nextInt((int)channel.size());
            }
            int length = random.nextInt((int)(channel.size() - position + 1));
            byte[] buffer = new byte[length];
            random.nextBytes(buffer);
            channel.write(ByteBuffer.wrap(buffer), position);
            System.out.println("Deliberately corrupting file " +
                               blocks[idx].getName() +
                               " at offset " + position +
                               " length " + length);
            file.close();
          }
        }
      }
      //
      // Now deliberately corrupt all meta blocks from the second
      // directory of the first datanode
      //
      data_dir = new File(System.getProperty("test.build.data"),
                               "dfs/data/data2/current");
      assertTrue("data directory does not exist", data_dir.exists());
      blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));

      int count = 0;
      File previous = null;
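      // Walk the meta files in pairs: the second file of each pair is deleted,
      // the first is renamed onto it so its checksums no longer match its data
      // block (scenario 5 in the class comment), and an empty meta file is
      // recreated in the first file's place.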
      for (int idx = 0; idx < blocks.length; idx++) {
        if (blocks[idx].getName().startsWith("blk_") &&
            blocks[idx].getName().endsWith(".meta")) {
          //
          // Move the previous metafile into the current one.
          //
          count++;
          if (count % 2 == 0) {
            System.out.println("Deliberately inserting bad crc into files " +
                                blocks[idx].getName() + " " + previous.getName());
            assertTrue("Cannot remove file.", blocks[idx].delete());
            assertTrue("Cannot corrupt meta file.", previous.renameTo(blocks[idx]));
            assertTrue("Cannot recreate empty meta file.", previous.createNewFile());
            previous = null;
          } else {
            previous = blocks[idx];
          }
        }
      }

      //
      // Only one replica is possibly corrupted. The other replica should still
      // be good. Verify.
      //
      assertTrue("Corrupted replicas not handled properly.",
                 util.checkFiles(fs, "/srcdat"));
      System.out.println("All files still have a valid replica");

      //
      // Set the replication factor back to 1. This causes only one replica of
      // each block to remain in HDFS. The check is to make sure that
      // the corrupted replica generated above is the one that gets deleted.
      // This test is currently disabled until HADOOP-1557 is solved.
      //
      util.setReplication(fs, "/srcdat", (short)1);
      //util.waitReplication(fs, "/srcdat", (short)1);
      //System.out.println("All Files done with removing replicas");
      //assertTrue("Excess replicas deleted. Corrupted replicas found.",
      //           util.checkFiles(fs, "/srcdat"));
      System.out.println("The excess-corrupted-replica test is disabled " +
                         "pending HADOOP-1557");

      util.cleanup(fs, "/srcdat");
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }

  public void testCrcCorruption() throws Exception {
    //
    // default parameters
    //
    System.out.println("TestCrcCorruption with default parameters");
    Configuration conf1 = new Configuration();
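    // Report blocks every 3 seconds instead of the (much longer) default,
    // presumably so block state changes reach the namenode quickly within
    // the test's lifetime.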
    conf1.setInt("dfs.blockreport.intervalMsec", 3 * 1000);
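    // 40 files, directories nested up to 3 levels deep, files of at most 8 KB
    // (assuming DFSTestUtil's (testName, nFiles, maxLevels, maxSize) constructor).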
    DFSTestUtil util1 = new DFSTestUtil("TestCrcCorruption", 40, 3, 8*1024);
    thistest(conf1, util1);

    //
    // specific parameters
    //
    System.out.println("TestCrcCorruption with specific parameters");
    Configuration conf2 = new Configuration();
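    // Tiny checksum chunks and blocks: with io.bytes.per.checksum = 17 and
    // dfs.block.size = 34, each block carries two checksum chunks and the
    // test files (up to 400 bytes) span several blocks, so multi-chunk meta
    // files get exercised.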
    conf2.setInt("io.bytes.per.checksum", 17);
    conf2.setInt("dfs.block.size", 34);
    DFSTestUtil util2 = new DFSTestUtil("TestCrcCorruption", 40, 3, 400);
    thistest(conf2, util2);
  }
}