From 8e05dbf2bddce95d5f5a5bae5df61acabf0ba7c5 Mon Sep 17 00:00:00 2001
From: Lei Xu
Date: Mon, 2 Nov 2015 18:05:43 -0800
Subject: [PATCH] HDFS-9308. Add truncateMeta() and deleteMeta() to MiniDFSCluster. (Tony Wu via lei)

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |   2 +
 .../apache/hadoop/hdfs/MiniDFSCluster.java    |  22 ++
 .../apache/hadoop/hdfs/TestCrcCorruption.java | 118 +++++-------
 .../apache/hadoop/hdfs/TestLeaseRecovery.java |  35 ++--
 4 files changed, 75 insertions(+), 102 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3c6054932c..19ea5c1955 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1661,6 +1661,8 @@ Release 2.8.0 - UNRELEASED
     HDFS-9312. Fix TestReplication to be FsDataset-agnostic. (lei)
 
+    HDFS-9308. Add truncateMeta() and deleteMeta() to MiniDFSCluster. (Tony Wu via lei)
+
   BUG FIXES
 
     HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
index 7ebf333947..c81f1541bb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
@@ -2117,6 +2117,28 @@ public void corruptMeta(int i, ExtendedBlock blk) throws IOException {
     getMaterializedReplica(i, blk).corruptMeta();
   }
 
+  /**
+   * Corrupt the metadata of a block by deleting it.
+   * @param i index of the datanode
+   * @param blk name of the block.
+   */
+  public void deleteMeta(int i, ExtendedBlock blk)
+      throws IOException {
+    getMaterializedReplica(i, blk).deleteMeta();
+  }
+
+  /**
+   * Corrupt the metadata of a block by truncating it to a new size.
+   * @param i index of the datanode.
+   * @param blk name of the block.
+   * @param newSize the new size of the metadata file.
+   * @throws IOException if any I/O errors.
+   */
+  public void truncateMeta(int i, ExtendedBlock blk, int newSize)
+      throws IOException {
+    getMaterializedReplica(i, blk).truncateMeta(newSize);
+  }
+
   public boolean changeGenStampOfBlock(int dnIndex, ExtendedBlock blk,
       long newGenStamp) throws IOException {
     File blockFile = getBlockFile(dnIndex, blk);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java
index 3850ff28fa..398bcc27a7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java
@@ -22,11 +22,8 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.io.File;
 import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
@@ -35,12 +32,15 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
-import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
 import org.apache.hadoop.io.IOUtils;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A JUnit test for corrupted file handling.
@@ -70,6 +70,8 @@
  * replica was created from the non-corrupted replica.
  */
 public class TestCrcCorruption {
+  public static final Logger LOG =
+      LoggerFactory.getLogger(TestCrcCorruption.class);
 
   private DFSClientFaultInjector faultInjector;
 
@@ -167,90 +169,26 @@ private void thistest(Configuration conf, DFSTestUtil util) throws Exception {
       // file disallows this Datanode to send data to another datanode.
       // However, a client is alowed access to this block.
       //
-      File storageDir = cluster.getInstanceStorageDir(0, 1);
-      String bpid = cluster.getNamesystem().getBlockPoolId();
-      File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
-      assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
-      int num = 0;
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (blocks[idx].getName().startsWith(Block.BLOCK_FILE_PREFIX) &&
-            blocks[idx].getName().endsWith(".meta")) {
-          num++;
-          if (num % 3 == 0) {
-            //
-            // remove .meta file
-            //
-            System.out.println("Deliberately removing file " + blocks[idx].getName());
-            assertTrue("Cannot remove file.", blocks[idx].delete());
-          } else if (num % 3 == 1) {
-            //
-            // shorten .meta file
-            //
-            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
-            FileChannel channel = file.getChannel();
-            int newsize = random.nextInt((int)channel.size()/2);
-            System.out.println("Deliberately truncating file " +
-                               blocks[idx].getName() +
-                               " to size " + newsize + " bytes.");
-            channel.truncate(newsize);
-            file.close();
-          } else {
-            //
-            // corrupt a few bytes of the metafile
-            //
-            RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
-            FileChannel channel = file.getChannel();
-            long position = 0;
-            //
-            // The very first time, corrupt the meta header at offset 0
-            //
-            if (num != 2) {
-              position = (long)random.nextInt((int)channel.size());
-            }
-            int length = random.nextInt((int)(channel.size() - position + 1));
-            byte[] buffer = new byte[length];
-            random.nextBytes(buffer);
-            channel.write(ByteBuffer.wrap(buffer), position);
-            System.out.println("Deliberately corrupting file " +
-                               blocks[idx].getName() +
-                               " at offset " + position +
-                               " length " + length);
-            file.close();
-          }
-        }
-      }
-
-      //
-      // Now deliberately corrupt all meta blocks from the second
-      // directory of the first datanode
-      //
-      storageDir = cluster.getInstanceStorageDir(0, 1);
-      data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
-      assertTrue("data directory does not exist", data_dir.exists());
-      blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
+      final int dnIdx = 0;
+      final DataNode dn = cluster.getDataNodes().get(dnIdx);
+      final String bpid = cluster.getNamesystem().getBlockPoolId();
+      List<FinalizedReplica> replicas =
+          dn.getFSDataset().getFinalizedBlocks(bpid);
+      assertTrue("Replicas do not exist", !replicas.isEmpty());
 
-      int count = 0;
-      File previous = null;
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (blocks[idx].getName().startsWith("blk_") &&
-            blocks[idx].getName().endsWith(".meta")) {
-          //
-          // Move the previous metafile into the current one.
-          //
-          count++;
-          if (count % 2 == 0) {
-            System.out.println("Deliberately insertimg bad crc into files " +
-                               blocks[idx].getName() + " " + previous.getName());
-            assertTrue("Cannot remove file.", blocks[idx].delete());
-            assertTrue("Cannot corrupt meta file.", previous.renameTo(blocks[idx]));
-            assertTrue("Cannot recreate empty meta file.", previous.createNewFile());
-            previous = null;
-          } else {
-            previous = blocks[idx];
-          }
+      for (int idx = 0; idx < replicas.size(); idx++) {
+        FinalizedReplica replica = replicas.get(idx);
+        ExtendedBlock eb = new ExtendedBlock(bpid, replica);
+        if (idx % 3 == 0) {
+          LOG.info("Deliberately removing meta for block " + eb);
+          cluster.deleteMeta(dnIdx, eb);
+        } else if (idx % 3 == 1) {
+          final int newSize = 2;  // bytes
+          LOG.info("Deliberately truncating meta file for block " +
+              eb + " to size " + newSize + " bytes.");
+          cluster.truncateMeta(dnIdx, eb, newSize);
+        } else {
+          cluster.corruptMeta(dnIdx, eb);
         }
       }
 
@@ -260,7 +198,7 @@ private void thistest(Configuration conf, DFSTestUtil util) throws Exception {
       //
       assertTrue("Corrupted replicas not handled properly.",
                  util.checkFiles(fs, "/srcdat"));
-      System.out.println("All File still have a valid replica");
+      LOG.info("All File still have a valid replica");
 
       //
       // set replication factor back to 1. This causes only one replica of
@@ -273,7 +211,7 @@ private void thistest(Configuration conf, DFSTestUtil util) throws Exception {
       //System.out.println("All Files done with removing replicas");
       //assertTrue("Excess replicas deleted. Corrupted replicas found.",
       //          util.checkFiles(fs, "/srcdat"));
-      System.out.println("The excess-corrupted-replica test is disabled " +
+      LOG.info("The excess-corrupted-replica test is disabled " +
                          " pending HADOOP-1557");
 
       util.cleanup(fs, "/srcdat");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index c9f3842d8b..4ae130df3d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -20,9 +20,7 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-import java.io.File;
 import java.io.IOException;
-import java.io.RandomAccessFile;
 import java.util.EnumSet;
 
 import org.apache.hadoop.conf.Configuration;
@@ -32,7 +30,6 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -178,8 +175,9 @@ public void testBlockRecoveryWithLessMetafile() throws Exception {
     Path file = new Path("/testRecoveryFile");
     DistributedFileSystem dfs = cluster.getFileSystem();
     FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 2 * 1024 * 1024;
     int count = 0;
-    while (count < 2 * 1024 * 1024) {
+    while (count < FILE_SIZE) {
       out.writeBytes("Data");
       count += 4;
     }
@@ -190,15 +188,23 @@ public void testBlockRecoveryWithLessMetafile() throws Exception {
     LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
         file.toString(), 0, count);
     ExtendedBlock block = locations.get(0).getBlock();
-    DataNode dn = cluster.getDataNodes().get(0);
-    BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
-    File metafile = new File(localPathInfo.getMetaPath());
-    assertTrue(metafile.exists());
-    // reduce the block meta file size
-    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
-    raf.setLength(metafile.length() - 20);
-    raf.close();
+    // Calculate meta file size
+    // From DataNode.java, checksum size is given by:
+    // (length of data + BYTE_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
+    // CHECKSUM_SIZE
+    final int CHECKSUM_SIZE = 4;  // CRC32 & CRC32C
+    final int bytesPerChecksum = conf.getInt(
+        DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY,
+        DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT);
+    final int metaFileSize =
+        (FILE_SIZE + bytesPerChecksum - 1) / bytesPerChecksum * CHECKSUM_SIZE +
+        8;  // meta file header is 8 bytes
+    final int newMetaFileSize = metaFileSize - CHECKSUM_SIZE;
+
+    // Corrupt the block meta file by dropping checksum for bytesPerChecksum
+    // bytes. Lease recovery is expected to recover the uncorrupted file length.
+    cluster.truncateMeta(0, block, newMetaFileSize);
 
     // restart DN to make replica to RWR
     DataNodeProperties dnProp = cluster.stopDataNode(0);
 
@@ -213,6 +219,11 @@ public void testBlockRecoveryWithLessMetafile() throws Exception {
     }
     assertTrue("File should be closed", newdfs.recoverLease(file));
 
+    // Verify file length after lease recovery. The new file length should not
+    // include the bytes with corrupted checksum.
+    final long expectedNewFileLen = FILE_SIZE - bytesPerChecksum;
+    final long newFileLen = newdfs.getFileStatus(file).getLen();
+    assertEquals(newFileLen, expectedNewFileLen);
   }
 
   /**
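Usage note (not part of the patch): a minimal sketch of how a test could drive the new MiniDFSCluster helpers end to end. The class name, file path, and sizes below are illustrative assumptions; the MiniDFSCluster, DFSTestUtil, and ExtendedBlock calls are the existing test APIs this patch builds on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;

// Illustrative sketch only; not part of HDFS-9308.
public class MetaCorruptionExample {
  public void corruptAndDeleteMeta() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    try {
      cluster.waitActive();
      DistributedFileSystem fs = cluster.getFileSystem();

      // Write a small file with replication 2 so one good replica survives.
      Path file = new Path("/metaCorruptionExample");
      DFSTestUtil.createFile(fs, file, 8 * 1024, (short) 2, 0L);
      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file);

      // The new helpers added by this patch: damage the replica metadata on
      // datanode 0 without touching the block data itself.
      cluster.deleteMeta(0, block);          // remove the .meta file
      // cluster.truncateMeta(0, block, 2);  // or keep only the first 2 bytes

      // The intact replica on datanode 1 should still serve the read.
      DFSTestUtil.readFile(fs, file);
    } finally {
      cluster.shutdown();
    }
  }
}

With two replicas, damaging the metadata on one datanode leaves the other replica intact, which is the same property the reworked TestCrcCorruption loop relies on.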