From 12645ff6c06bc514930177a78610017897e2e0b7 Mon Sep 17 00:00:00 2001
From: Todd Lipcon
Date: Mon, 23 Jan 2012 18:11:09 +0000
Subject: [PATCH] HDFS-2822. processMisReplicatedBlock incorrectly identifies
 under-construction blocks as under-replicated. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1234925 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 ++
 .../server/blockmanagement/BlockManager.java  | 10 +++-
 .../org/apache/hadoop/hdfs/TestSafeMode.java  | 49 +++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 99bb920b95..86b2bc3238 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -350,6 +350,9 @@ Release 0.23.1 - UNRELEASED
     HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
     (hitesh via tucu)
 
+    HDFS-2822. processMisReplicatedBlock incorrectly identifies
+    under-construction blocks as under-replicated. (todd)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index fa47abc653..99a7f339c8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1759,7 +1759,8 @@ private void invalidateCorruptReplicas(Block blk) {
   public void processMisReplicatedBlocks() {
     assert namesystem.hasWriteLock();
 
-    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
+    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0,
+      nrUnderConstruction = 0;
     neededReplications.clear();
     for (BlockInfo block : blocksMap.getBlocks()) {
       INodeFile fileINode = block.getINode();
@@ -1769,6 +1770,12 @@ public void processMisReplicatedBlocks() {
         addToInvalidates(block);
         continue;
       }
+      if (!block.isComplete()) {
+        // Incomplete blocks are never considered mis-replicated --
+        // they'll be reached when they are completed or recovered.
+        nrUnderConstruction++;
+        continue;
+      }
       // calculate current replication
       short expectedReplication = fileINode.getReplication();
       NumberReplicas num = countNodes(block);
@@ -1792,6 +1799,7 @@ public void processMisReplicatedBlocks() {
     LOG.info("Number of invalid blocks = " + nrInvalid);
     LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
     LOG.info("Number of over-replicated blocks = " + nrOverReplicated);
+    LOG.info("Number of blocks being written = " + nrUnderConstruction);
   }
 
   /** Set replication for the blocks. */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
index 6ec5f8bf52..939097be71 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
@@ -19,22 +19,30 @@
 package org.apache.hadoop.hdfs;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
 
 import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
+
 /**
  * Tests to verify safe mode correctness.
  */
 public class TestSafeMode {
+  private static final int BLOCK_SIZE = 1024;
   Configuration conf;
   MiniDFSCluster cluster;
   FileSystem fs;
@@ -43,6 +51,7 @@ public class TestSafeMode {
   @Before
   public void startUp() throws IOException {
     conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
     cluster.waitActive();
     fs = cluster.getFileSystem();
@@ -128,6 +137,46 @@ public void testNoExtensionIfNoBlocks() throws IOException {
     assertEquals("", status);
   }
 
+  /**
+   * Test that, when under-replicated blocks are processed at the end of
+   * safe-mode, blocks currently under construction are not considered
+   * under-replicated or missing. Regression test for HDFS-2822.
+   */
+  @Test
+  public void testRbwBlocksNotConsideredUnderReplicated() throws IOException {
+    List<FSDataOutputStream> stms = Lists.newArrayList();
+    try {
+      // Create some junk blocks so that the NN doesn't just immediately
+      // exit safemode on restart.
+      DFSTestUtil.createFile(fs, new Path("/junk-blocks"),
+          BLOCK_SIZE*4, (short)1, 1L);
+      // Create several files which are left open. It's important to
+      // create several here, because otherwise the first iteration of the
+      // replication monitor will pull them off the replication queue and
+      // hide this bug from the test!
+      for (int i = 0; i < 10; i++) {
+        FSDataOutputStream stm = fs.create(
+            new Path("/append-" + i), true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+        stms.add(stm);
+        stm.write(1);
+        stm.hflush();
+      }
+
+      cluster.restartNameNode();
+      FSNamesystem ns = cluster.getNameNode(0).getNamesystem();
+      BlockManagerTestUtil.updateState(ns.getBlockManager());
+      assertEquals(0, ns.getPendingReplicationBlocks());
+      assertEquals(0, ns.getCorruptReplicaBlocks());
+      assertEquals(0, ns.getMissingBlocksCount());
+
+    } finally {
+      for (FSDataOutputStream stm : stms) {
+        IOUtils.closeStream(stm);
+      }
+      cluster.shutdown();
+    }
+  }
+
   public interface FSRun {
     public abstract void run(FileSystem fs) throws IOException;
   }
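
The essence of the fix above is the early "continue" for incomplete blocks in
processMisReplicatedBlocks(). The following standalone sketch illustrates that
corrected scan logic in isolation; the simplified Block class, its field names,
and the main() driver are hypothetical stand-ins for illustration only, not
Hadoop's actual BlockInfo/INodeFile/NumberReplicas API.

// Illustrative sketch only -- simplified stand-ins, not Hadoop's API.
import java.util.Arrays;
import java.util.List;

class MisReplicationScanSketch {
  // Hypothetical minimal block model.
  static class Block {
    final boolean complete;         // false while the block is being written (RBW)
    final int liveReplicas;         // replicas reported by datanodes so far
    final int expectedReplication;  // target replication from the file's inode
    Block(boolean complete, int liveReplicas, int expectedReplication) {
      this.complete = complete;
      this.liveReplicas = liveReplicas;
      this.expectedReplication = expectedReplication;
    }
  }

  static void scan(List<Block> blocks) {
    long nrUnderReplicated = 0, nrOverReplicated = 0, nrUnderConstruction = 0;
    for (Block b : blocks) {
      if (!b.complete) {
        // The fix: a block still being written naturally has fewer finalized
        // replicas than its target, so counting it below would wrongly flag
        // it as under-replicated. Skip it; it is re-examined once it is
        // completed or recovered.
        nrUnderConstruction++;
        continue;
      }
      if (b.liveReplicas < b.expectedReplication) {
        nrUnderReplicated++;   // would be queued for re-replication
      } else if (b.liveReplicas > b.expectedReplication) {
        nrOverReplicated++;    // excess replicas would be invalidated
      }
    }
    System.out.println("under-replicated = " + nrUnderReplicated);
    System.out.println("over-replicated  = " + nrOverReplicated);
    System.out.println("being written    = " + nrUnderConstruction);
  }

  public static void main(String[] args) {
    // One healthy block, one over-replicated block, one block mid-write.
    scan(Arrays.asList(
        new Block(true, 3, 3),
        new Block(true, 4, 3),
        new Block(false, 1, 3)));
  }
}

Before the fix, the third block in main() (one finalized replica, still being
written) would have been counted as under-replicated and queued at the end of
safe mode; with the early skip it is merely tallied as a block being written,
which is what the new LOG line and the regression test above check.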