From 4aa664f677f309216f1efc768a58ee3dbbaf66be Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Tue, 18 Feb 2014 14:53:05 +0000 Subject: [PATCH] HDFS-5780. TestRBWBlockInvalidation times out intemittently. Contributed by Mit Desai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1569368 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../TestRBWBlockInvalidation.java | 31 ++++++++++--------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 2a819d3099..73a9643d09 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -430,6 +430,9 @@ Release 2.4.0 - UNRELEASED HDFS-5942. Fix javadoc in OfflineImageViewer. (Akira Ajisaka via cnauroth) + HDFS-5780. TestRBWBlockInvalidation times out intemittently. (Mit Desai + via kihwal) + BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java index 2e5d70b096..e909dc9d80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java @@ -66,7 +66,7 @@ private static NumberReplicas countReplicas(final FSNamesystem namesystem, * datanode, namenode should ask to invalidate that corrupted block and * schedule replication for one more replica for that under replicated block. */ - @Test(timeout=60000) + @Test(timeout=600000) public void testBlockInvalidationWhenRBWReplicaMissedInDN() throws IOException, InterruptedException { // This test cannot pass on Windows due to file locking enforcement. It will @@ -75,7 +75,7 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN() Configuration conf = new HdfsConfiguration(); conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2); - conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 100); + conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 300); conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1); conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2) @@ -104,23 +104,24 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN() metaFile.delete()); out.close(); - - // Check datanode has reported the corrupt block. - int corruptReplicas = 0; + + int liveReplicas = 0; while (true) { - if ((corruptReplicas = countReplicas(namesystem, blk).corruptReplicas()) > 0) { + if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) < 2) { + // This confirms we have a corrupt replica + LOG.info("Live Replicas after corruption: " + liveReplicas); break; } Thread.sleep(100); } - assertEquals("There should be 1 replica in the corruptReplicasMap", 1, - corruptReplicas); - - // Check the block has got replicated to another datanode. - blk = DFSTestUtil.getFirstBlock(fs, testPath); - int liveReplicas = 0; + assertEquals("There should be less than 2 replicas in the " + + "liveReplicasMap", 1, liveReplicas); + while (true) { - if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) > 1) { + if ((liveReplicas = + countReplicas(namesystem, blk).liveReplicas()) > 1) { + //Wait till the live replica count becomes equal to Replication Factor + LOG.info("Live Replicas after Rereplication: " + liveReplicas); break; } Thread.sleep(100); @@ -128,9 +129,9 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN() assertEquals("There should be two live replicas", 2, liveReplicas); - // sleep for 1 second, so that by this time datanode reports the corrupt + // sleep for 2 seconds, so that by this time datanode reports the corrupt // block after a live replica of block got replicated. - Thread.sleep(1000); + Thread.sleep(2000); // Check that there is no corrupt block in the corruptReplicasMap. assertEquals("There should not be any replica in the corruptReplicasMap",