From ead7fa0413fd8aacd6d93697cf398ed17c30930f Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 1 Jul 2013 20:20:27 +0000 Subject: [PATCH] HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. Contributed by Ravi Prakash. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1498665 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/namenode/FSNamesystem.java | 63 +++++++++--------- .../org/apache/hadoop/hdfs/TestSafeMode.java | 12 ++-- .../server/namenode/ha/TestHASafeMode.java | 64 ++++++++++--------- 4 files changed, 71 insertions(+), 71 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 4a9aeaab62..36fabb1fcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -637,6 +637,9 @@ Release 2.1.0-beta - 2013-07-02 HDFS-4944. WebHDFS cannot create a file path containing characters that must be URI-encoded, such as space. (cnauroth) + HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via + kihwal) + BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index cde552442a..1efa990c82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4031,9 +4031,9 @@ class SafeModeInfo { // internal fields /** Time when threshold was reached. - * - *
<br>-1 safe mode is off - *
<br>0 safe mode is on, but threshold is not reached yet + *
<br>-1 safe mode is off + *
<br>0 safe mode is on, and threshold is not reached yet + *
>0 safe mode is on, but we are in extension period */ private long reached = -1; /** Total number of blocks. */ @@ -4157,7 +4157,8 @@ private synchronized void leave() { NameNode.stateChangeLog.info("STATE* Leaving safe mode after " + timeInSafemode/1000 + " secs"); NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); - + + //Log the following only once (when transitioning from ON -> OFF) if (reached >= 0) { NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); } @@ -4338,62 +4339,56 @@ private void setResourcesLow() { * A tip on how safe mode is to be turned off: manually or automatically. */ String getTurnOffTip() { - if(reached < 0) + if(!isOn()) return "Safe mode is OFF."; - String leaveMsg = ""; + + //Manual OR low-resource safemode. (Admin intervention required) + String leaveMsg = "It was turned on manually. "; if (areResourcesLow()) { - leaveMsg = "Resources are low on NN. " - + "Please add or free up more resources then turn off safe mode manually. " - + "NOTE: If you turn off safe mode before adding resources, " - + "the NN will immediately return to safe mode."; - } else { - leaveMsg = "Safe mode will be turned off automatically"; + leaveMsg = "Resources are low on NN. Please add or free up more " + + "resources then turn off safe mode manually. NOTE: If you turn off" + + " safe mode before adding resources, " + + "the NN will immediately return to safe mode. "; } - if(isManual() && !areResourcesLow()) { - leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off"; + if (isManual() || areResourcesLow()) { + return leaveMsg + + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; } - if(blockTotal < 0) - return leaveMsg + "."; - + //Automatic safemode. System will come out of safemode automatically. 
+ leaveMsg = "Safe mode will be turned off automatically"; int numLive = getNumLiveDataNodes(); String msg = ""; if (reached == 0) { if (blockSafe < blockThreshold) { msg += String.format( "The reported blocks %d needs additional %d" - + " blocks to reach the threshold %.4f of total blocks %d.", + + " blocks to reach the threshold %.4f of total blocks %d.\n", blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); } if (numLive < datanodeThreshold) { - if (!"".equals(msg)) { - msg += "\n"; - } msg += String.format( "The number of live datanodes %d needs an additional %d live " - + "datanodes to reach the minimum number %d.", + + "datanodes to reach the minimum number %d.\n", numLive, (datanodeThreshold - numLive), datanodeThreshold); } - msg += " " + leaveMsg; } else { msg = String.format("The reported blocks %d has reached the threshold" - + " %.4f of total blocks %d.", blockSafe, threshold, - blockTotal); + + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); - if (datanodeThreshold > 0) { - msg += String.format(" The number of live datanodes %d has reached " - + "the minimum number %d.", + msg += String.format("The number of live datanodes %d has reached " + + "the minimum number %d. ", numLive, datanodeThreshold); - } - msg += " " + leaveMsg; } + msg += leaveMsg; // threshold is not reached or manual or resources low if(reached == 0 || (isManual() && !areResourcesLow())) { - return msg + "."; + return msg; } // extension period is in progress - return msg + " in " + Math.abs(reached + extension - now()) / 1000 - + " seconds."; + return msg + (reached + extension - now() > 0 ? + " in " + (reached + extension - now()) / 1000 + " seconds." + : " soon."); } /** @@ -5648,7 +5643,7 @@ public long getTotal() { public String getSafemode() { if (!this.isInSafeMode()) return ""; - return "Safe mode is ON." + this.getSafeModeTip(); + return "Safe mode is ON. 
" + this.getSafeModeTip(); } @Override // NameNodeMXBean diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 794b44d438..7aaff5a04e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -178,9 +178,9 @@ public void testInitializeReplQueuesEarly() throws Exception { final NameNode nn = cluster.getNameNode(); String status = nn.getNamesystem().getSafemode(); - assertEquals("Safe mode is ON.The reported blocks 0 needs additional " + - "15 blocks to reach the threshold 0.9990 of total blocks 15. " + - "Safe mode will be turned off automatically.", status); + assertEquals("Safe mode is ON. The reported blocks 0 needs additional " + + "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" + + "Safe mode will be turned off automatically", status); assertFalse("Mis-replicated block queues should not be initialized " + "until threshold is crossed", NameNodeAdapter.safeModeInitializedReplQueues(nn)); @@ -353,10 +353,10 @@ public void testDatanodeThreshold() throws IOException { fs = cluster.getFileSystem(); String tipMsg = cluster.getNamesystem().getSafemode(); - assertTrue("Safemode tip message looks right: " + tipMsg, + assertTrue("Safemode tip message doesn't look right: " + tipMsg, tipMsg.contains("The number of live datanodes 0 needs an additional " + - "1 live datanodes to reach the minimum number 1. 
" + - "Safe mode will be turned off automatically.")); + "1 live datanodes to reach the minimum number 1.\n" + + "Safe mode will be turned off automatically")); // Start a datanode cluster.startDataNodes(conf, 1, true, null, null); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 0f599906c0..309b4d0f74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -206,11 +206,11 @@ public void testBlocksAddedBeforeStandbyRestart() throws Exception { // We expect it not to be stuck in safemode, since those blocks // that are already visible to the SBN should be processed // in the initial block reports. - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 8, 8); + assertSafeMode(nn1, 8, 8, 3, 0); } /** @@ -230,7 +230,7 @@ public void testBlocksAddedWhileInSafeMode() throws Exception { banner("Restarting standby"); restartStandby(); - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); // Create a few blocks which will send blockReceived calls to the // SBN. 
@@ -241,7 +241,7 @@ public void testBlocksAddedWhileInSafeMode() throws Exception { banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 8, 8); + assertSafeMode(nn1, 8, 8, 3, 0); } /** @@ -281,11 +281,11 @@ public void testBlocksRemovedBeforeStandbyRestart() throws Exception { banner("Restarting standby"); restartStandby(); - assertSafeMode(nn1, 0, 5); + assertSafeMode(nn1, 0, 5, 3, 0); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -307,7 +307,7 @@ public void testBlocksRemovedWhileInSafeMode() throws Exception { restartStandby(); // It will initially have all of the blocks necessary. - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); // Delete those blocks while the SBN is in safe mode. // This doesn't affect the SBN, since deletions are not @@ -322,14 +322,14 @@ public void testBlocksRemovedWhileInSafeMode() throws Exception { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); // When we catch up to active namespace, it will restore back // to 0 blocks. banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -355,20 +355,20 @@ public void testAppendWhileInSafeMode() throws Exception { restartStandby(); // It will initially have all of the blocks necessary. - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); // Append to a block while SBN is in safe mode. This should // not affect safemode initially, since the DN message // will get queued. 
FSDataOutputStream stm = fs.append(new Path("/test")); try { - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); // if we roll edits now, the SBN should see that it's under construction // and change its total count and safe count down by one, since UC // blocks are not counted by safe mode. HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 4, 4); + assertSafeMode(nn1, 4, 4, 3, 0); } finally { IOUtils.closeStream(stm); } @@ -386,13 +386,13 @@ public void testAppendWhileInSafeMode() throws Exception { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 4, 4); + assertSafeMode(nn1, 4, 4, 3, 0); // When we roll the edit log, the deletions will go through. banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } /** @@ -424,20 +424,21 @@ public void testBlocksDeletedInEditLog() throws Exception { restartActive(); } - private static void assertSafeMode(NameNode nn, int safe, int total) { + private static void assertSafeMode(NameNode nn, int safe, int total, + int numNodes, int nodeThresh) { String status = nn.getNamesystem().getSafemode(); if (safe == total) { assertTrue("Bad safemode status: '" + status + "'", status.startsWith( - "Safe mode is ON." + - "The reported blocks " + safe + " has reached the threshold " + - "0.9990 of total blocks " + total + ". Safe mode will be " + - "turned off automatically")); + "Safe mode is ON. The reported blocks " + safe + " has reached the " + + "threshold 0.9990 of total blocks " + total + ". The number of " + + "live datanodes " + numNodes + " has reached the minimum number " + + nodeThresh + ". Safe mode will be turned off automatically")); } else { int additional = total - safe; assertTrue("Bad safemode status: '" + status + "'", status.startsWith( - "Safe mode is ON." + + "Safe mode is ON. 
" + "The reported blocks " + safe + " needs additional " + additional + " blocks")); } @@ -467,14 +468,14 @@ public void testComplexFailoverIntoSafemode() throws Exception { // We expect it to be on its way out of safemode, since all of the blocks // from the edit log have been reported. - assertSafeMode(nn1, 3, 3); + assertSafeMode(nn1, 3, 3, 3, 0); // Initiate a failover into it while it's in safemode banner("Initiating a failover into NN1 in safemode"); NameNodeAdapter.abortEditLogs(nn0); cluster.transitionToActive(1); - assertSafeMode(nn1, 5, 5); + assertSafeMode(nn1, 5, 5, 3, 0); } /** @@ -499,10 +500,11 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception // It will initially have all of the blocks necessary. String status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 10 has reached the threshold 0.9990 of " + - "total blocks 10. Safe mode will be turned off automatically")); + status.startsWith( + "Safe mode is ON. The reported blocks 10 has reached the threshold " + + "0.9990 of total blocks 10. The number of live datanodes 3 has " + + "reached the minimum number 0. Safe mode will be turned off " + + "automatically")); // Delete those blocks while the SBN is in safe mode. // Immediately roll the edit log before the actual deletions are sent @@ -512,7 +514,7 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Should see removal of the blocks as well as their contribution to safe block count. 
- assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); banner("Triggering sending deletions to DNs and Deletion Reports"); @@ -525,7 +527,7 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception // No change in assertion status here, but some of the consistency checks // in safemode will fire here if we accidentally decrement safe block count // below 0. - assertSafeMode(nn1, 0, 0); + assertSafeMode(nn1, 0, 0, 3, 0); } @@ -561,11 +563,11 @@ public void testSafeBlockTracking() throws Exception { banner("Restarting SBN"); restartStandby(); - assertSafeMode(nn1, 10, 10); + assertSafeMode(nn1, 10, 10, 3, 0); banner("Allowing SBN to catch up"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - assertSafeMode(nn1, 15, 15); + assertSafeMode(nn1, 15, 15, 3, 0); } /** @@ -593,7 +595,7 @@ public void testBlocksAddedWhileStandbyIsDown() throws Exception { nn0.getRpcServer().rollEditLog(); restartStandby(); - assertSafeMode(nn1, 6, 6); + assertSafeMode(nn1, 6, 6, 3, 0); } /**