HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. Contributed by Ravi Prakash.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1498665 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Kihwal Lee 2013-07-01 20:20:27 +00:00
parent aa7e148d1f
commit ead7fa0413
4 changed files with 71 additions and 71 deletions

View File

@ -637,6 +637,9 @@ Release 2.1.0-beta - 2013-07-02
HDFS-4944. WebHDFS cannot create a file path containing characters that must
be URI-encoded, such as space. (cnauroth)
HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via
kihwal)
BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.

View File

@ -4031,9 +4031,9 @@ class SafeModeInfo {
// internal fields
/** Time when threshold was reached.
*
* <br>-1 safe mode is off
* <br> 0 safe mode is on, but threshold is not reached yet
* <br> -1 safe mode is off
* <br> 0 safe mode is on, and threshold is not reached yet
* <br> >0 safe mode is on, but we are in extension period
*/
private long reached = -1;
/** Total number of blocks. */
@ -4157,7 +4157,8 @@ private synchronized void leave() {
NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
+ timeInSafemode/1000 + " secs");
NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
//Log the following only once (when transitioning from ON -> OFF)
if (reached >= 0) {
NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
}
@ -4338,62 +4339,56 @@ private void setResourcesLow() {
* A tip on how safe mode is to be turned off: manually or automatically.
*/
String getTurnOffTip() {
if(reached < 0)
if(!isOn())
return "Safe mode is OFF.";
String leaveMsg = "";
//Manual OR low-resource safemode. (Admin intervention required)
String leaveMsg = "It was turned on manually. ";
if (areResourcesLow()) {
leaveMsg = "Resources are low on NN. "
+ "Please add or free up more resources then turn off safe mode manually. "
+ "NOTE: If you turn off safe mode before adding resources, "
+ "the NN will immediately return to safe mode.";
} else {
leaveMsg = "Safe mode will be turned off automatically";
leaveMsg = "Resources are low on NN. Please add or free up more "
+ "resources then turn off safe mode manually. NOTE: If you turn off"
+ " safe mode before adding resources, "
+ "the NN will immediately return to safe mode. ";
}
if(isManual() && !areResourcesLow()) {
leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off";
if (isManual() || areResourcesLow()) {
return leaveMsg
+ "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
}
if(blockTotal < 0)
return leaveMsg + ".";
//Automatic safemode. System will come out of safemode automatically.
leaveMsg = "Safe mode will be turned off automatically";
int numLive = getNumLiveDataNodes();
String msg = "";
if (reached == 0) {
if (blockSafe < blockThreshold) {
msg += String.format(
"The reported blocks %d needs additional %d"
+ " blocks to reach the threshold %.4f of total blocks %d.",
+ " blocks to reach the threshold %.4f of total blocks %d.\n",
blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
}
if (numLive < datanodeThreshold) {
if (!"".equals(msg)) {
msg += "\n";
}
msg += String.format(
"The number of live datanodes %d needs an additional %d live "
+ "datanodes to reach the minimum number %d.",
+ "datanodes to reach the minimum number %d.\n",
numLive, (datanodeThreshold - numLive), datanodeThreshold);
}
msg += " " + leaveMsg;
} else {
msg = String.format("The reported blocks %d has reached the threshold"
+ " %.4f of total blocks %d.", blockSafe, threshold,
blockTotal);
+ " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
if (datanodeThreshold > 0) {
msg += String.format(" The number of live datanodes %d has reached "
+ "the minimum number %d.",
msg += String.format("The number of live datanodes %d has reached "
+ "the minimum number %d. ",
numLive, datanodeThreshold);
}
msg += " " + leaveMsg;
}
msg += leaveMsg;
// threshold is not reached or manual or resources low
if(reached == 0 || (isManual() && !areResourcesLow())) {
return msg + ".";
return msg;
}
// extension period is in progress
return msg + " in " + Math.abs(reached + extension - now()) / 1000
+ " seconds.";
return msg + (reached + extension - now() > 0 ?
" in " + (reached + extension - now()) / 1000 + " seconds."
: " soon.");
}
/**
@ -5648,7 +5643,7 @@ public long getTotal() {
public String getSafemode() {
if (!this.isInSafeMode())
return "";
return "Safe mode is ON." + this.getSafeModeTip();
return "Safe mode is ON. " + this.getSafeModeTip();
}
@Override // NameNodeMXBean

View File

@ -178,9 +178,9 @@ public void testInitializeReplQueuesEarly() throws Exception {
final NameNode nn = cluster.getNameNode();
String status = nn.getNamesystem().getSafemode();
assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
"15 blocks to reach the threshold 0.9990 of total blocks 15. " +
"Safe mode will be turned off automatically.", status);
assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
"15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
"Safe mode will be turned off automatically", status);
assertFalse("Mis-replicated block queues should not be initialized " +
"until threshold is crossed",
NameNodeAdapter.safeModeInitializedReplQueues(nn));
@ -353,10 +353,10 @@ public void testDatanodeThreshold() throws IOException {
fs = cluster.getFileSystem();
String tipMsg = cluster.getNamesystem().getSafemode();
assertTrue("Safemode tip message looks right: " + tipMsg,
assertTrue("Safemode tip message doesn't look right: " + tipMsg,
tipMsg.contains("The number of live datanodes 0 needs an additional " +
"1 live datanodes to reach the minimum number 1. " +
"Safe mode will be turned off automatically."));
"1 live datanodes to reach the minimum number 1.\n" +
"Safe mode will be turned off automatically"));
// Start a datanode
cluster.startDataNodes(conf, 1, true, null, null);

View File

@ -206,11 +206,11 @@ public void testBlocksAddedBeforeStandbyRestart() throws Exception {
// We expect it not to be stuck in safemode, since those blocks
// that are already visible to the SBN should be processed
// in the initial block reports.
assertSafeMode(nn1, 3, 3);
assertSafeMode(nn1, 3, 3, 3, 0);
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 8, 8);
assertSafeMode(nn1, 8, 8, 3, 0);
}
/**
@ -230,7 +230,7 @@ public void testBlocksAddedWhileInSafeMode() throws Exception {
banner("Restarting standby");
restartStandby();
assertSafeMode(nn1, 3, 3);
assertSafeMode(nn1, 3, 3, 3, 0);
// Create a few blocks which will send blockReceived calls to the
// SBN.
@ -241,7 +241,7 @@ public void testBlocksAddedWhileInSafeMode() throws Exception {
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 8, 8);
assertSafeMode(nn1, 8, 8, 3, 0);
}
/**
@ -281,11 +281,11 @@ public void testBlocksRemovedBeforeStandbyRestart() throws Exception {
banner("Restarting standby");
restartStandby();
assertSafeMode(nn1, 0, 5);
assertSafeMode(nn1, 0, 5, 3, 0);
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0);
assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@ -307,7 +307,7 @@ public void testBlocksRemovedWhileInSafeMode() throws Exception {
restartStandby();
// It will initially have all of the blocks necessary.
assertSafeMode(nn1, 10, 10);
assertSafeMode(nn1, 10, 10, 3, 0);
// Delete those blocks while the SBN is in safe mode.
// This doesn't affect the SBN, since deletions are not
@ -322,14 +322,14 @@ public void testBlocksRemovedWhileInSafeMode() throws Exception {
HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports();
assertSafeMode(nn1, 10, 10);
assertSafeMode(nn1, 10, 10, 3, 0);
// When we catch up to active namespace, it will restore back
// to 0 blocks.
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0);
assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@ -355,20 +355,20 @@ public void testAppendWhileInSafeMode() throws Exception {
restartStandby();
// It will initially have all of the blocks necessary.
assertSafeMode(nn1, 5, 5);
assertSafeMode(nn1, 5, 5, 3, 0);
// Append to a block while SBN is in safe mode. This should
// not affect safemode initially, since the DN message
// will get queued.
FSDataOutputStream stm = fs.append(new Path("/test"));
try {
assertSafeMode(nn1, 5, 5);
assertSafeMode(nn1, 5, 5, 3, 0);
// if we roll edits now, the SBN should see that it's under construction
// and change its total count and safe count down by one, since UC
// blocks are not counted by safe mode.
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 4, 4);
assertSafeMode(nn1, 4, 4, 3, 0);
} finally {
IOUtils.closeStream(stm);
}
@ -386,13 +386,13 @@ public void testAppendWhileInSafeMode() throws Exception {
HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports();
assertSafeMode(nn1, 4, 4);
assertSafeMode(nn1, 4, 4, 3, 0);
// When we roll the edit log, the deletions will go through.
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0);
assertSafeMode(nn1, 0, 0, 3, 0);
}
/**
@ -424,20 +424,21 @@ public void testBlocksDeletedInEditLog() throws Exception {
restartActive();
}
private static void assertSafeMode(NameNode nn, int safe, int total) {
private static void assertSafeMode(NameNode nn, int safe, int total,
int numNodes, int nodeThresh) {
String status = nn.getNamesystem().getSafemode();
if (safe == total) {
assertTrue("Bad safemode status: '" + status + "'",
status.startsWith(
"Safe mode is ON." +
"The reported blocks " + safe + " has reached the threshold " +
"0.9990 of total blocks " + total + ". Safe mode will be " +
"turned off automatically"));
"Safe mode is ON. The reported blocks " + safe + " has reached the "
+ "threshold 0.9990 of total blocks " + total + ". The number of "
+ "live datanodes " + numNodes + " has reached the minimum number "
+ nodeThresh + ". Safe mode will be turned off automatically"));
} else {
int additional = total - safe;
assertTrue("Bad safemode status: '" + status + "'",
status.startsWith(
"Safe mode is ON." +
"Safe mode is ON. " +
"The reported blocks " + safe + " needs additional " +
additional + " blocks"));
}
@ -467,14 +468,14 @@ public void testComplexFailoverIntoSafemode() throws Exception {
// We expect it to be on its way out of safemode, since all of the blocks
// from the edit log have been reported.
assertSafeMode(nn1, 3, 3);
assertSafeMode(nn1, 3, 3, 3, 0);
// Initiate a failover into it while it's in safemode
banner("Initiating a failover into NN1 in safemode");
NameNodeAdapter.abortEditLogs(nn0);
cluster.transitionToActive(1);
assertSafeMode(nn1, 5, 5);
assertSafeMode(nn1, 5, 5, 3, 0);
}
/**
@ -499,10 +500,11 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception
// It will initially have all of the blocks necessary.
String status = nn1.getNamesystem().getSafemode();
assertTrue("Bad safemode status: '" + status + "'",
status.startsWith(
"Safe mode is ON." +
"The reported blocks 10 has reached the threshold 0.9990 of " +
"total blocks 10. Safe mode will be turned off automatically"));
status.startsWith(
"Safe mode is ON. The reported blocks 10 has reached the threshold "
+ "0.9990 of total blocks 10. The number of live datanodes 3 has "
+ "reached the minimum number 0. Safe mode will be turned off "
+ "automatically"));
// Delete those blocks while the SBN is in safe mode.
// Immediately roll the edit log before the actual deletions are sent
@ -512,7 +514,7 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// Should see removal of the blocks as well as their contribution to safe block count.
assertSafeMode(nn1, 0, 0);
assertSafeMode(nn1, 0, 0, 3, 0);
banner("Triggering sending deletions to DNs and Deletion Reports");
@ -525,7 +527,7 @@ public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception
// No change in assertion status here, but some of the consistency checks
// in safemode will fire here if we accidentally decrement safe block count
// below 0.
assertSafeMode(nn1, 0, 0);
assertSafeMode(nn1, 0, 0, 3, 0);
}
@ -561,11 +563,11 @@ public void testSafeBlockTracking() throws Exception {
banner("Restarting SBN");
restartStandby();
assertSafeMode(nn1, 10, 10);
assertSafeMode(nn1, 10, 10, 3, 0);
banner("Allowing SBN to catch up");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 15, 15);
assertSafeMode(nn1, 15, 15, 3, 0);
}
/**
@ -593,7 +595,7 @@ public void testBlocksAddedWhileStandbyIsDown() throws Exception {
nn0.getRpcServer().rollEditLog();
restartStandby();
assertSafeMode(nn1, 6, 6);
assertSafeMode(nn1, 6, 6, 3, 0);
}
/**