HDFS-2747. Entering safe mode after starting SBN can NPE. Contributed by Uma Maheswara Rao G.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1c24ae0cd8
commit
2f26475a39
@ -105,3 +105,5 @@ HDFS-2766. Test for case where standby partially reads log and then performs che
|
|||||||
HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm)
|
HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm)
|
||||||
|
|
||||||
HDFS-2789. TestHAAdmin.testFailover is failing (eli)
|
HDFS-2789. TestHAAdmin.testFailover is failing (eli)
|
||||||
|
|
||||||
|
HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G via todd)
|
||||||
|
@ -3774,21 +3774,28 @@ private long getCompleteBlocksTotal() {
|
|||||||
void enterSafeMode(boolean resourcesLow) throws IOException {
|
void enterSafeMode(boolean resourcesLow) throws IOException {
|
||||||
writeLock();
|
writeLock();
|
||||||
try {
|
try {
|
||||||
// Ensure that any concurrent operations have been fully synced
|
// Ensure that any concurrent operations have been fully synced
|
||||||
// before entering safe mode. This ensures that the FSImage
|
// before entering safe mode. This ensures that the FSImage
|
||||||
// is entirely stable on disk as soon as we're in safe mode.
|
// is entirely stable on disk as soon as we're in safe mode.
|
||||||
getEditLog().logSyncAll();
|
boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
|
||||||
if (!isInSafeMode()) {
|
// Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
|
||||||
safeMode = new SafeModeInfo(resourcesLow);
|
// logSyncAll can be called only when Editlog is in OpenForWrite mode
|
||||||
return;
|
if (isEditlogOpenForWrite) {
|
||||||
}
|
getEditLog().logSyncAll();
|
||||||
if (resourcesLow) {
|
}
|
||||||
safeMode.setResourcesLow();
|
if (!isInSafeMode()) {
|
||||||
}
|
safeMode = new SafeModeInfo(resourcesLow);
|
||||||
safeMode.setManual();
|
return;
|
||||||
getEditLog().logSyncAll();
|
}
|
||||||
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
|
if (resourcesLow) {
|
||||||
+ safeMode.getTurnOffTip());
|
safeMode.setResourcesLow();
|
||||||
|
}
|
||||||
|
safeMode.setManual();
|
||||||
|
if (isEditlogOpenForWrite) {
|
||||||
|
getEditLog().logSyncAll();
|
||||||
|
}
|
||||||
|
NameNode.stateChangeLog.info("STATE* Safe mode is ON. "
|
||||||
|
+ safeMode.getTurnOffTip());
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
@ -95,6 +96,68 @@ private void restartStandby() throws IOException {
|
|||||||
nn1.getNamesystem().getEditLogTailer().interrupt();
|
nn1.getNamesystem().getEditLogTailer().interrupt();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test case for entering safemode on the active namenode when it is already in startup safemode.
|
||||||
|
* It is a regression test for HDFS-2747.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception {
|
||||||
|
banner("Restarting active");
|
||||||
|
restartActive();
|
||||||
|
FSNamesystem namesystem = nn0.getNamesystem();
|
||||||
|
String status = namesystem.getSafemode();
|
||||||
|
assertTrue("Bad safemode status: '" + status + "'", status
|
||||||
|
.startsWith("Safe mode is ON."));
|
||||||
|
NameNodeAdapter.enterSafeMode(nn0, false);
|
||||||
|
assertTrue("Failed to enter into safemode in active", namesystem
|
||||||
|
.isInSafeMode());
|
||||||
|
NameNodeAdapter.enterSafeMode(nn0, false);
|
||||||
|
assertTrue("Failed to enter into safemode in active", namesystem
|
||||||
|
.isInSafeMode());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test case for entering safemode on the standby namenode when it is already in startup safemode.
|
||||||
|
* It is a regression test for HDFS-2747.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testEnterSafeModeInSBNShouldNotThrowNPE() throws Exception {
|
||||||
|
banner("Starting with NN0 active and NN1 standby, creating some blocks");
|
||||||
|
DFSTestUtil
|
||||||
|
.createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L);
|
||||||
|
// Roll edit log so that, when the SBN restarts, it will load
|
||||||
|
// the namespace during startup and enter safemode.
|
||||||
|
nn0.getRpcServer().rollEditLog();
|
||||||
|
banner("Creating some blocks that won't be in the edit log");
|
||||||
|
DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3,
|
||||||
|
1L);
|
||||||
|
banner("Deleting the original blocks");
|
||||||
|
fs.delete(new Path("/test"), true);
|
||||||
|
banner("Restarting standby");
|
||||||
|
restartStandby();
|
||||||
|
FSNamesystem namesystem = nn1.getNamesystem();
|
||||||
|
String status = namesystem.getSafemode();
|
||||||
|
assertTrue("Bad safemode status: '" + status + "'", status
|
||||||
|
.startsWith("Safe mode is ON."));
|
||||||
|
NameNodeAdapter.enterSafeMode(nn1, false);
|
||||||
|
assertTrue("Failed to enter into safemode in standby", namesystem
|
||||||
|
.isInSafeMode());
|
||||||
|
NameNodeAdapter.enterSafeMode(nn1, false);
|
||||||
|
assertTrue("Failed to enter into safemode in standby", namesystem
|
||||||
|
.isInSafeMode());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void restartActive() throws IOException {
|
||||||
|
cluster.shutdownNameNode(0);
|
||||||
|
// Set the safemode extension to be lengthy, so that the tests
|
||||||
|
// can check the safemode message after the safemode conditions
|
||||||
|
// have been achieved, without being racy.
|
||||||
|
cluster.getConfiguration(0).setInt(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000);
|
||||||
|
cluster.restartNameNode(0);
|
||||||
|
nn0 = cluster.getNameNode(0);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests the case where, while a standby is down, more blocks are
|
* Tests the case where, while a standby is down, more blocks are
|
||||||
* added to the namespace, but not rolled. So, when it starts up,
|
* added to the namespace, but not rolled. So, when it starts up,
|
||||||
|
Loading…
Reference in New Issue
Block a user