HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, leads to NN safemode. Contributed by Vinay.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1541773 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2013-11-14 01:03:34 +00:00
parent de9271686a
commit 735aae32e4
4 changed files with 73 additions and 4 deletions

View File

@ -550,6 +550,9 @@ Release 2.3.0 - UNRELEASED
HDFS-5075. httpfs-config.sh calls out incorrect env script name HDFS-5075. httpfs-config.sh calls out incorrect env script name
(Timothy St. Clair via stevel) (Timothy St. Clair via stevel)
HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold,
leads to NN safemode. (Vinay via jing9)
Release 2.2.1 - UNRELEASED Release 2.2.1 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -593,7 +593,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
fsNamesys.getSnapshotManager().deleteSnapshot( fsNamesys.getSnapshotManager().deleteSnapshot(
deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
collectedBlocks, removedINodes); collectedBlocks, removedINodes);
fsNamesys.removeBlocks(collectedBlocks); fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
collectedBlocks.clear(); collectedBlocks.clear();
fsNamesys.dir.removeFromInodeMap(removedINodes); fsNamesys.dir.removeFromInodeMap(removedINodes);
removedINodes.clear(); removedINodes.clear();

View File

@ -3316,6 +3316,18 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
return; return;
} }
removeBlocksAndUpdateSafemodeTotal(blocks);
}
/**
* Removes the blocks from blocksmap and updates the safemode blocks total
*
* @param blocks
* An instance of {@link BlocksMapUpdateInfo} which contains a list
* of blocks that need to be removed from blocksMap
*/
void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
assert hasWriteLock();
// In the case that we are a Standby tailing edits from the // In the case that we are a Standby tailing edits from the
// active while in safe-mode, we need to track the total number // active while in safe-mode, we need to track the total number
// of blocks and safe blocks in the system. // of blocks and safe blocks in the system.
@ -3336,9 +3348,9 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
} }
if (trackBlockCounts) { if (trackBlockCounts) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" + LOG.debug("Adjusting safe-mode totals for deletion."
"decreasing safeBlocks by " + numRemovedSafe + + "decreasing safeBlocks by " + numRemovedSafe
", totalBlocks by " + numRemovedComplete); + ", totalBlocks by " + numRemovedComplete);
} }
adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
} }

View File

@ -38,6 +38,7 @@
import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@ -47,7 +48,10 @@
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota; import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota;
import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.namenode.Quota; import org.apache.hadoop.hdfs.server.namenode.Quota;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList;
import org.apache.hadoop.hdfs.util.ReadOnlyList; import org.apache.hadoop.hdfs.util.ReadOnlyList;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
@ -949,4 +953,54 @@ public void testDeleteSnapshotCommandWithIllegalArguments() throws Exception {
psOut.close(); psOut.close();
out.close(); out.close();
} }
/*
* OP_DELETE_SNAPSHOT edits op was not decrementing the safemode threshold on
* restart in HA mode. HDFS-5504
*/
@Test(timeout = 60000)
public void testHANNRestartAfterSnapshotDeletion() throws Exception {
hdfs.close();
cluster.shutdown();
conf = new Configuration();
cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
.build();
cluster.transitionToActive(0);
// stop the standby namenode
NameNode snn = cluster.getNameNode(1);
snn.stop();
hdfs = (DistributedFileSystem) HATestUtil
.configureFailoverFs(cluster, conf);
Path dir = new Path("/dir");
Path subDir = new Path(dir, "sub");
hdfs.mkdirs(dir);
hdfs.allowSnapshot(dir);
for (int i = 0; i < 5; i++) {
DFSTestUtil.createFile(hdfs, new Path(subDir, "" + i), 100, (short) 1,
1024L);
}
// take snapshot
hdfs.createSnapshot(dir, "s0");
// delete the subdir
hdfs.delete(subDir, true);
// roll the edit log
NameNode ann = cluster.getNameNode(0);
ann.getRpcServer().rollEditLog();
hdfs.deleteSnapshot(dir, "s0");
// wait for the blocks deletion at namenode
Thread.sleep(2000);
NameNodeAdapter.abortEditLogs(ann);
cluster.restartNameNode(0, false);
cluster.transitionToActive(0);
// wait till the cluster becomes active
cluster.waitClusterUp();
}
} }