HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT does not decrement the safemode threshold, leaving the NN in safemode. Contributed by Vinay.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1541773 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de9271686a
commit
735aae32e4
@ -550,6 +550,9 @@ Release 2.3.0 - UNRELEASED
|
|||||||
HDFS-5075. httpfs-config.sh calls out incorrect env script name
|
HDFS-5075. httpfs-config.sh calls out incorrect env script name
|
||||||
(Timothy St. Clair via stevel)
|
(Timothy St. Clair via stevel)
|
||||||
|
|
||||||
|
HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT does not decrement the safemode threshold,
|
||||||
|
leaving the NN in safemode. (Vinay via jing9)
|
||||||
|
|
||||||
Release 2.2.1 - UNRELEASED
|
Release 2.2.1 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -593,7 +593,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
|
|||||||
fsNamesys.getSnapshotManager().deleteSnapshot(
|
fsNamesys.getSnapshotManager().deleteSnapshot(
|
||||||
deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
|
deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
|
||||||
collectedBlocks, removedINodes);
|
collectedBlocks, removedINodes);
|
||||||
fsNamesys.removeBlocks(collectedBlocks);
|
fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
|
||||||
collectedBlocks.clear();
|
collectedBlocks.clear();
|
||||||
fsNamesys.dir.removeFromInodeMap(removedINodes);
|
fsNamesys.dir.removeFromInodeMap(removedINodes);
|
||||||
removedINodes.clear();
|
removedINodes.clear();
|
||||||
|
@ -3316,6 +3316,18 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
removeBlocksAndUpdateSafemodeTotal(blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes the blocks from blocksmap and updates the safemode blocks total
|
||||||
|
*
|
||||||
|
* @param blocks
|
||||||
|
* An instance of {@link BlocksMapUpdateInfo} which contains a list
|
||||||
|
* of blocks that need to be removed from blocksMap
|
||||||
|
*/
|
||||||
|
void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) {
|
||||||
|
assert hasWriteLock();
|
||||||
// In the case that we are a Standby tailing edits from the
|
// In the case that we are a Standby tailing edits from the
|
||||||
// active while in safe-mode, we need to track the total number
|
// active while in safe-mode, we need to track the total number
|
||||||
// of blocks and safe blocks in the system.
|
// of blocks and safe blocks in the system.
|
||||||
@ -3336,9 +3348,9 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks,
|
|||||||
}
|
}
|
||||||
if (trackBlockCounts) {
|
if (trackBlockCounts) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" +
|
LOG.debug("Adjusting safe-mode totals for deletion."
|
||||||
"decreasing safeBlocks by " + numRemovedSafe +
|
+ "decreasing safeBlocks by " + numRemovedSafe
|
||||||
", totalBlocks by " + numRemovedComplete);
|
+ ", totalBlocks by " + numRemovedComplete);
|
||||||
}
|
}
|
||||||
adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
|
adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete);
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
@ -47,7 +48,10 @@
|
|||||||
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
|
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota;
|
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.Quota;
|
import org.apache.hadoop.hdfs.server.namenode.Quota;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList;
|
||||||
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
@ -949,4 +953,54 @@ public void testDeleteSnapshotCommandWithIllegalArguments() throws Exception {
|
|||||||
psOut.close();
|
psOut.close();
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The OP_DELETE_SNAPSHOT edit log op was not decrementing the safemode
|
||||||
|
* threshold on restart in HA mode (HDFS-5504).
|
||||||
|
*/
|
||||||
|
@Test(timeout = 60000)
|
||||||
|
public void testHANNRestartAfterSnapshotDeletion() throws Exception {
|
||||||
|
hdfs.close();
|
||||||
|
cluster.shutdown();
|
||||||
|
conf = new Configuration();
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
|
||||||
|
.build();
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
// stop the standby namenode
|
||||||
|
NameNode snn = cluster.getNameNode(1);
|
||||||
|
snn.stop();
|
||||||
|
|
||||||
|
hdfs = (DistributedFileSystem) HATestUtil
|
||||||
|
.configureFailoverFs(cluster, conf);
|
||||||
|
Path dir = new Path("/dir");
|
||||||
|
Path subDir = new Path(dir, "sub");
|
||||||
|
hdfs.mkdirs(dir);
|
||||||
|
hdfs.allowSnapshot(dir);
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
DFSTestUtil.createFile(hdfs, new Path(subDir, "" + i), 100, (short) 1,
|
||||||
|
1024L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// take snapshot
|
||||||
|
hdfs.createSnapshot(dir, "s0");
|
||||||
|
|
||||||
|
// delete the subdir
|
||||||
|
hdfs.delete(subDir, true);
|
||||||
|
|
||||||
|
// roll the edit log
|
||||||
|
NameNode ann = cluster.getNameNode(0);
|
||||||
|
ann.getRpcServer().rollEditLog();
|
||||||
|
|
||||||
|
hdfs.deleteSnapshot(dir, "s0");
|
||||||
|
// wait for the block deletion at the namenode
|
||||||
|
Thread.sleep(2000);
|
||||||
|
|
||||||
|
NameNodeAdapter.abortEditLogs(ann);
|
||||||
|
cluster.restartNameNode(0, false);
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
|
||||||
|
// wait till the cluster becomes active
|
||||||
|
cluster.waitClusterUp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user