HDFS-15590. namenode fails to start when ordered snapshot deletion feature is disabled (#2326)

bshashikant 2020-09-24 14:00:41 +05:30 committed by GitHub
parent c3a90dd918
commit 368f2f637e
4 changed files with 46 additions and 5 deletions
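For context, the "ordered snapshot deletion" feature named in the title is controlled by a NameNode configuration key. Below is a minimal sketch of disabling the feature before a restart; the key string is an assumption based on the DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED constant used in the test change further down, so verify it against your Hadoop version.

import org.apache.hadoop.conf.Configuration;

/** Illustrative sketch only; not part of this commit. */
public class ToggleOrderedSnapshotDeletion {
  // Assumed key behind the DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED constant
  // referenced in the test change below.
  static final String DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED =
      "dfs.namenode.snapshot.deletion.ordered";

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Disabling the feature after ordered deletions have already been logged
    // is the scenario that, before this fix, could fail a NameNode restart.
    conf.setBoolean(DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED, false);
    System.out.println(DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED + " = "
        + conf.getBoolean(DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED, true));
  }
}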


@@ -241,6 +241,24 @@ public Snapshot removeSnapshot(
       throws SnapshotException {
     final int i = searchSnapshot(DFSUtil.string2Bytes(snapshotName));
     if (i < 0) {
+      // Consider a sequence like this with snapshots S1 and S2:
+      // 1. The ordered snapshot deletion feature is turned on.
+      // 2. S2 is deleted, creating an edit log entry for S2's deletion.
+      // 3. S1 is deleted.
+      // 4. S2 gets deleted by the snapshot GC thread, creating a second
+      //    edit log record for S2's deletion.
+      // 5. The ordered snapshot deletion feature is disabled.
+      // 6. The Namenode is restarted.
+      // During edit log replay, S2 is actually deleted when the first edit
+      // log record for its deletion is replayed; replaying the second
+      // record then fails because the snapshot no longer exists, thereby
+      // failing the Namenode start.
+      // The idea here is to check during edit log replay: if a snapshot is
+      // not found and ordered snapshot deletion is off, ignore the error.
+      if (!snapshotManager.isSnapshotDeletionOrdered() &&
+          !snapshotManager.isImageLoaded()) {
+        return null;
+      }
       throw new SnapshotException("Cannot delete snapshot " + snapshotName
           + " from path " + snapshotRoot.getFullPathName()
           + ": the snapshot does not exist.");


@@ -479,10 +479,10 @@ void checkPerDirectorySnapshotLimit(int n) throws SnapshotException {
   void checkSnapshotLimit(int limit, int snapshotCount, String type)
       throws SnapshotException {
     if (snapshotCount >= limit) {
-      String msg = "there are already " + (snapshotCount + 1)
+      String msg = "there are already " + snapshotCount
           + " snapshot(s) and the " + type + " snapshot limit is "
           + limit;
-      if (fsdir.isImageLoaded()) {
+      if (isImageLoaded()) {
         // We have reached the maximum snapshot limit
         throw new SnapshotException(
             "Failed to create snapshot: " + msg);
@@ -492,7 +492,10 @@ void checkSnapshotLimit(int limit, int snapshotCount, String type)
       }
     }
   }
+  boolean isImageLoaded() {
+    return fsdir.isImageLoaded();
+  }

   /**
    * Delete a snapshot for a snapshottable directory
    * @param snapshotName Name of the snapshot to be deleted
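The change above also drops the earlier snapshotCount + 1 from the limit message, so it now reports the existing snapshot count, and the check goes through the new isImageLoaded() helper. A small standalone sketch of the corrected message construction, simplified from the method above (the real method throws SnapshotException):

public class LimitMessageSketch {
  static String limitMessage(int limit, int snapshotCount, String type) {
    // Matches the fixed code above: report the existing count, not count + 1.
    return "there are already " + snapshotCount
        + " snapshot(s) and the " + type + " snapshot limit is " + limit;
  }

  public static void main(String[] args) {
    // With 3 existing snapshots and a per-directory limit of 3, the full
    // error now matches the expectation updated in the shell test below:
    System.out.println("Failed to create snapshot: "
        + limitMessage(3, 3, "per directory"));
  }
}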


@@ -128,8 +128,8 @@ public void testMaxSnapshotLimit() throws Exception {
     DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn2", 0,
         "Created snapshot /sub3/.snapshot/sn2", conf);
     DFSTestUtil.FsShellRun("-createSnapshot /sub3 sn3", 1,
-        "Failed to add snapshot: there are already 3 snapshot(s) and "
-        + "the max snapshot limit is 3", conf);
+        "Failed to create snapshot: there are already 3 snapshot(s) and "
+        + "the per directory snapshot limit is 3", conf);
   }

   @Test


@@ -195,6 +195,26 @@ public void testSnapshotXattrWithSaveNameSpace() throws Exception {
     assertXAttrSet("s1", hdfs, null);
   }

+  @Test(timeout = 6000000)
+  public void testOrderedDeletionWithRestart() throws Exception {
+    DistributedFileSystem hdfs = cluster.getFileSystem();
+    hdfs.mkdirs(snapshottableDir);
+    hdfs.allowSnapshot(snapshottableDir);
+
+    final Path sub0 = new Path(snapshottableDir, "sub0");
+    hdfs.mkdirs(sub0);
+    hdfs.createSnapshot(snapshottableDir, "s0");
+
+    final Path sub1 = new Path(snapshottableDir, "sub1");
+    hdfs.mkdirs(sub1);
+    hdfs.createSnapshot(snapshottableDir, "s1");
+    assertXAttrSet("s1", hdfs, null);
+    assertXAttrSet("s1", hdfs, null);
+    cluster.getNameNode().getConf().
+        setBoolean(DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED, false);
+    cluster.restartNameNodes();
+  }
+
   @Test(timeout = 60000)
   public void testSnapshotXattrWithDisablingXattr() throws Exception {
     DistributedFileSystem hdfs = cluster.getFileSystem();