HDFS-16836: StandbyCheckpointer shouldn't trigger rollback fs image after RU is finalized (#5135)

Co-authored-by: Lei Yang <leyang@linkedin.com>
This commit is contained in:
Lei Yang 2022-11-15 15:06:37 -08:00 committed by GitHub
parent 69e50c7b44
commit cd929457c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 37 additions and 0 deletions

View File

@ -912,6 +912,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
fsNamesys.getFSImage().updateStorageVersion();
fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
NameNodeFile.IMAGE);
fsNamesys.setNeedRollbackFsImage(false);
break;
}
case OP_ADD_CACHE_DIRECTIVE: {

View File

@ -33,6 +33,9 @@
import javax.management.ReflectionException;
import javax.management.openmbean.CompositeDataSupport;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
@ -720,6 +723,39 @@ static void queryForPreparation(DistributedFileSystem dfs) throws IOException,
}
}
/**
 * Verifies that the standby NameNode's "need rollback fsimage" flag follows
 * the rolling upgrade (RU) lifecycle driven from the active NameNode: it is
 * set once the standby has caught up after RU PREPARE, and it is cleared
 * again once the standby has caught up after RU FINALIZE.
 *
 * @throws IOException if a NameNode RPC or cluster operation fails
 * @throws InterruptedException if interrupted while waiting on the cluster
 */
@Test
public void testEditLogTailerRollingUpgrade() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);

  // Only NameNode state is inspected, so no DataNodes are started.
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(0)
      .build();
  try {
    // Keep cluster start-up inside the try block so that a failure while
    // waiting for the cluster or transitioning to active still reaches the
    // shutdown in the finally clause instead of leaking the cluster.
    cluster.waitActive();
    cluster.transitionToActive(0);

    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);

    // RU start should trigger rollback image in standbycheckpointer
    nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.PREPARE);
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
    Assert.assertTrue(nn2.getNamesystem().isNeedRollbackFsImage());

    // RU finalize should reset rollback image flag in standbycheckpointer
    nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.FINALIZE);
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
    Assert.assertFalse(nn2.getNamesystem().isNeedRollbackFsImage());
  } finally {
    cluster.shutdown();
  }
}
/**
* In non-HA setup, after rolling upgrade prepare, the Secondary NN should
* still be able to do checkpoint