From cd929457c93f4c2460909ab65d2d3cc4e2f817ab Mon Sep 17 00:00:00 2001 From: Lei Yang Date: Tue, 15 Nov 2022 15:06:37 -0800 Subject: [PATCH] HDFS-16836: StandbyCheckpointer shouldn't trigger rollback fs image after RU is finalized (#5135) Co-authored-by: Lei Yang --- .../hdfs/server/namenode/FSEditLogLoader.java | 1 + .../hadoop/hdfs/TestRollingUpgrade.java | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index a065fe6c0c..efbc3b20c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -912,6 +912,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, fsNamesys.getFSImage().updateStorageVersion(); fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, NameNodeFile.IMAGE); + fsNamesys.setNeedRollbackFsImage(false); break; } case OP_ADD_CACHE_DIRECTIVE: { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java index 6e7014c42e..bb5da24a68 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java @@ -33,6 +33,9 @@ import javax.management.ReflectionException; import javax.management.openmbean.CompositeDataSupport; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.junit.Rule; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; @@ -720,6 +723,39 @@ static void queryForPreparation(DistributedFileSystem dfs) throws IOException, } } + @Test + public void testEditLogTailerRollingUpgrade() throws IOException, InterruptedException { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 1); + + HAUtil.setAllowStandbyReads(conf, true); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + cluster.waitActive(); + + cluster.transitionToActive(0); + + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + try { + // RU start should trigger rollback image in standbycheckpointer + nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.PREPARE); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); + Assert.assertTrue(nn2.getNamesystem().isNeedRollbackFsImage()); + + // RU finalize should reset rollback image flag in standbycheckpointer + nn1.getRpcServer().rollingUpgrade(HdfsConstants.RollingUpgradeAction.FINALIZE); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); + Assert.assertFalse(nn2.getNamesystem().isNeedRollbackFsImage()); + } finally { + cluster.shutdown(); + } + } + /** * In non-HA setup, after rolling upgrade prepare, the Secondary NN should * still be able to do checkpoint