From a170d58501cf9f1bca0f111007122caf3ebe9419 Mon Sep 17 00:00:00 2001 From: gp1314 <814085234@qq.com> Date: Wed, 25 Oct 2023 11:43:12 +0800 Subject: [PATCH] HDFS-17231. HA: Safemode should exit when resources are from low to available. (#6207). Contributed by Gu Peng. Reviewed-by: Xing Lin Signed-off-by: He Xiaoqiao --- .../hadoop/hdfs/server/namenode/FSNamesystem.java | 12 ++++++++++++ .../server/namenode/TestNameNodeResourceChecker.java | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index d9b165f96e..3d360c6d0d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4534,6 +4534,11 @@ public void run () { LOG.warn(lowResourcesMsg + "Already in safe mode."); } enterSafeMode(true); + } else { + if (isNoManualAndResourceLowSafeMode()) { + LOG.info("Namenode has sufficient available resources, exiting safe mode."); + leaveSafeMode(false); + } } try { Thread.sleep(resourceRecheckInterval); @@ -5265,6 +5270,13 @@ private synchronized boolean isInManualOrResourceLowSafeMode() { return manualSafeMode || resourceLowSafeMode; } + /** + * @return true if it is not in manual safe mode and resource low safe mode. + */ + private synchronized boolean isNoManualAndResourceLowSafeMode() { + return !manualSafeMode && resourceLowSafeMode; + } + private synchronized void setManualAndResourceLowSafeMode(boolean manual, boolean resourceLow) { this.manualSafeMode = manual; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourceChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourceChecker.java index f86ce5fc06..f3e187b5e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourceChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourceChecker.java @@ -130,6 +130,14 @@ public void testCheckThatNameNodeResourceMonitorIsRunning() assertTrue("NN should be in safe mode after resources crossed threshold", cluster.getNameNode().isInSafeMode()); + + mockResourceChecker.setResourcesAvailable(true); + while (cluster.getNameNode().isInSafeMode() && + Time.now() < startMillis + (60 * 1000)) { + Thread.sleep(1000); + } + assertTrue("NN should leave safe mode after resources not crossed threshold", + !cluster.getNameNode().isInSafeMode()); } finally { if (cluster != null) cluster.shutdown();