HDFS-17231. HA: Safemode should exit when resources go from low to available. (#6207). Contributed by Gu Peng.

Reviewed-by: Xing Lin <xinglin@linkedin.com>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
gp1314 2023-10-25 11:43:12 +08:00 committed by GitHub
parent 882f08b4bc
commit a170d58501
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 0 deletions

View File

@ -4534,6 +4534,11 @@ public void run () {
LOG.warn(lowResourcesMsg + "Already in safe mode.");
}
enterSafeMode(true);
} else {
if (isNoManualAndResourceLowSafeMode()) {
LOG.info("Namenode has sufficient available resources, exiting safe mode.");
leaveSafeMode(false);
}
}
try {
Thread.sleep(resourceRecheckInterval);
@ -5265,6 +5270,13 @@ private synchronized boolean isInManualOrResourceLowSafeMode() {
return manualSafeMode || resourceLowSafeMode;
}
/**
 * Checks the two safe mode flags: the result is true only when the
 * resource-low flag is set and the manual flag is not set.
 *
 * @return true if resourceLowSafeMode is set while manualSafeMode is not set.
 */
private synchronized boolean isNoManualAndResourceLowSafeMode() {
  if (manualSafeMode) {
    // The manual flag being set always yields false, whatever the
    // resource-low flag says.
    return false;
  }
  return resourceLowSafeMode;
}
private synchronized void setManualAndResourceLowSafeMode(boolean manual,
boolean resourceLow) {
this.manualSafeMode = manual;

View File

@ -130,6 +130,14 @@ public void testCheckThatNameNodeResourceMonitorIsRunning()
assertTrue("NN should be in safe mode after resources crossed threshold",
cluster.getNameNode().isInSafeMode());
mockResourceChecker.setResourcesAvailable(true);
while (cluster.getNameNode().isInSafeMode() &&
Time.now() < startMillis + (60 * 1000)) {
Thread.sleep(1000);
}
assertTrue("NN should leave safe mode after resources not crossed threshold",
!cluster.getNameNode().isInSafeMode());
} finally {
if (cluster != null)
cluster.shutdown();