HDFS-17231. HA: Safemode should exit when resources are from low to available. (#6207). Contributed by Gu Peng.
Reviewed-by: Xing Lin <xinglin@linkedin.com> Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
parent
882f08b4bc
commit
a170d58501
@ -4534,6 +4534,11 @@ public void run () {
|
|||||||
LOG.warn(lowResourcesMsg + "Already in safe mode.");
|
LOG.warn(lowResourcesMsg + "Already in safe mode.");
|
||||||
}
|
}
|
||||||
enterSafeMode(true);
|
enterSafeMode(true);
|
||||||
|
} else {
|
||||||
|
if (isNoManualAndResourceLowSafeMode()) {
|
||||||
|
LOG.info("Namenode has sufficient available resources, exiting safe mode.");
|
||||||
|
leaveSafeMode(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
Thread.sleep(resourceRecheckInterval);
|
Thread.sleep(resourceRecheckInterval);
|
||||||
@ -5265,6 +5270,13 @@ private synchronized boolean isInManualOrResourceLowSafeMode() {
|
|||||||
return manualSafeMode || resourceLowSafeMode;
|
return manualSafeMode || resourceLowSafeMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true only if resource-low safe mode is set and manual safe mode is not set.
|
||||||
|
*/
|
||||||
|
private synchronized boolean isNoManualAndResourceLowSafeMode() {
|
||||||
|
return !manualSafeMode && resourceLowSafeMode;
|
||||||
|
}
|
||||||
|
|
||||||
private synchronized void setManualAndResourceLowSafeMode(boolean manual,
|
private synchronized void setManualAndResourceLowSafeMode(boolean manual,
|
||||||
boolean resourceLow) {
|
boolean resourceLow) {
|
||||||
this.manualSafeMode = manual;
|
this.manualSafeMode = manual;
|
||||||
|
@ -130,6 +130,14 @@ public void testCheckThatNameNodeResourceMonitorIsRunning()
|
|||||||
|
|
||||||
assertTrue("NN should be in safe mode after resources crossed threshold",
|
assertTrue("NN should be in safe mode after resources crossed threshold",
|
||||||
cluster.getNameNode().isInSafeMode());
|
cluster.getNameNode().isInSafeMode());
|
||||||
|
|
||||||
|
mockResourceChecker.setResourcesAvailable(true);
|
||||||
|
while (cluster.getNameNode().isInSafeMode() &&
|
||||||
|
Time.now() < startMillis + (60 * 1000)) {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
assertTrue("NN should leave safe mode after resources not crossed threshold",
|
||||||
|
!cluster.getNameNode().isInSafeMode());
|
||||||
} finally {
|
} finally {
|
||||||
if (cluster != null)
|
if (cluster != null)
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
|
Loading…
Reference in New Issue
Block a user