HDFS-12467. Ozone: SCM: NodeManager should log when it comes out of chill mode. Contributed by Nandakumar.
This commit is contained in:
parent
d7a94a21ae
commit
ceec14bff5
@@ -18,7 +18,6 @@
|
|||||||
package org.apache.hadoop.ozone.scm.node;
|
package org.apache.hadoop.ozone.scm.node;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import java.util.Optional;
|
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
import org.apache.commons.collections.map.HashedMap;
|
import org.apache.commons.collections.map.HashedMap;
|
||||||
@@ -62,6 +61,7 @@
|
|||||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@@ -131,7 +131,17 @@ public class SCMNodeManager
|
|||||||
private final int maxHBToProcessPerLoop;
|
private final int maxHBToProcessPerLoop;
|
||||||
private final String clusterID;
|
private final String clusterID;
|
||||||
private final VersionInfo version;
|
private final VersionInfo version;
|
||||||
private Optional<Boolean> inManualChillMode;
|
/**
|
||||||
|
* During startup, SCM enters chill mode and stays in it
|
||||||
|
* until the number of registered Datanodes reaches {@code chillModeNodeCount}.
|
||||||
|
* This flag is for tracking startup chill mode.
|
||||||
|
*/
|
||||||
|
private AtomicBoolean inStartupChillMode;
|
||||||
|
/**
|
||||||
|
* Administrator can put SCM into chill mode manually.
|
||||||
|
* This flag is for tracking manual chill mode.
|
||||||
|
*/
|
||||||
|
private AtomicBoolean inManualChillMode;
|
||||||
private final CommandQueue commandQueue;
|
private final CommandQueue commandQueue;
|
||||||
// Node manager MXBean
|
// Node manager MXBean
|
||||||
private ObjectName nmInfoBean;
|
private ObjectName nmInfoBean;
|
||||||
@@ -173,7 +183,10 @@ public SCMNodeManager(OzoneConfiguration conf, String clusterID)
|
|||||||
executorService = HadoopExecutors.newScheduledThreadPool(1,
|
executorService = HadoopExecutors.newScheduledThreadPool(1,
|
||||||
new ThreadFactoryBuilder().setDaemon(true)
|
new ThreadFactoryBuilder().setDaemon(true)
|
||||||
.setNameFormat("SCM Heartbeat Processing Thread - %d").build());
|
.setNameFormat("SCM Heartbeat Processing Thread - %d").build());
|
||||||
this.inManualChillMode = Optional.empty();
|
|
||||||
|
LOG.info("Entering startup chill mode.");
|
||||||
|
this.inStartupChillMode = new AtomicBoolean(true);
|
||||||
|
this.inManualChillMode = new AtomicBoolean(false);
|
||||||
|
|
||||||
Preconditions.checkState(heartbeatCheckerIntervalMs > 0);
|
Preconditions.checkState(heartbeatCheckerIntervalMs > 0);
|
||||||
executorService.schedule(this, heartbeatCheckerIntervalMs,
|
executorService.schedule(this, heartbeatCheckerIntervalMs,
|
||||||
@@ -286,11 +299,7 @@ public void setMinimumChillModeNodes(int count) {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isOutOfNodeChillMode() {
|
public boolean isOutOfNodeChillMode() {
|
||||||
if (inManualChillMode.isPresent()) {
|
return !inStartupChillMode.get() && !inManualChillMode.get();
|
||||||
return !inManualChillMode.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
return (totalNodes.get() >= getMinimumChillModeNodes());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -298,7 +307,8 @@ public boolean isOutOfNodeChillMode() {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void clearChillModeFlag() {
|
public void clearChillModeFlag() {
|
||||||
this.inManualChillMode = Optional.empty();
|
LOG.info("Clearing manual chill mode flag.");
|
||||||
|
this.inManualChillMode.getAndSet(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -307,22 +317,15 @@ public void clearChillModeFlag() {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String getChillModeStatus() {
|
public String getChillModeStatus() {
|
||||||
if (inManualChillMode.isPresent() && inManualChillMode.get()) {
|
if (inStartupChillMode.get()) {
|
||||||
return "Manual chill mode is set to true." +
|
|
||||||
getNodeStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inManualChillMode.isPresent() && !inManualChillMode.get()) {
|
|
||||||
return "Manual chill mode is set to false." +
|
|
||||||
getNodeStatus();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isOutOfNodeChillMode()) {
|
|
||||||
return "Out of chill mode." + getNodeStatus();
|
|
||||||
} else {
|
|
||||||
return "Still in chill mode, waiting on nodes to report in."
|
return "Still in chill mode, waiting on nodes to report in."
|
||||||
+ getNodeStatus();
|
+ getNodeStatus();
|
||||||
}
|
}
|
||||||
|
if (inManualChillMode.get()) {
|
||||||
|
return "Out of startup chill mode, but in manual chill mode." +
|
||||||
|
getNodeStatus();
|
||||||
|
}
|
||||||
|
return "Out of chill mode." + getNodeStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -344,19 +347,24 @@ private String getNodeStatus() {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isInManualChillMode() {
|
public boolean isInManualChillMode() {
|
||||||
if (this.inManualChillMode.isPresent()) {
|
return inManualChillMode.get();
|
||||||
return this.inManualChillMode.get();
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forcefully exits the chill mode even if we have not met the minimum
|
* Forcefully exits the chill mode even if we have not met the minimum
|
||||||
* criteria of exiting the chill mode.
|
* criteria of exiting the chill mode. This will exit from both startup
|
||||||
|
* and manual chill mode.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void forceExitChillMode() {
|
public void forceExitChillMode() {
|
||||||
this.inManualChillMode = Optional.of(false);
|
if(inStartupChillMode.get()) {
|
||||||
|
LOG.info("Leaving startup chill mode.");
|
||||||
|
inStartupChillMode.getAndSet(false);
|
||||||
|
}
|
||||||
|
if(inManualChillMode.get()) {
|
||||||
|
LOG.info("Leaving manual chill mode.");
|
||||||
|
inManualChillMode.getAndSet(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -364,7 +372,8 @@ public void forceExitChillMode() {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void forceEnterChillMode() {
|
public void forceEnterChillMode() {
|
||||||
this.inManualChillMode = Optional.of(true);
|
LOG.info("Entering manual chill mode.");
|
||||||
|
inManualChillMode.getAndSet(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -728,6 +737,12 @@ public SCMCommand register(DatanodeID datanodeID) {
|
|||||||
healthyNodeCount.incrementAndGet();
|
healthyNodeCount.incrementAndGet();
|
||||||
nodeStats.put(datanodeID.getDatanodeUuid(), new SCMNodeStat());
|
nodeStats.put(datanodeID.getDatanodeUuid(), new SCMNodeStat());
|
||||||
|
|
||||||
|
if(inStartupChillMode.get() &&
|
||||||
|
totalNodes.get() >= getMinimumChillModeNodes()) {
|
||||||
|
inStartupChillMode.getAndSet(false);
|
||||||
|
LOG.info("Leaving startup chill mode.");
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: define node pool policy for non-default node pool.
|
// TODO: define node pool policy for non-default node pool.
|
||||||
// For now, all nodes are added to the "DefaultNodePool" upon registration
|
// For now, all nodes are added to the "DefaultNodePool" upon registration
|
||||||
// if it has not been added to any node pool yet.
|
// if it has not been added to any node pool yet.
|
||||||
|
@@ -907,7 +907,7 @@ public void testScmEnterAndExitChillMode() throws IOException,
|
|||||||
assertTrue(nodeManager.isOutOfNodeChillMode());
|
assertTrue(nodeManager.isOutOfNodeChillMode());
|
||||||
status = nodeManager.getChillModeStatus();
|
status = nodeManager.getChillModeStatus();
|
||||||
Assert.assertThat(status,
|
Assert.assertThat(status,
|
||||||
CoreMatchers.containsString("Manual chill mode is set to false."));
|
CoreMatchers.containsString("Out of chill mode."));
|
||||||
assertFalse((nodeManager.isInManualChillMode()));
|
assertFalse((nodeManager.isInManualChillMode()));
|
||||||
|
|
||||||
|
|
||||||
@@ -916,7 +916,8 @@ public void testScmEnterAndExitChillMode() throws IOException,
|
|||||||
assertFalse(nodeManager.isOutOfNodeChillMode());
|
assertFalse(nodeManager.isOutOfNodeChillMode());
|
||||||
status = nodeManager.getChillModeStatus();
|
status = nodeManager.getChillModeStatus();
|
||||||
Assert.assertThat(status,
|
Assert.assertThat(status,
|
||||||
CoreMatchers.containsString("Manual chill mode is set to true."));
|
CoreMatchers.containsString("Out of startup chill mode," +
|
||||||
|
" but in manual chill mode."));
|
||||||
assertTrue((nodeManager.isInManualChillMode()));
|
assertTrue((nodeManager.isInManualChillMode()));
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user