HDFS-12467. Ozone: SCM: NodeManager should log when it comes out of chill mode. Contributed by Nandakumar.

This commit is contained in:
Anu Engineer 2017-10-03 14:41:56 -07:00 committed by Owen O'Malley
parent d7a94a21ae
commit ceec14bff5
2 changed files with 47 additions and 31 deletions

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.ozone.scm.node; package org.apache.hadoop.ozone.scm.node;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import java.util.Optional;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.collections.map.HashedMap; import org.apache.commons.collections.map.HashedMap;
@ -62,6 +61,7 @@
import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -131,7 +131,17 @@ public class SCMNodeManager
private final int maxHBToProcessPerLoop; private final int maxHBToProcessPerLoop;
private final String clusterID; private final String clusterID;
private final VersionInfo version; private final VersionInfo version;
private Optional<Boolean> inManualChillMode; /**
* During startup, SCM enters chill mode and stays in it until the number
* of registered Datanodes reaches {@code chillModeNodeCount}.
* This flag tracks startup chill mode.
*/
private AtomicBoolean inStartupChillMode;
/**
* An administrator can put SCM into chill mode manually.
* This flag tracks manual chill mode.
*/
private AtomicBoolean inManualChillMode;
private final CommandQueue commandQueue; private final CommandQueue commandQueue;
// Node manager MXBean // Node manager MXBean
private ObjectName nmInfoBean; private ObjectName nmInfoBean;
@ -173,7 +183,10 @@ public SCMNodeManager(OzoneConfiguration conf, String clusterID)
executorService = HadoopExecutors.newScheduledThreadPool(1, executorService = HadoopExecutors.newScheduledThreadPool(1,
new ThreadFactoryBuilder().setDaemon(true) new ThreadFactoryBuilder().setDaemon(true)
.setNameFormat("SCM Heartbeat Processing Thread - %d").build()); .setNameFormat("SCM Heartbeat Processing Thread - %d").build());
this.inManualChillMode = Optional.empty();
LOG.info("Entering startup chill mode.");
this.inStartupChillMode = new AtomicBoolean(true);
this.inManualChillMode = new AtomicBoolean(false);
Preconditions.checkState(heartbeatCheckerIntervalMs > 0); Preconditions.checkState(heartbeatCheckerIntervalMs > 0);
executorService.schedule(this, heartbeatCheckerIntervalMs, executorService.schedule(this, heartbeatCheckerIntervalMs,
@ -286,11 +299,7 @@ public void setMinimumChillModeNodes(int count) {
*/ */
@Override @Override
public boolean isOutOfNodeChillMode() { public boolean isOutOfNodeChillMode() {
if (inManualChillMode.isPresent()) { return !inStartupChillMode.get() && !inManualChillMode.get();
return !inManualChillMode.get();
}
return (totalNodes.get() >= getMinimumChillModeNodes());
} }
/** /**
@ -298,7 +307,8 @@ public boolean isOutOfNodeChillMode() {
*/ */
@Override @Override
public void clearChillModeFlag() { public void clearChillModeFlag() {
this.inManualChillMode = Optional.empty(); LOG.info("Clearing manual chill mode flag.");
this.inManualChillMode.getAndSet(false);
} }
/** /**
@ -307,22 +317,15 @@ public void clearChillModeFlag() {
*/ */
@Override @Override
public String getChillModeStatus() { public String getChillModeStatus() {
if (inManualChillMode.isPresent() && inManualChillMode.get()) { if (inStartupChillMode.get()) {
return "Manual chill mode is set to true." +
getNodeStatus();
}
if (inManualChillMode.isPresent() && !inManualChillMode.get()) {
return "Manual chill mode is set to false." +
getNodeStatus();
}
if (isOutOfNodeChillMode()) {
return "Out of chill mode." + getNodeStatus();
} else {
return "Still in chill mode, waiting on nodes to report in." return "Still in chill mode, waiting on nodes to report in."
+ getNodeStatus(); + getNodeStatus();
} }
if (inManualChillMode.get()) {
return "Out of startup chill mode, but in manual chill mode." +
getNodeStatus();
}
return "Out of chill mode." + getNodeStatus();
} }
/** /**
@ -344,19 +347,24 @@ private String getNodeStatus() {
*/ */
@Override @Override
public boolean isInManualChillMode() { public boolean isInManualChillMode() {
if (this.inManualChillMode.isPresent()) { return inManualChillMode.get();
return this.inManualChillMode.get();
}
return false;
} }
/** /**
* Forcefully exits the chill mode even if we have not met the minimum * Forcefully exits the chill mode even if we have not met the minimum
* criteria of exiting the chill mode. * criteria of exiting the chill mode. This will exit from both startup
* and manual chill mode.
*/ */
@Override @Override
public void forceExitChillMode() { public void forceExitChillMode() {
this.inManualChillMode = Optional.of(false); if(inStartupChillMode.get()) {
LOG.info("Leaving startup chill mode.");
inStartupChillMode.getAndSet(false);
}
if(inManualChillMode.get()) {
LOG.info("Leaving manual chill mode.");
inManualChillMode.getAndSet(false);
}
} }
/** /**
@ -364,7 +372,8 @@ public void forceExitChillMode() {
*/ */
@Override @Override
public void forceEnterChillMode() { public void forceEnterChillMode() {
this.inManualChillMode = Optional.of(true); LOG.info("Entering manual chill mode.");
inManualChillMode.getAndSet(true);
} }
/** /**
@ -728,6 +737,12 @@ public SCMCommand register(DatanodeID datanodeID) {
healthyNodeCount.incrementAndGet(); healthyNodeCount.incrementAndGet();
nodeStats.put(datanodeID.getDatanodeUuid(), new SCMNodeStat()); nodeStats.put(datanodeID.getDatanodeUuid(), new SCMNodeStat());
if(inStartupChillMode.get() &&
totalNodes.get() >= getMinimumChillModeNodes()) {
inStartupChillMode.getAndSet(false);
LOG.info("Leaving startup chill mode.");
}
// TODO: define node pool policy for non-default node pool. // TODO: define node pool policy for non-default node pool.
// For now, all nodes are added to the "DefaultNodePool" upon registration // For now, all nodes are added to the "DefaultNodePool" upon registration
// if it has not been added to any node pool yet. // if it has not been added to any node pool yet.

View File

@ -907,7 +907,7 @@ public void testScmEnterAndExitChillMode() throws IOException,
assertTrue(nodeManager.isOutOfNodeChillMode()); assertTrue(nodeManager.isOutOfNodeChillMode());
status = nodeManager.getChillModeStatus(); status = nodeManager.getChillModeStatus();
Assert.assertThat(status, Assert.assertThat(status,
CoreMatchers.containsString("Manual chill mode is set to false.")); CoreMatchers.containsString("Out of chill mode."));
assertFalse((nodeManager.isInManualChillMode())); assertFalse((nodeManager.isInManualChillMode()));
@ -916,7 +916,8 @@ public void testScmEnterAndExitChillMode() throws IOException,
assertFalse(nodeManager.isOutOfNodeChillMode()); assertFalse(nodeManager.isOutOfNodeChillMode());
status = nodeManager.getChillModeStatus(); status = nodeManager.getChillModeStatus();
Assert.assertThat(status, Assert.assertThat(status,
CoreMatchers.containsString("Manual chill mode is set to true.")); CoreMatchers.containsString("Out of startup chill mode," +
" but in manual chill mode."));
assertTrue((nodeManager.isInManualChillMode())); assertTrue((nodeManager.isInManualChillMode()));