HDFS-12609. Ozone: SCM: Refactoring of chill mode logic in NodeManager. Contributed by Nandakumar.
This commit is contained in:
parent
e3b51d9074
commit
e76e10fd7a
@ -226,7 +226,7 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
|
||||
INVALID_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (!nodeManager.isOutOfNodeChillMode()) {
|
||||
if (!nodeManager.isOutOfChillMode()) {
|
||||
LOG.warn("Not out of Chill mode.");
|
||||
throw new SCMException("Unable to create block while in chill mode",
|
||||
CHILL_MODE_EXCEPTION);
|
||||
@ -403,7 +403,7 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
|
||||
*/
|
||||
@Override
|
||||
public void deleteBlocks(List<String> blockIDs) throws IOException {
|
||||
if (!nodeManager.isOutOfNodeChillMode()) {
|
||||
if (!nodeManager.isOutOfChillMode()) {
|
||||
throw new SCMException("Unable to delete block while in chill mode",
|
||||
CHILL_MODE_EXCEPTION);
|
||||
}
|
||||
|
@ -183,7 +183,7 @@ public class ContainerMapping implements Mapping {
|
||||
Preconditions.checkNotNull(containerName);
|
||||
Preconditions.checkState(!containerName.isEmpty());
|
||||
ContainerInfo containerInfo = null;
|
||||
if (!nodeManager.isOutOfNodeChillMode()) {
|
||||
if (!nodeManager.isOutOfChillMode()) {
|
||||
throw new SCMException(
|
||||
"Unable to create container while in chill mode",
|
||||
SCMException.ResultCodes.CHILL_MODE_EXCEPTION);
|
||||
|
@ -94,14 +94,14 @@ public interface NodeManager extends StorageContainerNodeProtocol,
|
||||
void forceExitChillMode();
|
||||
|
||||
/**
|
||||
* Forcefully enters chill mode, even if all minimum node conditions are met.
|
||||
* Puts the node manager into manual chill mode.
|
||||
*/
|
||||
void forceEnterChillMode();
|
||||
void enterChillMode();
|
||||
|
||||
/**
|
||||
* Clears the manual chill mode flag.
|
||||
* Brings node manager out of manual chill mode.
|
||||
*/
|
||||
void clearChillModeFlag();
|
||||
void exitChillMode();
|
||||
|
||||
/**
|
||||
* Returns the aggregated node stats.
|
||||
|
@ -35,13 +35,6 @@ public interface NodeManagerMXBean {
|
||||
*/
|
||||
int getMinimumChillModeNodes();
|
||||
|
||||
/**
|
||||
* Reports if we have exited out of chill mode by discovering enough nodes.
|
||||
*
|
||||
* @return True if we are out of Node layer chill mode, false otherwise.
|
||||
*/
|
||||
boolean isOutOfNodeChillMode();
|
||||
|
||||
/**
|
||||
* Returns a chill mode status string.
|
||||
* @return String
|
||||
@ -50,11 +43,10 @@ public interface NodeManagerMXBean {
|
||||
|
||||
|
||||
/**
|
||||
* Returns the status of manual chill mode flag.
|
||||
* @return true if forceEnterChillMode has been called,
|
||||
* false if forceExitChillMode or status is not set. eg. clearChillModeFlag.
|
||||
* Returns true if node manager is out of chill mode, else false.
|
||||
* @return true if out of chill mode, else false
|
||||
*/
|
||||
boolean isInManualChillMode();
|
||||
boolean isOutOfChillMode();
|
||||
|
||||
/**
|
||||
* Get the number of data nodes in all states.
|
||||
|
@ -292,25 +292,6 @@ public class SCMNodeManager
|
||||
chillModeNodeCount = count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports if we have exited out of chill mode.
|
||||
*
|
||||
* @return true if we are out of chill mode.
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfNodeChillMode() {
|
||||
return !inStartupChillMode.get() && !inManualChillMode.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void clearChillModeFlag() {
|
||||
LOG.info("Clearing manual chill mode flag.");
|
||||
this.inManualChillMode.getAndSet(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns chill mode Status string.
|
||||
* @return String
|
||||
@ -318,36 +299,16 @@ public class SCMNodeManager
|
||||
@Override
|
||||
public String getChillModeStatus() {
|
||||
if (inStartupChillMode.get()) {
|
||||
return "Still in chill mode, waiting on nodes to report in."
|
||||
+ getNodeStatus();
|
||||
return "Still in chill mode, waiting on nodes to report in." +
|
||||
String.format(" %d nodes reported, minimal %d nodes required.",
|
||||
totalNodes.get(), getMinimumChillModeNodes());
|
||||
}
|
||||
if (inManualChillMode.get()) {
|
||||
return "Out of startup chill mode, but in manual chill mode." +
|
||||
getNodeStatus();
|
||||
String.format(" %d nodes have reported in.", totalNodes.get());
|
||||
}
|
||||
return "Out of chill mode." + getNodeStatus();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a node status string.
|
||||
* @return - String
|
||||
*/
|
||||
private String getNodeStatus() {
|
||||
return isOutOfNodeChillMode() ?
|
||||
String.format(" %d nodes have reported in.", totalNodes.get()) :
|
||||
String.format(" %d nodes reported, minimal %d nodes required.",
|
||||
totalNodes.get(), getMinimumChillModeNodes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the status of Manual chill Mode flag.
|
||||
*
|
||||
* @return true if forceEnterChillMode has been called, false if
|
||||
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
|
||||
*/
|
||||
@Override
|
||||
public boolean isInManualChillMode() {
|
||||
return inManualChillMode.get();
|
||||
return "Out of chill mode." +
|
||||
String.format(" %d nodes have reported in.", totalNodes.get());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -359,21 +320,39 @@ public class SCMNodeManager
|
||||
public void forceExitChillMode() {
|
||||
if(inStartupChillMode.get()) {
|
||||
LOG.info("Leaving startup chill mode.");
|
||||
inStartupChillMode.getAndSet(false);
|
||||
inStartupChillMode.set(false);
|
||||
}
|
||||
if(inManualChillMode.get()) {
|
||||
LOG.info("Leaving manual chill mode.");
|
||||
inManualChillMode.getAndSet(false);
|
||||
inManualChillMode.set(false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Forcefully enters chill mode, even if all chill mode conditions are met.
|
||||
* Puts the node manager into manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void forceEnterChillMode() {
|
||||
public void enterChillMode() {
|
||||
LOG.info("Entering manual chill mode.");
|
||||
inManualChillMode.getAndSet(true);
|
||||
inManualChillMode.set(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Brings node manager out of manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void exitChillMode() {
|
||||
LOG.info("Leaving manual chill mode.");
|
||||
inManualChillMode.set(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if node manager is out of chill mode, else false.
|
||||
* @return true if out of chill mode, else false
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfChillMode() {
|
||||
return !(inStartupChillMode.get() || inManualChillMode.get());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -291,7 +291,7 @@ public final class MiniOzoneCluster extends MiniDFSCluster
|
||||
public void waitTobeOutOfChillMode() throws TimeoutException,
|
||||
InterruptedException {
|
||||
GenericTestUtils.waitFor(() -> {
|
||||
if (scm.getScmNodeManager().isOutOfNodeChillMode()) {
|
||||
if (scm.getScmNodeManager().isOutOfChillMode()) {
|
||||
return true;
|
||||
}
|
||||
LOG.info("Waiting for cluster to be ready. No datanodes found");
|
||||
|
@ -59,16 +59,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports if we have exited out of chill mode by discovering enough nodes.
|
||||
*
|
||||
* @return True if we are out of Node layer chill mode, false otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfNodeChillMode() {
|
||||
return !nodeStateMap.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a chill mode status string.
|
||||
*
|
||||
@ -79,17 +69,6 @@ public class ReplicationNodeManagerMock implements NodeManager {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the status of manual chill mode flag.
|
||||
*
|
||||
* @return true if forceEnterChillMode has been called, false if
|
||||
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
|
||||
*/
|
||||
@Override
|
||||
public boolean isInManualChillMode() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of data nodes in all states.
|
||||
*
|
||||
@ -158,21 +137,30 @@ public class ReplicationNodeManagerMock implements NodeManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Forcefully enters chill mode, even if all minimum node conditions are met.
|
||||
* Puts the node manager into manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void forceEnterChillMode() {
|
||||
public void enterChillMode() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the manual chill mode flag.
|
||||
* Brings node manager out of manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void clearChillModeFlag() {
|
||||
public void exitChillMode() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if node manager is out of chill mode, else false.
|
||||
* @return true if out of chill mode, else false
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfChillMode() {
|
||||
return !nodeStateMap.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the aggregated node stats.
|
||||
*
|
||||
|
@ -108,17 +108,13 @@ public class TestSCMMXBean {
|
||||
minChillNodes.intValue());
|
||||
|
||||
boolean isOutOfChillMode = (boolean)mbs.getAttribute(bean,
|
||||
"OutOfNodeChillMode");
|
||||
assertEquals(scmNm.isOutOfNodeChillMode(), isOutOfChillMode);
|
||||
"OutOfChillMode");
|
||||
assertEquals(scmNm.isOutOfChillMode(), isOutOfChillMode);
|
||||
|
||||
String chillStatus = (String)mbs.getAttribute(bean,
|
||||
"ChillModeStatus");
|
||||
assertEquals(scmNm.getChillModeStatus(), chillStatus);
|
||||
|
||||
boolean inManualChillMode = (boolean)mbs.getAttribute(bean,
|
||||
"InManualChillMode");
|
||||
assertEquals(scmNm.isInManualChillMode(), inManualChillMode);
|
||||
|
||||
TabularData nodeCountObj = (TabularData)mbs.getAttribute(bean,
|
||||
"NodeCount");
|
||||
verifyEquals(nodeCountObj, scm.getScmNodeManager().getNodeCount());
|
||||
|
@ -188,16 +188,6 @@ public class MockNodeManager implements NodeManager {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reports if we have exited out of chill mode by discovering enough nodes.
|
||||
*
|
||||
* @return True if we are out of Node layer chill mode, false otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfNodeChillMode() {
|
||||
return !chillmode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Chill mode is the period when node manager waits for a minimum configured
|
||||
* number of datanodes to report in. This is called chill mode to indicate the
|
||||
@ -212,21 +202,30 @@ public class MockNodeManager implements NodeManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Forcefully enters chill mode, even if all minimum node conditions are met.
|
||||
* Puts the node manager into manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void forceEnterChillMode() {
|
||||
public void enterChillMode() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the manual chill mode flag.
|
||||
* Brings node manager out of manual chill mode.
|
||||
*/
|
||||
@Override
|
||||
public void clearChillModeFlag() {
|
||||
public void exitChillMode() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if node manager is out of chill mode, else false.
|
||||
* @return true if out of chill mode, else false
|
||||
*/
|
||||
@Override
|
||||
public boolean isOutOfChillMode() {
|
||||
return !chillmode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a chill mode status string.
|
||||
*
|
||||
@ -237,17 +236,6 @@ public class MockNodeManager implements NodeManager {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the status of manual chill mode flag.
|
||||
*
|
||||
* @return true if forceEnterChillMode has been called, false if
|
||||
* forceExitChillMode or status is not set. eg. clearChillModeFlag.
|
||||
*/
|
||||
@Override
|
||||
public boolean isInManualChillMode() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the aggregated node stats.
|
||||
* @return the aggregated node stats.
|
||||
|
@ -86,7 +86,7 @@ public class TestContainerPlacement {
|
||||
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
||||
UUID.randomUUID().toString());
|
||||
assertFalse("Node manager should be in chill mode",
|
||||
nodeManager.isOutOfNodeChillMode());
|
||||
nodeManager.isOutOfChillMode());
|
||||
return nodeManager;
|
||||
}
|
||||
|
||||
@ -150,7 +150,7 @@ public class TestContainerPlacement {
|
||||
assertEquals(remaining * nodeCount,
|
||||
(long) nodeManager.getStats().getRemaining().get());
|
||||
|
||||
assertTrue(nodeManager.isOutOfNodeChillMode());
|
||||
assertTrue(nodeManager.isOutOfChillMode());
|
||||
|
||||
String container1 = UUID.randomUUID().toString();
|
||||
Pipeline pipeline1 = containerManager.allocateContainer(
|
||||
|
@ -121,7 +121,7 @@ public class TestNodeManager {
|
||||
SCMNodeManager nodeManager = new SCMNodeManager(config,
|
||||
UUID.randomUUID().toString());
|
||||
assertFalse("Node manager should be in chill mode",
|
||||
nodeManager.isOutOfNodeChillMode());
|
||||
nodeManager.isOutOfChillMode());
|
||||
return nodeManager;
|
||||
}
|
||||
|
||||
@ -150,7 +150,7 @@ public class TestNodeManager {
|
||||
|
||||
assertTrue("Heartbeat thread should have picked up the" +
|
||||
"scheduled heartbeats and transitioned out of chill mode.",
|
||||
nodeManager.isOutOfNodeChillMode());
|
||||
nodeManager.isOutOfChillMode());
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,7 +169,7 @@ public class TestNodeManager {
|
||||
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
|
||||
100, 4 * 1000);
|
||||
assertFalse("No heartbeats, Node manager should have been in" +
|
||||
" chill mode.", nodeManager.isOutOfNodeChillMode());
|
||||
" chill mode.", nodeManager.isOutOfChillMode());
|
||||
}
|
||||
}
|
||||
|
||||
@ -191,7 +191,7 @@ public class TestNodeManager {
|
||||
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
|
||||
100, 4 * 1000);
|
||||
assertFalse("Not enough heartbeat, Node manager should have" +
|
||||
"been in chillmode.", nodeManager.isOutOfNodeChillMode());
|
||||
"been in chillmode.", nodeManager.isOutOfChillMode());
|
||||
}
|
||||
}
|
||||
|
||||
@ -219,7 +219,7 @@ public class TestNodeManager {
|
||||
GenericTestUtils.waitFor(() -> nodeManager.waitForHeartbeatProcessed(),
|
||||
100, 4 * 1000);
|
||||
assertFalse("Not enough nodes have send heartbeat to node" +
|
||||
"manager.", nodeManager.isOutOfNodeChillMode());
|
||||
"manager.", nodeManager.isOutOfChillMode());
|
||||
}
|
||||
}
|
||||
|
||||
@ -899,27 +899,23 @@ public class TestNodeManager {
|
||||
"mode, waiting on nodes to report in."));
|
||||
|
||||
// Should not exit chill mode since 10 nodes have not heartbeat yet.
|
||||
assertFalse(nodeManager.isOutOfNodeChillMode());
|
||||
assertFalse((nodeManager.isInManualChillMode()));
|
||||
assertFalse(nodeManager.isOutOfChillMode());
|
||||
|
||||
// Force exit chill mode.
|
||||
nodeManager.forceExitChillMode();
|
||||
assertTrue(nodeManager.isOutOfNodeChillMode());
|
||||
assertTrue(nodeManager.isOutOfChillMode());
|
||||
status = nodeManager.getChillModeStatus();
|
||||
Assert.assertThat(status,
|
||||
CoreMatchers.containsString("Out of chill mode."));
|
||||
assertFalse((nodeManager.isInManualChillMode()));
|
||||
|
||||
|
||||
// Enter back into chill mode.
|
||||
nodeManager.forceEnterChillMode();
|
||||
assertFalse(nodeManager.isOutOfNodeChillMode());
|
||||
nodeManager.enterChillMode();
|
||||
assertFalse(nodeManager.isOutOfChillMode());
|
||||
status = nodeManager.getChillModeStatus();
|
||||
Assert.assertThat(status,
|
||||
CoreMatchers.containsString("Out of startup chill mode," +
|
||||
" but in manual chill mode."));
|
||||
assertTrue((nodeManager.isInManualChillMode()));
|
||||
|
||||
|
||||
// Assert that node manager force enter cannot be overridden by nodes HBs.
|
||||
for (int x = 0; x < 20; x++) {
|
||||
@ -928,16 +924,15 @@ public class TestNodeManager {
|
||||
}
|
||||
|
||||
Thread.sleep(500);
|
||||
assertFalse(nodeManager.isOutOfNodeChillMode());
|
||||
assertFalse(nodeManager.isOutOfChillMode());
|
||||
|
||||
// Make sure that once we clear the manual chill mode flag, we fall back
|
||||
// Make sure that once we exit out of manual chill mode, we fall back
|
||||
// to the number of nodes to get out of chill mode.
|
||||
nodeManager.clearChillModeFlag();
|
||||
assertTrue(nodeManager.isOutOfNodeChillMode());
|
||||
nodeManager.exitChillMode();
|
||||
assertTrue(nodeManager.isOutOfChillMode());
|
||||
status = nodeManager.getChillModeStatus();
|
||||
Assert.assertThat(status,
|
||||
CoreMatchers.containsString("Out of chill mode."));
|
||||
assertFalse(nodeManager.isInManualChillMode());
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user