diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
index 856d1136fa..13b3bb72ac 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
@@ -83,6 +83,9 @@ private HddsConfigKeys() {
public static final String HDDS_SCM_CHILLMODE_ENABLED =
"hdds.scm.chillmode.enabled";
public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true;
+ // Minimum number of DataNodes that must register before SCM can leave
+ // chill mode; the default is 1.
+ public static final String HDDS_SCM_CHILLMODE_MIN_DATANODE =
+ "hdds.scm.chillmode.min.datanode";
+ public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1;
// % of containers which should have at least one reported replica
// before SCM comes out of chill mode.
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index b7c967d0ae..d7cbd75b6b 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1164,6 +1164,15 @@
+
+ hdds.scm.chillmode.min.datanode
+ 1
+ HDDS,SCM,OPERATION
+ Minimum number of DataNodes that must be registered before SCM can
+ leave chill mode. A value of 0 disables this check.
+
+
+
hdds.container.action.max.limit
20
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
index 3c1cc8ff3d..c11a60f6c5 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
@@ -20,8 +20,10 @@
import com.google.common.annotations.VisibleForTesting;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
@@ -60,14 +62,16 @@ public class SCMChillModeManager implements
private Map exitRules = new HashMap(1);
private Configuration config;
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
+ private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private final EventQueue eventPublisher;
SCMChillModeManager(Configuration conf, List allContainers,
EventQueue eventQueue) {
this.config = conf;
this.eventPublisher = eventQueue;
- exitRules
- .put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers));
+ exitRules.put(CONT_EXIT_RULE,
+ new ContainerChillModeRule(config, allContainers));
+ exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
exitChillMode(eventQueue);
@@ -120,6 +124,7 @@ public void onMessage(
EventPublisher publisher) {
if (getInChillMode()) {
exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
+ exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
validateChillModeExitRules(publisher);
}
}
@@ -187,6 +192,9 @@ public boolean validate() {
+ /**
+  * Returns containerWithMinReplicas divided by maxContainer, or 1 when
+  * maxContainer is 0.
+  */
@VisibleForTesting
public double getCurrentContainerThreshold() {
+ // With maxContainer == 0 the division below would divide by zero, so
+ // report the threshold as 1 instead.
+ if (maxContainer == 0) {
+ return 1;
+ }
return (containerWithMinReplicas.doubleValue() / maxContainer);
}
@@ -217,6 +225,57 @@ public void cleanup() {
}
}
+ /**
+  * Chill mode exit rule based on the number of distinct DataNodes that have
+  * registered with SCM. The required count is read from
+  * {@link HddsConfigKeys#HDDS_SCM_CHILLMODE_MIN_DATANODE}; a value of 0
+  * disables the check.
+  */
+ public class DataNodeChillModeRule implements
+     ChillModeExitRule {
+
+   // Min DataNodes required to exit chill mode.
+   private final int requiredDns;
+   // Number of distinct DataNodes seen so far; not reset by cleanup().
+   private int registeredDns = 0;
+   // UUIDs of the DataNodes that have registered, so repeats are not
+   // counted twice.
+   private final HashSet<UUID> registeredDnSet;
+
+   /**
+    * @param conf configuration supplying the minimum DataNode count.
+    */
+   public DataNodeChillModeRule(Configuration conf) {
+     requiredDns = conf.getInt(
+         HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
+         HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
+     // HashSet rejects a negative initial capacity with
+     // IllegalArgumentException, which a negative setting (or int overflow
+     // of "requiredDns * 2") would produce. Compute the size in long
+     // arithmetic and clamp it to the valid int range.
+     long capacity = Math.max(0L, 2L * requiredDns);
+     registeredDnSet =
+         new HashSet<>((int) Math.min(Integer.MAX_VALUE, capacity));
+   }
+
+   /**
+    * @return true once at least the required number of DataNodes have
+    *         registered.
+    */
+   @Override
+   public boolean validate() {
+     return registeredDns >= requiredDns;
+   }
+
+   /**
+    * @return the number of distinct DataNodes registered so far.
+    */
+   @VisibleForTesting
+   public double getRegisteredDataNodes() {
+     return registeredDns;
+   }
+
+   /**
+    * Records the DataNode that sent the given report, provided the check is
+    * enabled and SCM is still in chill mode.
+    *
+    * @param reportsProto registration report carrying the DataNode details.
+    */
+   @Override
+   public void process(NodeRegistrationContainerReport reportsProto) {
+     if (requiredDns == 0) {
+       // No dn check required.
+       return;
+     }
+
+     if (inChillMode.get()) {
+       registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
+       registeredDns = registeredDnSet.size();
+       LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
+           registeredDns, requiredDns);
+     }
+   }
+
+   /**
+    * Drops the tracked UUIDs; the registered count is left unchanged.
+    */
+   @Override
+   public void cleanup() {
+     registeredDnSet.clear();
+   }
+ }
+
@VisibleForTesting
public static Logger getLogger() {
return LOG;
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
index 486c604cd0..53d76e64bb 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
@@ -45,7 +45,7 @@ public class TestSCMChillModeManager {
private List containers;
@Rule
- public Timeout timeout = new Timeout(1000 * 20);
+ public Timeout timeout = new Timeout(1000 * 35);
@BeforeClass
public static void setUp() {
@@ -111,6 +111,45 @@ public void testDisableChillMode() {
assertFalse(scmChillModeManager.getInChillMode());
}
+ /**
+  * Runs the DataNode exit-rule scenario, with an empty container list, for
+  * several minimum-DataNode settings.
+  */
+ @Test
+ public void testChillModeDataNodeExitRule() throws Exception {
+   containers = new ArrayList<>();
+   for (int minDataNodes : new int[] {0, 3, 5}) {
+     testChillModeDataNodes(minDataNodes);
+   }
+ }
+
+ /**
+  * Creates a chill mode manager that requires {@code numOfDns} DataNodes,
+  * fires one registration report per DataNode and checks that SCM stays in
+  * chill mode until the last one has registered.
+  *
+  * @param numOfDns value for hdds.scm.chillmode.min.datanode.
+  */
+ private void testChillModeDataNodes(int numOfDns) throws Exception {
+   OzoneConfiguration conf = new OzoneConfiguration(config);
+   conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
+   scmChillModeManager = new SCMChillModeManager(conf, containers, queue);
+   queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+       scmChillModeManager);
+   // Assert SCM is in Chill mode.
+   assertTrue(scmChillModeManager.getInChillMode());
+
+   // Register all DataNodes except last one and assert SCM is in chill mode.
+   for (int i = 0; i < numOfDns - 1; i++) {
+     queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+         HddsTestUtils.createNodeRegistrationContainerReport(containers));
+     assertTrue(scmChillModeManager.getInChillMode());
+     assertTrue(scmChillModeManager.getCurrentContainerThreshold() == 1);
+   }
+
+   if (numOfDns == 0) {
+     // No report has been fired in this call, so only confirm that the
+     // chill mode flag is still set.
+     GenericTestUtils.waitFor(
+         () -> scmChillModeManager.getInChillMode(), 10, 1000 * 10);
+     return;
+   }
+   // Register last DataNode and check that SCM is out of Chill mode.
+   queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+       HddsTestUtils.createNodeRegistrationContainerReport(containers));
+   // The condition must be negated: waiting on getInChillMode() itself is
+   // satisfied immediately and would never verify the exit.
+   GenericTestUtils.waitFor(
+       () -> !scmChillModeManager.getInChillMode(), 10, 1000 * 10);
+ }
+
private void testContainerThreshold(List dnContainers,
double expectedThreshold)
throws Exception {