HDDS-609. On restart, SCM does not exit chill mode as it expects DNs to report containers in ALLOCATED state. Contributed by Hanisha Koneru.
This commit is contained in:
parent
c0118105b7
commit
74db993a61
@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||
@ -162,7 +163,7 @@ public class SCMChillModeManager implements
|
||||
|
||||
// Required cutoff % for containers with at least 1 reported replica.
|
||||
private double chillModeCutoff;
|
||||
// Containers read from scm db.
|
||||
// Containers read from scm db (excluding containers in ALLOCATED state).
|
||||
private Map<Long, ContainerInfo> containerMap;
|
||||
private double maxContainer;
|
||||
|
||||
@ -174,11 +175,16 @@ public class SCMChillModeManager implements
|
||||
containerMap = new ConcurrentHashMap<>();
|
||||
if(containers != null) {
|
||||
containers.forEach(c -> {
|
||||
if (c != null) {
|
||||
// Containers in ALLOCATED state should not be included while
|
||||
// calculating the total number of containers here. They are not
|
||||
// reported by DNs and hence should not affect the chill mode exit
|
||||
// rule.
|
||||
if (c != null && c.getState() != null &&
|
||||
!c.getState().equals(HddsProtos.LifeCycleState.ALLOCATED)) {
|
||||
containerMap.put(c.getContainerID(), c);
|
||||
}
|
||||
});
|
||||
maxContainer = containers.size();
|
||||
maxContainer = containerMap.size();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@ import java.util.List;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||
import org.apache.hadoop.hdds.scm.HddsTestUtils;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerInfo;
|
||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||
@ -70,6 +71,11 @@ public class TestSCMChillModeManager {
|
||||
private void testChillMode(int numContainers) throws Exception {
|
||||
containers = new ArrayList<>();
|
||||
containers.addAll(HddsTestUtils.getContainerInfo(numContainers));
|
||||
// Assign open state to containers to be included in the chill mode
|
||||
// container list
|
||||
for (ContainerInfo container : containers) {
|
||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||
}
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
@ -85,6 +91,11 @@ public class TestSCMChillModeManager {
|
||||
public void testChillModeExitRule() throws Exception {
|
||||
containers = new ArrayList<>();
|
||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||
// Assign open state to containers to be included in the chill mode
|
||||
// container list
|
||||
for (ContainerInfo container : containers) {
|
||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||
}
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
@ -119,6 +130,46 @@ public class TestSCMChillModeManager {
|
||||
testChillModeDataNodes(5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that containers in ALLOCATED state are not considered while
|
||||
* computing percentage of containers with at least 1 reported replica in
|
||||
* chill mode exit rule.
|
||||
*/
|
||||
@Test
|
||||
public void testContainerChillModeRule() throws Exception {
|
||||
containers = new ArrayList<>();
|
||||
// Add 100 containers to the list of containers in SCM
|
||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||
// Assign OPEN state to first 25 containers and ALLOCATED state to rest
|
||||
// of the containers
|
||||
for (ContainerInfo container : containers.subList(0, 25)) {
|
||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||
}
|
||||
for (ContainerInfo container : containers.subList(25, 100)) {
|
||||
container.setState(HddsProtos.LifeCycleState.ALLOCATED);
|
||||
}
|
||||
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
|
||||
// When 10 OPEN containers are reported by DNs, the computed container
|
||||
// threshold should be 10/25 as there are only 25 open containers.
|
||||
// Containers in ALLOCATED state should not contribute towards list of
|
||||
// containers while calculating container threshold in SCMChillModeManager
|
||||
testContainerThreshold(containers.subList(0, 10), 0.4);
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
|
||||
// When remaining 15 OPEN containers are reported by DNs, the container
|
||||
// threshold should be (10+15)/25.
|
||||
testContainerThreshold(containers.subList(10, 25), 1.0);
|
||||
|
||||
GenericTestUtils.waitFor(() -> {
|
||||
return !scmChillModeManager.getInChillMode();
|
||||
}, 100, 1000 * 5);
|
||||
}
|
||||
|
||||
private void testChillModeDataNodes(int numOfDns) throws Exception {
|
||||
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
||||
@ -146,7 +197,7 @@ public class TestSCMChillModeManager {
|
||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||
GenericTestUtils.waitFor(() -> {
|
||||
return scmChillModeManager.getInChillMode();
|
||||
return !scmChillModeManager.getInChillMode();
|
||||
}, 10, 1000 * 10);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user