HDDS-642. Add chill mode exit condition for pipeline availability. Contributed by Yiqun Lin.
This commit is contained in:
parent
efc4d91cbe
commit
b71cc7f33e
@ -87,6 +87,11 @@ private HddsConfigKeys() {
|
||||
"hdds.scm.chillmode.min.datanode";
|
||||
public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1;
|
||||
|
||||
public static final String HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK =
|
||||
"hdds.scm.chillmode.pipeline-availability.check";
|
||||
public static final boolean
|
||||
HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT = false;
|
||||
|
||||
// % of containers which should have at least one reported replica
|
||||
// before SCM comes out of chill mode.
|
||||
public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT =
|
||||
|
@ -1231,6 +1231,15 @@
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hdds.scm.chillmode.pipeline-availability.check</name>
|
||||
<value>false</value>
|
||||
<tag>HDDS,SCM,OPERATION</tag>
|
||||
<description>
|
||||
Boolean value to enable pipeline availability check during SCM chill mode.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hdds.container.action.max.limit</name>
|
||||
<value>20</value>
|
||||
|
@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdds.scm.chillmode;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
|
||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
|
||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
||||
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* Class defining Chill mode exit criteria for Pipelines.
|
||||
*/
|
||||
public class PipelineChillModeRule
|
||||
implements ChillModeExitRule<PipelineReportFromDatanode>,
|
||||
EventHandler<PipelineReportFromDatanode> {
|
||||
/** Pipeline availability.*/
|
||||
private AtomicBoolean isPipelineAvailable = new AtomicBoolean(false);
|
||||
|
||||
private final PipelineManager pipelineManager;
|
||||
private final SCMChillModeManager chillModeManager;
|
||||
|
||||
PipelineChillModeRule(PipelineManager pipelineManager,
|
||||
SCMChillModeManager manager) {
|
||||
this.pipelineManager = pipelineManager;
|
||||
this.chillModeManager = manager;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean validate() {
|
||||
return isPipelineAvailable.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(PipelineReportFromDatanode report) {
|
||||
// No need to deal with
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cleanup() {
|
||||
// No need to deal with
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
|
||||
EventPublisher publisher) {
|
||||
// If we are already in pipeline available state,
|
||||
// skipping following check.
|
||||
if (validate()) {
|
||||
chillModeManager.validateChillModeExitRules(publisher);
|
||||
return;
|
||||
}
|
||||
|
||||
Pipeline pipeline;
|
||||
Preconditions.checkNotNull(pipelineReportFromDatanode);
|
||||
PipelineReportsProto pipelineReport = pipelineReportFromDatanode
|
||||
.getReport();
|
||||
|
||||
for (PipelineReport report : pipelineReport.getPipelineReportList()) {
|
||||
PipelineID pipelineID = PipelineID
|
||||
.getFromProtobuf(report.getPipelineID());
|
||||
try {
|
||||
pipeline = pipelineManager.getPipeline(pipelineID);
|
||||
} catch (PipelineNotFoundException e) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
|
||||
// ensure there is an OPEN state pipeline and then allowed
|
||||
// to exit chill mode
|
||||
isPipelineAvailable.set(true);
|
||||
|
||||
if (chillModeManager.getInChillMode()) {
|
||||
SCMChillModeManager.getLogger()
|
||||
.info("SCM in chill mode. 1 Pipeline reported, 1 required.");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (validate()) {
|
||||
chillModeManager.validateChillModeExitRules(publisher);
|
||||
}
|
||||
}
|
||||
}
|
@ -26,6 +26,7 @@
|
||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer
|
||||
.NodeRegistrationContainerReport;
|
||||
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||
@ -53,14 +54,18 @@ public class SCMChillModeManager implements
|
||||
LoggerFactory.getLogger(SCMChillModeManager.class);
|
||||
private final boolean isChillModeEnabled;
|
||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
||||
|
||||
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
||||
private Configuration config;
|
||||
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
|
||||
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
|
||||
private static final String PIPELINE_EXIT_RULE = "PipelineChillModeRule";
|
||||
|
||||
private final EventQueue eventPublisher;
|
||||
|
||||
public SCMChillModeManager(Configuration conf,
|
||||
List<ContainerInfo> allContainers, EventQueue eventQueue) {
|
||||
List<ContainerInfo> allContainers, PipelineManager pipelineManager,
|
||||
EventQueue eventQueue) {
|
||||
this.config = conf;
|
||||
this.eventPublisher = eventQueue;
|
||||
this.isChillModeEnabled = conf.getBoolean(
|
||||
@ -70,6 +75,16 @@ public SCMChillModeManager(Configuration conf,
|
||||
exitRules.put(CONT_EXIT_RULE,
|
||||
new ContainerChillModeRule(config, allContainers, this));
|
||||
exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config, this));
|
||||
|
||||
if (conf.getBoolean(
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
|
||||
&& pipelineManager != null) {
|
||||
PipelineChillModeRule rule = new PipelineChillModeRule(pipelineManager,
|
||||
this);
|
||||
exitRules.put(PIPELINE_EXIT_RULE, rule);
|
||||
eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule);
|
||||
}
|
||||
emitChillModeStatus();
|
||||
} else {
|
||||
exitChillMode(eventQueue);
|
||||
@ -84,7 +99,7 @@ public void emitChillModeStatus() {
|
||||
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode());
|
||||
}
|
||||
|
||||
private void validateChillModeExitRules(EventPublisher eventQueue) {
|
||||
public void validateChillModeExitRules(EventPublisher eventQueue) {
|
||||
for (ChillModeExitRule exitRule : exitRules.values()) {
|
||||
if (!exitRule.validate()) {
|
||||
return;
|
||||
|
@ -217,7 +217,7 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException {
|
||||
NodeReportHandler nodeReportHandler =
|
||||
new NodeReportHandler(scmNodeManager);
|
||||
PipelineReportHandler pipelineReportHandler =
|
||||
new PipelineReportHandler(pipelineManager, conf);
|
||||
new PipelineReportHandler(pipelineManager, conf);
|
||||
CommandStatusReportHandler cmdStatusReportHandler =
|
||||
new CommandStatusReportHandler();
|
||||
|
||||
@ -302,8 +302,7 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException {
|
||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
||||
(BlockManagerImpl) scmBlockManager);
|
||||
scmChillModeManager = new SCMChillModeManager(conf,
|
||||
containerManager.getContainers(),
|
||||
eventQueue);
|
||||
containerManager.getContainers(), pipelineManager, eventQueue);
|
||||
|
||||
eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
|
@ -17,15 +17,29 @@
|
||||
*/
|
||||
package org.apache.hadoop.hdds.scm.chillmode;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
|
||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
|
||||
import org.apache.hadoop.hdds.scm.HddsTestUtils;
|
||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
|
||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.SCMPipelineManager;
|
||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
||||
import org.apache.hadoop.hdds.server.events.EventQueue;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.BeforeClass;
|
||||
@ -33,9 +47,6 @@
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.Timeout;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
||||
/** Test class for SCMChillModeManager.
|
||||
*/
|
||||
public class TestSCMChillModeManager {
|
||||
@ -65,7 +76,7 @@ public void testChillModeState() throws Exception {
|
||||
|
||||
@Test
|
||||
public void testChillModeStateWithNullContainers() {
|
||||
new SCMChillModeManager(config, null, queue);
|
||||
new SCMChillModeManager(config, null, null, queue);
|
||||
}
|
||||
|
||||
private void testChillMode(int numContainers) throws Exception {
|
||||
@ -76,7 +87,8 @@ private void testChillMode(int numContainers) throws Exception {
|
||||
for (ContainerInfo container : containers) {
|
||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||
}
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
config, containers, null, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
@ -96,7 +108,8 @@ public void testChillModeExitRule() throws Exception {
|
||||
for (ContainerInfo container : containers) {
|
||||
container.setState(HddsProtos.LifeCycleState.CLOSED);
|
||||
}
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
config, containers, null, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
@ -118,7 +131,8 @@ public void testChillModeExitRule() throws Exception {
|
||||
public void testDisableChillMode() {
|
||||
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||
conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
|
||||
scmChillModeManager = new SCMChillModeManager(conf, containers, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
conf, containers, null, queue);
|
||||
assertFalse(scmChillModeManager.getInChillMode());
|
||||
}
|
||||
|
||||
@ -149,7 +163,8 @@ public void testContainerChillModeRule() throws Exception {
|
||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||
}
|
||||
|
||||
scmChillModeManager = new SCMChillModeManager(config, containers, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
config, containers, null, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
@ -173,7 +188,8 @@ public void testContainerChillModeRule() throws Exception {
|
||||
private void testChillModeDataNodes(int numOfDns) throws Exception {
|
||||
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
||||
scmChillModeManager = new SCMChillModeManager(conf, containers, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
conf, containers, null, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
// Assert SCM is in Chill mode.
|
||||
@ -212,4 +228,51 @@ private void testContainerThreshold(List<ContainerInfo> dnContainers,
|
||||
}, 100, 2000 * 9);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChillModePipelineExitRule() throws Exception {
|
||||
containers = new ArrayList<>();
|
||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||
String storageDir = GenericTestUtils.getTempPath(
|
||||
TestSCMChillModeManager.class.getName() + UUID.randomUUID());
|
||||
try{
|
||||
MockNodeManager nodeManager = new MockNodeManager(true, 1);
|
||||
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
||||
// enable pipeline check
|
||||
config.setBoolean(
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||
|
||||
PipelineManager pipelineManager = new SCMPipelineManager(config,
|
||||
nodeManager, queue);
|
||||
scmChillModeManager = new SCMChillModeManager(
|
||||
config, containers, pipelineManager, queue);
|
||||
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
scmChillModeManager);
|
||||
|
||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||
assertTrue(scmChillModeManager.getInChillMode());
|
||||
|
||||
// simulation a pipeline report to trigger the rule check
|
||||
Pipeline pipeline = pipelineManager.createPipeline(
|
||||
HddsProtos.ReplicationType.STAND_ALONE,
|
||||
HddsProtos.ReplicationFactor.ONE);
|
||||
PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
|
||||
.newBuilder();
|
||||
reportBuilder.addPipelineReport(PipelineReport.newBuilder()
|
||||
.setPipelineID(pipeline.getId().getProtobuf()));
|
||||
|
||||
queue.fireEvent(SCMEvents.PIPELINE_REPORT, new PipelineReportFromDatanode(
|
||||
pipeline.getNodes().get(0), reportBuilder.build()));
|
||||
|
||||
GenericTestUtils.waitFor(() -> {
|
||||
return !scmChillModeManager.getInChillMode();
|
||||
}, 100, 1000 * 10);
|
||||
pipelineManager.close();
|
||||
} finally {
|
||||
config.setBoolean(
|
||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||
false);
|
||||
FileUtil.fullyDelete(new File(storageDir));
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user