diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 2d28a5bb05..f16503e408 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -87,6 +87,11 @@ private HddsConfigKeys() { "hdds.scm.chillmode.min.datanode"; public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1; + public static final String HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK = + "hdds.scm.chillmode.pipeline-availability.check"; + public static final boolean + HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT = false; + // % of containers which should have at least one reported replica // before SCM comes out of chill mode. public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 9f3d7e15de..aa22b2b518 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1231,6 +1231,15 @@ + + hdds.scm.chillmode.pipeline-availability.check + false + HDDS,SCM,OPERATION + + Boolean value to enable pipeline availability check during SCM chill mode. + + + hdds.container.action.max.limit 20 diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java new file mode 100644 index 0000000000..f9a6e59ba2 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.chillmode; + +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; + +import com.google.common.base.Preconditions; + +/** + * Class defining Chill mode exit criteria for Pipelines. + */ +public class PipelineChillModeRule + implements ChillModeExitRule, + EventHandler { + /** Pipeline availability.*/ + private AtomicBoolean isPipelineAvailable = new AtomicBoolean(false); + + private final PipelineManager pipelineManager; + private final SCMChillModeManager chillModeManager; + + PipelineChillModeRule(PipelineManager pipelineManager, + SCMChillModeManager manager) { + this.pipelineManager = pipelineManager; + this.chillModeManager = manager; + } + + @Override + public boolean validate() { + return isPipelineAvailable.get(); + } + + @Override + public void process(PipelineReportFromDatanode report) { + // No need to deal with + } + + @Override + public void cleanup() { + // No need to deal with + } + + @Override + public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode, + EventPublisher publisher) { + // If we are already in pipeline available state, + // skipping following check. + if (validate()) { + chillModeManager.validateChillModeExitRules(publisher); + return; + } + + Pipeline pipeline; + Preconditions.checkNotNull(pipelineReportFromDatanode); + PipelineReportsProto pipelineReport = pipelineReportFromDatanode + .getReport(); + + for (PipelineReport report : pipelineReport.getPipelineReportList()) { + PipelineID pipelineID = PipelineID + .getFromProtobuf(report.getPipelineID()); + try { + pipeline = pipelineManager.getPipeline(pipelineID); + } catch (PipelineNotFoundException e) { + continue; + } + + if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) { + // ensure there is an OPEN state pipeline and then allowed + // to exit chill mode + isPipelineAvailable.set(true); + + if (chillModeManager.getInChillMode()) { + SCMChillModeManager.getLogger() + .info("SCM in chill mode. 1 Pipeline reported, 1 required."); + } + break; + } + } + + if (validate()) { + chillModeManager.validateChillModeExitRules(publisher); + } + } +} \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java index fa0eaa9052..09b998dc97 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer .NodeRegistrationContainerReport; import org.apache.hadoop.hdds.server.events.EventHandler; @@ -53,14 +54,18 @@ public class SCMChillModeManager implements LoggerFactory.getLogger(SCMChillModeManager.class); private final boolean isChillModeEnabled; private AtomicBoolean inChillMode = new AtomicBoolean(true); + private Map exitRules = new HashMap(1); private Configuration config; private static final String CONT_EXIT_RULE = "ContainerChillModeRule"; private static final String DN_EXIT_RULE = "DataNodeChillModeRule"; + private static final String PIPELINE_EXIT_RULE = "PipelineChillModeRule"; + private final EventQueue eventPublisher; public SCMChillModeManager(Configuration conf, - List allContainers, EventQueue eventQueue) { + List allContainers, PipelineManager pipelineManager, + EventQueue eventQueue) { this.config = conf; this.eventPublisher = eventQueue; this.isChillModeEnabled = conf.getBoolean( @@ -70,6 +75,16 @@ public SCMChillModeManager(Configuration conf, exitRules.put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers, this)); exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config, this)); + + if (conf.getBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT) + && pipelineManager != null) { + PipelineChillModeRule rule = new PipelineChillModeRule(pipelineManager, + this); + exitRules.put(PIPELINE_EXIT_RULE, rule); + eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule); + } emitChillModeStatus(); } else { exitChillMode(eventQueue); @@ -84,7 +99,7 @@ public void emitChillModeStatus() { eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS, getInChillMode()); } - private void validateChillModeExitRules(EventPublisher eventQueue) { + public void validateChillModeExitRules(EventPublisher eventQueue) { for (ChillModeExitRule exitRule : exitRules.values()) { if (!exitRule.validate()) { return; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index fe00b548cd..2d27984f28 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -217,7 +217,7 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { NodeReportHandler nodeReportHandler = new NodeReportHandler(scmNodeManager); PipelineReportHandler pipelineReportHandler = - new PipelineReportHandler(pipelineManager, conf); + new PipelineReportHandler(pipelineManager, conf); CommandStatusReportHandler cmdStatusReportHandler = new CommandStatusReportHandler(); @@ -302,8 +302,7 @@ private StorageContainerManager(OzoneConfiguration conf) throws IOException { eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, (BlockManagerImpl) scmBlockManager); scmChillModeManager = new SCMChillModeManager(conf, - containerManager.getContainers(), - eventQueue); + containerManager.getContainers(), pipelineManager, eventQueue); eventQueue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, scmChillModeManager); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java index 0487fb7ea7..ed17796baf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java @@ -17,15 +17,29 @@ */ package org.apache.hadoop.hdds.scm.chillmode; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.UUID; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.scm.pipeline.SCMPipelineManager; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode; import org.apache.hadoop.hdds.server.events.EventQueue; import org.apache.hadoop.test.GenericTestUtils; import org.junit.BeforeClass; @@ -33,9 +47,6 @@ import org.junit.Test; import org.junit.rules.Timeout; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertFalse; - /** Test class for SCMChillModeManager. */ public class TestSCMChillModeManager { @@ -65,7 +76,7 @@ public void testChillModeState() throws Exception { @Test public void testChillModeStateWithNullContainers() { - new SCMChillModeManager(config, null, queue); + new SCMChillModeManager(config, null, null, queue); } private void testChillMode(int numContainers) throws Exception { @@ -76,7 +87,8 @@ private void testChillMode(int numContainers) throws Exception { for (ContainerInfo container : containers) { container.setState(HddsProtos.LifeCycleState.OPEN); } - scmChillModeManager = new SCMChillModeManager(config, containers, queue); + scmChillModeManager = new SCMChillModeManager( + config, containers, null, queue); queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, scmChillModeManager); assertTrue(scmChillModeManager.getInChillMode()); @@ -96,7 +108,8 @@ public void testChillModeExitRule() throws Exception { for (ContainerInfo container : containers) { container.setState(HddsProtos.LifeCycleState.CLOSED); } - scmChillModeManager = new SCMChillModeManager(config, containers, queue); + scmChillModeManager = new SCMChillModeManager( + config, containers, null, queue); queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, scmChillModeManager); assertTrue(scmChillModeManager.getInChillMode()); @@ -118,7 +131,8 @@ public void testChillModeExitRule() throws Exception { public void testDisableChillMode() { OzoneConfiguration conf = new OzoneConfiguration(config); conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false); - scmChillModeManager = new SCMChillModeManager(conf, containers, queue); + scmChillModeManager = new SCMChillModeManager( + conf, containers, null, queue); assertFalse(scmChillModeManager.getInChillMode()); } @@ -149,7 +163,8 @@ public void testContainerChillModeRule() throws Exception { container.setState(HddsProtos.LifeCycleState.OPEN); } - scmChillModeManager = new SCMChillModeManager(config, containers, queue); + scmChillModeManager = new SCMChillModeManager( + config, containers, null, queue); queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, scmChillModeManager); assertTrue(scmChillModeManager.getInChillMode()); @@ -173,7 +188,8 @@ public void testContainerChillModeRule() throws Exception { private void testChillModeDataNodes(int numOfDns) throws Exception { OzoneConfiguration conf = new OzoneConfiguration(config); conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns); - scmChillModeManager = new SCMChillModeManager(conf, containers, queue); + scmChillModeManager = new SCMChillModeManager( + conf, containers, null, queue); queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, scmChillModeManager); // Assert SCM is in Chill mode. @@ -212,4 +228,51 @@ private void testContainerThreshold(List dnContainers, }, 100, 2000 * 9); } + @Test + public void testChillModePipelineExitRule() throws Exception { + containers = new ArrayList<>(); + containers.addAll(HddsTestUtils.getContainerInfo(25 * 4)); + String storageDir = GenericTestUtils.getTempPath( + TestSCMChillModeManager.class.getName() + UUID.randomUUID()); + try{ + MockNodeManager nodeManager = new MockNodeManager(true, 1); + config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); + // enable pipeline check + config.setBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true); + + PipelineManager pipelineManager = new SCMPipelineManager(config, + nodeManager, queue); + scmChillModeManager = new SCMChillModeManager( + config, containers, pipelineManager, queue); + queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); + + queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + HddsTestUtils.createNodeRegistrationContainerReport(containers)); + assertTrue(scmChillModeManager.getInChillMode()); + + // simulation a pipeline report to trigger the rule check + Pipeline pipeline = pipelineManager.createPipeline( + HddsProtos.ReplicationType.STAND_ALONE, + HddsProtos.ReplicationFactor.ONE); + PipelineReportsProto.Builder reportBuilder = PipelineReportsProto + .newBuilder(); + reportBuilder.addPipelineReport(PipelineReport.newBuilder() + .setPipelineID(pipeline.getId().getProtobuf())); + + queue.fireEvent(SCMEvents.PIPELINE_REPORT, new PipelineReportFromDatanode( + pipeline.getNodes().get(0), reportBuilder.build())); + + GenericTestUtils.waitFor(() -> { + return !scmChillModeManager.getInChillMode(); + }, 100, 1000 * 10); + pipelineManager.close(); + } finally { + config.setBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, + false); + FileUtil.fullyDelete(new File(storageDir)); + } + } } \ No newline at end of file