HDDS-1198. Rename chill mode to safe mode. Contributed by Siddharth Wagle. (#737)
This commit is contained in:
parent
1943db5571
commit
ebbda181e4
@ -447,22 +447,22 @@ public long getContainerSize(long containerID) throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if SCM is in chill mode.
|
* Check if SCM is in safe mode.
|
||||||
*
|
*
|
||||||
* @return Returns true if SCM is in chill mode else returns false.
|
* @return Returns true if SCM is in safe mode else returns false.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public boolean inChillMode() throws IOException {
|
public boolean inSafeMode() throws IOException {
|
||||||
return storageContainerLocationClient.inChillMode();
|
return storageContainerLocationClient.inSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of chill mode.
|
* Force SCM out of safe mode.
|
||||||
*
|
*
|
||||||
* @return returns true if operation is successful.
|
* @return returns true if operation is successful.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public boolean forceExitChillMode() throws IOException {
|
public boolean forceExitSafeMode() throws IOException {
|
||||||
return storageContainerLocationClient.forceExitChillMode();
|
return storageContainerLocationClient.forceExitSafeMode();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,47 +63,47 @@ public final class HddsConfigKeys {
|
|||||||
public static final String HDDS_CONTAINER_CLOSE_THRESHOLD =
|
public static final String HDDS_CONTAINER_CLOSE_THRESHOLD =
|
||||||
"hdds.container.close.threshold";
|
"hdds.container.close.threshold";
|
||||||
public static final float HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f;
|
public static final float HDDS_CONTAINER_CLOSE_THRESHOLD_DEFAULT = 0.9f;
|
||||||
public static final String HDDS_SCM_CHILLMODE_ENABLED =
|
public static final String HDDS_SCM_SAFEMODE_ENABLED =
|
||||||
"hdds.scm.chillmode.enabled";
|
"hdds.scm.safemode.enabled";
|
||||||
public static final String HDDS_CONTAINERSCRUB_ENABLED =
|
public static final String HDDS_CONTAINERSCRUB_ENABLED =
|
||||||
"hdds.containerscrub.enabled";
|
"hdds.containerscrub.enabled";
|
||||||
public static final boolean HDDS_CONTAINERSCRUB_ENABLED_DEFAULT = false;
|
public static final boolean HDDS_CONTAINERSCRUB_ENABLED_DEFAULT = false;
|
||||||
public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true;
|
public static final boolean HDDS_SCM_SAFEMODE_ENABLED_DEFAULT = true;
|
||||||
public static final String HDDS_SCM_CHILLMODE_MIN_DATANODE =
|
public static final String HDDS_SCM_SAFEMODE_MIN_DATANODE =
|
||||||
"hdds.scm.chillmode.min.datanode";
|
"hdds.scm.safemode.min.datanode";
|
||||||
public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1;
|
public static final int HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT = 1;
|
||||||
|
|
||||||
|
|
||||||
public static final String
|
public static final String
|
||||||
HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT =
|
HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT =
|
||||||
"hdds.scm.wait.time.after.chillmode.exit";
|
"hdds.scm.wait.time.after.safemode.exit";
|
||||||
|
|
||||||
public static final String
|
public static final String
|
||||||
HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT_DEFAULT = "5m";
|
HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT_DEFAULT = "5m";
|
||||||
|
|
||||||
public static final String HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK =
|
public static final String HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK =
|
||||||
"hdds.scm.chillmode.pipeline-availability.check";
|
"hdds.scm.safemode.pipeline-availability.check";
|
||||||
public static final boolean
|
public static final boolean
|
||||||
HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT = false;
|
HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT = false;
|
||||||
|
|
||||||
// % of containers which should have at least one reported replica
|
// % of containers which should have at least one reported replica
|
||||||
// before SCM comes out of chill mode.
|
// before SCM comes out of safe mode.
|
||||||
public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT =
|
public static final String HDDS_SCM_SAFEMODE_THRESHOLD_PCT =
|
||||||
"hdds.scm.chillmode.threshold.pct";
|
"hdds.scm.safemode.threshold.pct";
|
||||||
public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99;
|
public static final double HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT = 0.99;
|
||||||
|
|
||||||
|
|
||||||
// percentage of healthy pipelines, where all 3 datanodes are reported in the
|
// percentage of healthy pipelines, where all 3 datanodes are reported in the
|
||||||
// pipeline.
|
// pipeline.
|
||||||
public static final String HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT =
|
public static final String HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT =
|
||||||
"hdds.scm.chillmode.healthy.pipelie.pct";
|
"hdds.scm.safemode.healthy.pipelie.pct";
|
||||||
public static final double
|
public static final double
|
||||||
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10;
|
HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10;
|
||||||
|
|
||||||
public static final String HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT =
|
public static final String HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT =
|
||||||
"hdds.scm.chillmode.atleast.one.node.reported.pipeline.pct";
|
"hdds.scm.safemode.atleast.one.node.reported.pipeline.pct";
|
||||||
public static final double
|
public static final double
|
||||||
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT = 0.90;
|
HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT = 0.90;
|
||||||
|
|
||||||
public static final String HDDS_LOCK_MAX_CONCURRENCY =
|
public static final String HDDS_LOCK_MAX_CONCURRENCY =
|
||||||
"hdds.lock.max.concurrency";
|
"hdds.lock.max.concurrency";
|
||||||
|
@ -189,18 +189,18 @@ Pipeline createReplicationPipeline(HddsProtos.ReplicationType type,
|
|||||||
void closePipeline(HddsProtos.PipelineID pipelineID) throws IOException;
|
void closePipeline(HddsProtos.PipelineID pipelineID) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if SCM is in chill mode.
|
* Check if SCM is in safe mode.
|
||||||
*
|
*
|
||||||
* @return Returns true if SCM is in chill mode else returns false.
|
* @return Returns true if SCM is in safe mode else returns false.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
boolean inChillMode() throws IOException;
|
boolean inSafeMode() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of chill mode.
|
* Force SCM out of safe mode.
|
||||||
*
|
*
|
||||||
* @return returns true if operation is successful.
|
* @return returns true if operation is successful.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
boolean forceExitChillMode() throws IOException;
|
boolean forceExitSafeMode() throws IOException;
|
||||||
}
|
}
|
||||||
|
@ -163,18 +163,18 @@ Pipeline createReplicationPipeline(HddsProtos.ReplicationType type,
|
|||||||
ScmInfo getScmInfo() throws IOException;
|
ScmInfo getScmInfo() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if SCM is in chill mode.
|
* Check if SCM is in safe mode.
|
||||||
*
|
*
|
||||||
* @return Returns true if SCM is in chill mode else returns false.
|
* @return Returns true if SCM is in safe mode else returns false.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
boolean inChillMode() throws IOException;
|
boolean inSafeMode() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of Chill mode.
|
* Force SCM out of Safe mode.
|
||||||
*
|
*
|
||||||
* @return returns true if operation is successful.
|
* @return returns true if operation is successful.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
boolean forceExitChillMode() throws IOException;
|
boolean forceExitSafeMode() throws IOException;
|
||||||
}
|
}
|
||||||
|
@ -23,12 +23,12 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineResponseProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineResponseProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ClosePipelineRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitChillModeRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitChillModeResponseProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InChillModeRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InChillModeResponseProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeResponseProto;
|
||||||
import org.apache.hadoop.hdds.scm.ScmInfo;
|
import org.apache.hadoop.hdds.scm.ScmInfo;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||||
@ -370,38 +370,38 @@ public ScmInfo getScmInfo() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if SCM is in chill mode.
|
* Check if SCM is in safe mode.
|
||||||
*
|
*
|
||||||
* @return Returns true if SCM is in chill mode else returns false.
|
* @return Returns true if SCM is in safe mode else returns false.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean inChillMode() throws IOException {
|
public boolean inSafeMode() throws IOException {
|
||||||
InChillModeRequestProto request =
|
InSafeModeRequestProto request =
|
||||||
InChillModeRequestProto.getDefaultInstance();
|
InSafeModeRequestProto.getDefaultInstance();
|
||||||
try {
|
try {
|
||||||
InChillModeResponseProto resp = rpcProxy.inChillMode(
|
InSafeModeResponseProto resp = rpcProxy.inSafeMode(
|
||||||
NULL_RPC_CONTROLLER, request);
|
NULL_RPC_CONTROLLER, request);
|
||||||
return resp.getInChillMode();
|
return resp.getInSafeMode();
|
||||||
} catch (ServiceException e) {
|
} catch (ServiceException e) {
|
||||||
throw ProtobufHelper.getRemoteException(e);
|
throw ProtobufHelper.getRemoteException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of Chill mode.
|
* Force SCM out of Safe mode.
|
||||||
*
|
*
|
||||||
* @return returns true if operation is successful.
|
* @return returns true if operation is successful.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean forceExitChillMode() throws IOException {
|
public boolean forceExitSafeMode() throws IOException {
|
||||||
ForceExitChillModeRequestProto request =
|
ForceExitSafeModeRequestProto request =
|
||||||
ForceExitChillModeRequestProto.getDefaultInstance();
|
ForceExitSafeModeRequestProto.getDefaultInstance();
|
||||||
try {
|
try {
|
||||||
ForceExitChillModeResponseProto resp = rpcProxy
|
ForceExitSafeModeResponseProto resp = rpcProxy
|
||||||
.forceExitChillMode(NULL_RPC_CONTROLLER, request);
|
.forceExitSafeMode(NULL_RPC_CONTROLLER, request);
|
||||||
return resp.getExitedChillMode();
|
return resp.getExitedSafeMode();
|
||||||
} catch (ServiceException e) {
|
} catch (ServiceException e) {
|
||||||
throw ProtobufHelper.getRemoteException(e);
|
throw ProtobufHelper.getRemoteException(e);
|
||||||
}
|
}
|
||||||
|
@ -34,8 +34,8 @@ public enum SCMAction implements AuditAction {
|
|||||||
LIST_PIPELINE,
|
LIST_PIPELINE,
|
||||||
CLOSE_PIPELINE,
|
CLOSE_PIPELINE,
|
||||||
DELETE_CONTAINER,
|
DELETE_CONTAINER,
|
||||||
IN_CHILL_MODE,
|
IN_SAFE_MODE,
|
||||||
FORCE_EXIT_CHILL_MODE;
|
FORCE_EXIT_SAFE_MODE;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getAction() {
|
public String getAction() {
|
||||||
|
@ -24,13 +24,13 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
.StorageContainerLocationProtocolProtos.InChillModeRequestProto;
|
.StorageContainerLocationProtocolProtos.InSafeModeRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
.StorageContainerLocationProtocolProtos.InChillModeResponseProto;
|
.StorageContainerLocationProtocolProtos.InSafeModeResponseProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
.StorageContainerLocationProtocolProtos.ForceExitChillModeRequestProto;
|
.StorageContainerLocationProtocolProtos.ForceExitSafeModeRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
.StorageContainerLocationProtocolProtos.ForceExitChillModeResponseProto;
|
.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto;
|
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineResponseProto;
|
||||||
import org.apache.hadoop.hdds.scm.ScmInfo;
|
import org.apache.hadoop.hdds.scm.ScmInfo;
|
||||||
@ -282,26 +282,26 @@ public HddsProtos.GetScmInfoRespsonseProto getScmInfo(
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InChillModeResponseProto inChillMode(
|
public InSafeModeResponseProto inSafeMode(
|
||||||
RpcController controller,
|
RpcController controller,
|
||||||
InChillModeRequestProto request) throws ServiceException {
|
InSafeModeRequestProto request) throws ServiceException {
|
||||||
try (Scope scope = TracingUtil
|
try (Scope scope = TracingUtil
|
||||||
.importAndCreateScope("inChillMode", request.getTraceID())) {
|
.importAndCreateScope("inSafeMode", request.getTraceID())) {
|
||||||
return InChillModeResponseProto.newBuilder()
|
return InSafeModeResponseProto.newBuilder()
|
||||||
.setInChillMode(impl.inChillMode()).build();
|
.setInSafeMode(impl.inSafeMode()).build();
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new ServiceException(ex);
|
throw new ServiceException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ForceExitChillModeResponseProto forceExitChillMode(
|
public ForceExitSafeModeResponseProto forceExitSafeMode(
|
||||||
RpcController controller, ForceExitChillModeRequestProto request)
|
RpcController controller, ForceExitSafeModeRequestProto request)
|
||||||
throws ServiceException {
|
throws ServiceException {
|
||||||
try (Scope scope = TracingUtil
|
try (Scope scope = TracingUtil
|
||||||
.importAndCreateScope("forceExitChillMode", request.getTraceID())) {
|
.importAndCreateScope("forceExitSafeMode", request.getTraceID())) {
|
||||||
return ForceExitChillModeResponseProto.newBuilder()
|
return ForceExitSafeModeResponseProto.newBuilder()
|
||||||
.setExitedChillMode(impl.forceExitChillMode()).build();
|
.setExitedSafeMode(impl.forceExitSafeMode()).build();
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new ServiceException(ex);
|
throw new ServiceException(ex);
|
||||||
}
|
}
|
||||||
|
@ -88,7 +88,7 @@ message DeleteKeyBlocksResultProto {
|
|||||||
message DeleteScmBlockResult {
|
message DeleteScmBlockResult {
|
||||||
enum Result {
|
enum Result {
|
||||||
success = 1;
|
success = 1;
|
||||||
chillMode = 2;
|
safeMode = 2;
|
||||||
errorNotFound = 3;
|
errorNotFound = 3;
|
||||||
unknownFailure = 4;
|
unknownFailure = 4;
|
||||||
}
|
}
|
||||||
|
@ -176,20 +176,20 @@ message ClosePipelineRequestProto {
|
|||||||
message ClosePipelineResponseProto {
|
message ClosePipelineResponseProto {
|
||||||
}
|
}
|
||||||
|
|
||||||
message InChillModeRequestProto {
|
message InSafeModeRequestProto {
|
||||||
optional string traceID = 1;
|
optional string traceID = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message InChillModeResponseProto {
|
message InSafeModeResponseProto {
|
||||||
required bool inChillMode = 1;
|
required bool inSafeMode = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ForceExitChillModeRequestProto {
|
message ForceExitSafeModeRequestProto {
|
||||||
optional string traceID = 1;
|
optional string traceID = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ForceExitChillModeResponseProto {
|
message ForceExitSafeModeResponseProto {
|
||||||
required bool exitedChillMode = 1;
|
required bool exitedSafeMode = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -265,14 +265,14 @@ service StorageContainerLocationProtocolService {
|
|||||||
returns (GetScmInfoRespsonseProto);
|
returns (GetScmInfoRespsonseProto);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if SCM is in ChillMode.
|
* Checks if SCM is in SafeMode.
|
||||||
*/
|
*/
|
||||||
rpc inChillMode(InChillModeRequestProto)
|
rpc inSafeMode(InSafeModeRequestProto)
|
||||||
returns (InChillModeResponseProto);
|
returns (InSafeModeResponseProto);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Forces SCM out of ChillMode.
|
* Forces SCM out of SafeMode.
|
||||||
*/
|
*/
|
||||||
rpc forceExitChillMode(ForceExitChillModeRequestProto)
|
rpc forceExitSafeMode(ForceExitSafeModeRequestProto)
|
||||||
returns (ForceExitChillModeResponseProto);
|
returns (ForceExitSafeModeResponseProto);
|
||||||
}
|
}
|
||||||
|
@ -1300,52 +1300,52 @@
|
|||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.threshold.pct</name>
|
<name>hdds.scm.safemode.threshold.pct</name>
|
||||||
<value>0.99</value>
|
<value>0.99</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description> % of containers which should have at least one
|
<description> % of containers which should have at least one
|
||||||
reported replica before SCM comes out of chill mode.
|
reported replica before SCM comes out of safe mode.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.wait.time.after.chillmode.exit</name>
|
<name>hdds.scm.wait.time.after.safemode.exit</name>
|
||||||
<value>5m</value>
|
<value>5m</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description> After exiting chillmode, wait for configured interval of
|
<description> After exiting safemode, wait for configured interval of
|
||||||
time to start replication monitor and cleanup activities of unhealthy
|
time to start replication monitor and cleanup activities of unhealthy
|
||||||
pipelines.
|
pipelines.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.enabled</name>
|
<name>hdds.scm.safemode.enabled</name>
|
||||||
<value>true</value>
|
<value>true</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description>Boolean value to enable or disable SCM chill mode.
|
<description>Boolean value to enable or disable SCM safe mode.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.min.datanode</name>
|
<name>hdds.scm.safemode.min.datanode</name>
|
||||||
<value>1</value>
|
<value>1</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description>Minimum DataNodes which should be registered to get SCM out of
|
<description>Minimum DataNodes which should be registered to get SCM out of
|
||||||
chill mode.
|
safe mode.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.pipeline-availability.check</name>
|
<name>hdds.scm.safemode.pipeline-availability.check</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description>
|
<description>
|
||||||
Boolean value to enable pipeline availability check during SCM chill mode.
|
Boolean value to enable pipeline availability check during SCM safe mode.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.healthy.pipelie.pct</name>
|
<name>hdds.scm.safemode.healthy.pipelie.pct</name>
|
||||||
<value>0.10</value>
|
<value>0.10</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description>
|
<description>
|
||||||
@ -1355,7 +1355,7 @@
|
|||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hdds.scm.chillmode.atleast.one.node.reported.pipeline.pct</name>
|
<name>hdds.scm.safemode.atleast.one.node.reported.pipeline.pct</name>
|
||||||
<value>0.90</value>
|
<value>0.90</value>
|
||||||
<tag>HDDS,SCM,OPERATION</tag>
|
<tag>HDDS,SCM,OPERATION</tag>
|
||||||
<description>
|
<description>
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.Precheck;
|
import org.apache.hadoop.hdds.scm.safemode.Precheck;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
import org.apache.hadoop.hdds.server.ServerUtils;
|
import org.apache.hadoop.hdds.server.ServerUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -81,9 +81,9 @@ AllocatedBlock allocateBlock(long size, HddsProtos.ReplicationType type,
|
|||||||
SCMBlockDeletingService getSCMBlockDeletingService();
|
SCMBlockDeletingService getSCMBlockDeletingService();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set ChillMode status.
|
* Set SafeMode status.
|
||||||
*
|
*
|
||||||
* @param chillModeStatus
|
* @param safeModeStatus
|
||||||
*/
|
*/
|
||||||
void setChillModeStatus(boolean chillModeStatus);
|
void setSafeModeStatus(boolean safeModeStatus);
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||||
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
||||||
import org.apache.hadoop.hdds.scm.ScmUtils;
|
import org.apache.hadoop.hdds.scm.ScmUtils;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.ChillModePrecheck;
|
import org.apache.hadoop.hdds.scm.safemode.SafeModePrecheck;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerManager;
|
import org.apache.hadoop.hdds.scm.container.ContainerManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock;
|
||||||
@ -78,7 +78,7 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
|
|||||||
private final SCMBlockDeletingService blockDeletingService;
|
private final SCMBlockDeletingService blockDeletingService;
|
||||||
|
|
||||||
private ObjectName mxBean;
|
private ObjectName mxBean;
|
||||||
private ChillModePrecheck chillModePrecheck;
|
private SafeModePrecheck safeModePrecheck;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
@ -116,7 +116,7 @@ public BlockManagerImpl(final Configuration conf,
|
|||||||
new SCMBlockDeletingService(deletedBlockLog, containerManager,
|
new SCMBlockDeletingService(deletedBlockLog, containerManager,
|
||||||
scm.getScmNodeManager(), scm.getEventQueue(), svcInterval,
|
scm.getScmNodeManager(), scm.getEventQueue(), svcInterval,
|
||||||
serviceTimeout, conf);
|
serviceTimeout, conf);
|
||||||
chillModePrecheck = new ChillModePrecheck(conf);
|
safeModePrecheck = new SafeModePrecheck(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -154,7 +154,7 @@ public AllocatedBlock allocateBlock(final long size, ReplicationType type,
|
|||||||
ReplicationFactor factor, String owner, ExcludeList excludeList)
|
ReplicationFactor factor, String owner, ExcludeList excludeList)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LOG.trace("Size;{} , type : {}, factor : {} ", size, type, factor);
|
LOG.trace("Size;{} , type : {}, factor : {} ", size, type, factor);
|
||||||
ScmUtils.preCheck(ScmOps.allocateBlock, chillModePrecheck);
|
ScmUtils.preCheck(ScmOps.allocateBlock, safeModePrecheck);
|
||||||
if (size < 0 || size > containerSize) {
|
if (size < 0 || size > containerSize) {
|
||||||
LOG.warn("Invalid block size requested : {}", size);
|
LOG.warn("Invalid block size requested : {}", size);
|
||||||
throw new SCMException("Unsupported block size: " + size,
|
throw new SCMException("Unsupported block size: " + size,
|
||||||
@ -254,7 +254,7 @@ private AllocatedBlock newBlock(ContainerInfo containerInfo) {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void deleteBlocks(List<BlockID> blockIDs) throws IOException {
|
public void deleteBlocks(List<BlockID> blockIDs) throws IOException {
|
||||||
ScmUtils.preCheck(ScmOps.deleteBlock, chillModePrecheck);
|
ScmUtils.preCheck(ScmOps.deleteBlock, safeModePrecheck);
|
||||||
|
|
||||||
LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs));
|
LOG.info("Deleting blocks {}", StringUtils.join(",", blockIDs));
|
||||||
Map<Long, List<Long>> containerBlocks = new HashMap<>();
|
Map<Long, List<Long>> containerBlocks = new HashMap<>();
|
||||||
@ -324,15 +324,15 @@ public SCMBlockDeletingService getSCMBlockDeletingService() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setChillModeStatus(boolean chillModeStatus) {
|
public void setSafeModeStatus(boolean safeModeStatus) {
|
||||||
this.chillModePrecheck.setInChillMode(chillModeStatus);
|
this.safeModePrecheck.setInSafeMode(safeModeStatus);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns status of scm chill mode determined by CHILL_MODE_STATUS event.
|
* Returns status of scm safe mode determined by SAFE_MODE_STATUS event.
|
||||||
* */
|
* */
|
||||||
public boolean isScmInChillMode() {
|
public boolean isScmInSafeMode() {
|
||||||
return this.chillModePrecheck.isInChillMode();
|
return this.safeModePrecheck.isInSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -86,12 +86,12 @@ public void close() throws IOException {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits for
|
* Waits for
|
||||||
* {@link HddsConfigKeys#HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT} and sets
|
* {@link HddsConfigKeys#HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT} and sets
|
||||||
* replicationEnabled to start replication monitor thread.
|
* replicationEnabled to start replication monitor thread.
|
||||||
*/
|
*/
|
||||||
public void fireReplicationStart(boolean chillModeStatus,
|
public void fireReplicationStart(boolean safeModeStatus,
|
||||||
long waitTime) {
|
long waitTime) {
|
||||||
if (!chillModeStatus) {
|
if (!safeModeStatus) {
|
||||||
scheduler.schedule(() -> {
|
scheduler.schedule(() -> {
|
||||||
setReplicationEnabled(true);
|
setReplicationEnabled(true);
|
||||||
LOG.info("Replication Timer sleep for {} ms completed. Enable "
|
LOG.info("Replication Timer sleep for {} ms completed. Enable "
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
||||||
import org.apache.hadoop.hdds.scm.block.PendingDeleteStatusList;
|
import org.apache.hadoop.hdds.scm.block.PendingDeleteStatusList;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager.ChillModeStatus;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus;
|
||||||
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler;
|
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler;
|
||||||
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler
|
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler
|
||||||
.ReplicationStatus;
|
.ReplicationStatus;
|
||||||
@ -106,7 +106,7 @@ public final class SCMEvents {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* PipelineReport processed by pipeline report handler. This event is
|
* PipelineReport processed by pipeline report handler. This event is
|
||||||
* received by HealthyPipelineChillModeRule.
|
* received by HealthyPipelineSafeModeRule.
|
||||||
*/
|
*/
|
||||||
public static final TypedEvent<PipelineReportFromDatanode>
|
public static final TypedEvent<PipelineReportFromDatanode>
|
||||||
PROCESSED_PIPELINE_REPORT = new TypedEvent<>(
|
PROCESSED_PIPELINE_REPORT = new TypedEvent<>(
|
||||||
@ -254,8 +254,8 @@ public final class SCMEvents {
|
|||||||
*/
|
*/
|
||||||
public static final TypedEvent<Boolean> START_REPLICATION =
|
public static final TypedEvent<Boolean> START_REPLICATION =
|
||||||
new TypedEvent<>(Boolean.class);
|
new TypedEvent<>(Boolean.class);
|
||||||
public static final TypedEvent<ChillModeStatus> CHILL_MODE_STATUS =
|
public static final TypedEvent<SafeModeStatus> SAFE_MODE_STATUS =
|
||||||
new TypedEvent<>(ChillModeStatus.class);
|
new TypedEvent<>(SafeModeStatus.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Private Ctor. Never Constructed.
|
* Private Ctor. Never Constructed.
|
||||||
|
@ -103,7 +103,7 @@ public enum ResultCodes {
|
|||||||
FAILED_TO_FIND_SUITABLE_NODE,
|
FAILED_TO_FIND_SUITABLE_NODE,
|
||||||
INVALID_CAPACITY,
|
INVALID_CAPACITY,
|
||||||
INVALID_BLOCK_SIZE,
|
INVALID_BLOCK_SIZE,
|
||||||
CHILL_MODE_EXCEPTION,
|
SAFE_MODE_EXCEPTION,
|
||||||
FAILED_TO_LOAD_OPEN_CONTAINER,
|
FAILED_TO_LOAD_OPEN_CONTAINER,
|
||||||
FAILED_TO_ALLOCATE_CONTAINER,
|
FAILED_TO_ALLOCATE_CONTAINER,
|
||||||
FAILED_TO_CHANGE_CONTAINER_STATE,
|
FAILED_TO_CHANGE_CONTAINER_STATE,
|
||||||
|
@ -107,7 +107,7 @@ public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
|||||||
this.version = VersionInfo.getLatestVersion();
|
this.version = VersionInfo.getLatestVersion();
|
||||||
this.commandQueue = new CommandQueue();
|
this.commandQueue = new CommandQueue();
|
||||||
this.scmManager = scmManager;
|
this.scmManager = scmManager;
|
||||||
LOG.info("Entering startup chill mode.");
|
LOG.info("Entering startup safe mode.");
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
this.metrics = SCMNodeMetrics.create(this);
|
this.metrics = SCMNodeMetrics.create(this);
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
.StorageContainerDatanodeProtocolProtos.PipelineReport;
|
.StorageContainerDatanodeProtocolProtos.PipelineReport;
|
||||||
import org.apache.hadoop.hdds.protocol.proto
|
import org.apache.hadoop.hdds.protocol.proto
|
||||||
.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
|
.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.server
|
import org.apache.hadoop.hdds.scm.server
|
||||||
.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
||||||
@ -48,20 +48,20 @@ public class PipelineReportHandler implements
|
|||||||
.getLogger(PipelineReportHandler.class);
|
.getLogger(PipelineReportHandler.class);
|
||||||
private final PipelineManager pipelineManager;
|
private final PipelineManager pipelineManager;
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
private final SCMChillModeManager scmChillModeManager;
|
private final SCMSafeModeManager scmSafeModeManager;
|
||||||
private final boolean pipelineAvailabilityCheck;
|
private final boolean pipelineAvailabilityCheck;
|
||||||
|
|
||||||
public PipelineReportHandler(SCMChillModeManager scmChillModeManager,
|
public PipelineReportHandler(SCMSafeModeManager scmSafeModeManager,
|
||||||
PipelineManager pipelineManager,
|
PipelineManager pipelineManager,
|
||||||
Configuration conf) {
|
Configuration conf) {
|
||||||
Preconditions.checkNotNull(pipelineManager);
|
Preconditions.checkNotNull(pipelineManager);
|
||||||
Objects.requireNonNull(scmChillModeManager);
|
Objects.requireNonNull(scmSafeModeManager);
|
||||||
this.scmChillModeManager = scmChillModeManager;
|
this.scmSafeModeManager = scmSafeModeManager;
|
||||||
this.pipelineManager = pipelineManager;
|
this.pipelineManager = pipelineManager;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.pipelineAvailabilityCheck = conf.getBoolean(
|
this.pipelineAvailabilityCheck = conf.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,7 +83,7 @@ public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
|
|||||||
report, dn, e);
|
report, dn, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pipelineAvailabilityCheck && scmChillModeManager.getInChillMode()) {
|
if (pipelineAvailabilityCheck && scmSafeModeManager.getInSafeMode()) {
|
||||||
publisher.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
|
publisher.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
|
||||||
pipelineReportFromDatanode);
|
pipelineReportFromDatanode);
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@ -36,30 +36,30 @@
|
|||||||
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class defining Chill mode exit criteria for Containers.
|
* Class defining Safe mode exit criteria for Containers.
|
||||||
*/
|
*/
|
||||||
public class ContainerChillModeRule extends
|
public class ContainerSafeModeRule extends
|
||||||
ChillModeExitRule<NodeRegistrationContainerReport>{
|
SafeModeExitRule<NodeRegistrationContainerReport>{
|
||||||
|
|
||||||
// Required cutoff % for containers with at least 1 reported replica.
|
// Required cutoff % for containers with at least 1 reported replica.
|
||||||
private double chillModeCutoff;
|
private double safeModeCutoff;
|
||||||
// Containers read from scm db (excluding containers in ALLOCATED state).
|
// Containers read from scm db (excluding containers in ALLOCATED state).
|
||||||
private Map<Long, ContainerInfo> containerMap;
|
private Map<Long, ContainerInfo> containerMap;
|
||||||
private double maxContainer;
|
private double maxContainer;
|
||||||
|
|
||||||
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
private AtomicLong containerWithMinReplicas = new AtomicLong(0);
|
||||||
|
|
||||||
public ContainerChillModeRule(String ruleName, EventQueue eventQueue,
|
public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
|
||||||
Configuration conf,
|
Configuration conf,
|
||||||
List<ContainerInfo> containers, SCMChillModeManager manager) {
|
List<ContainerInfo> containers, SCMSafeModeManager manager) {
|
||||||
super(manager, ruleName, eventQueue);
|
super(manager, ruleName, eventQueue);
|
||||||
chillModeCutoff = conf.getDouble(
|
safeModeCutoff = conf.getDouble(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
|
||||||
|
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
(chillModeCutoff >= 0.0 && chillModeCutoff <= 1.0),
|
(safeModeCutoff >= 0.0 && safeModeCutoff <= 1.0),
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT +
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT +
|
||||||
" value should be >= 0.0 and <= 1.0");
|
" value should be >= 0.0 and <= 1.0");
|
||||||
|
|
||||||
containerMap = new ConcurrentHashMap<>();
|
containerMap = new ConcurrentHashMap<>();
|
||||||
@ -87,7 +87,7 @@ protected TypedEvent<NodeRegistrationContainerReport> getEventType() {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean validate() {
|
protected boolean validate() {
|
||||||
return getCurrentContainerThreshold() >= chillModeCutoff;
|
return getCurrentContainerThreshold() >= safeModeCutoff;
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
@ -109,9 +109,9 @@ protected void process(NodeRegistrationContainerReport reportsProto) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
if (scmInChillMode()) {
|
if (scmInSafeMode()) {
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMSafeModeManager.getLogger().info(
|
||||||
"SCM in chill mode. {} % containers have at least one"
|
"SCM in safe mode. {} % containers have at least one"
|
||||||
+ " reported replica.",
|
+ " reported replica.",
|
||||||
(containerWithMinReplicas.doubleValue() / maxContainer) * 100);
|
(containerWithMinReplicas.doubleValue() / maxContainer) * 100);
|
||||||
}
|
}
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@ -29,25 +29,25 @@
|
|||||||
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class defining Chill mode exit criteria according to number of DataNodes
|
* Class defining Safe mode exit criteria according to number of DataNodes
|
||||||
* registered with SCM.
|
* registered with SCM.
|
||||||
*/
|
*/
|
||||||
public class DataNodeChillModeRule extends
|
public class DataNodeSafeModeRule extends
|
||||||
ChillModeExitRule<NodeRegistrationContainerReport>{
|
SafeModeExitRule<NodeRegistrationContainerReport>{
|
||||||
|
|
||||||
// Min DataNodes required to exit chill mode.
|
// Min DataNodes required to exit safe mode.
|
||||||
private int requiredDns;
|
private int requiredDns;
|
||||||
private int registeredDns = 0;
|
private int registeredDns = 0;
|
||||||
// Set to track registered DataNodes.
|
// Set to track registered DataNodes.
|
||||||
private HashSet<UUID> registeredDnSet;
|
private HashSet<UUID> registeredDnSet;
|
||||||
|
|
||||||
public DataNodeChillModeRule(String ruleName, EventQueue eventQueue,
|
public DataNodeSafeModeRule(String ruleName, EventQueue eventQueue,
|
||||||
Configuration conf,
|
Configuration conf,
|
||||||
SCMChillModeManager manager) {
|
SCMSafeModeManager manager) {
|
||||||
super(manager, ruleName, eventQueue);
|
super(manager, ruleName, eventQueue);
|
||||||
requiredDns = conf.getInt(
|
requiredDns = conf.getInt(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT);
|
||||||
registeredDnSet = new HashSet<>(requiredDns * 2);
|
registeredDnSet = new HashSet<>(requiredDns * 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,9 +67,9 @@ protected void process(NodeRegistrationContainerReport reportsProto) {
|
|||||||
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
|
||||||
registeredDns = registeredDnSet.size();
|
registeredDns = registeredDnSet.size();
|
||||||
|
|
||||||
if (scmInChillMode()) {
|
if (scmInSafeMode()) {
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMSafeModeManager.getLogger().info(
|
||||||
"SCM in chill mode. {} DataNodes registered, {} required.",
|
"SCM in safe mode. {} DataNodes registered, {} required.",
|
||||||
registeredDns, requiredDns);
|
registeredDns, requiredDns);
|
||||||
}
|
}
|
||||||
|
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -42,38 +42,38 @@
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class defining Chill mode exit criteria for Pipelines.
|
* Class defining Safe mode exit criteria for Pipelines.
|
||||||
*
|
*
|
||||||
* This rule defines the percentage of healthy pipelines that must be reported.
|
* This rule defines the percentage of healthy pipelines that must be reported.
|
||||||
* Once chill mode exit happens, this rule ensures that writes can go
|
* Once safe mode exit happens, this rule ensures that writes can go
|
||||||
* through in a cluster.
|
* through in a cluster.
|
||||||
*/
|
*/
|
||||||
public class HealthyPipelineChillModeRule
|
public class HealthyPipelineSafeModeRule
|
||||||
extends ChillModeExitRule<PipelineReportFromDatanode>{
|
extends SafeModeExitRule<PipelineReportFromDatanode>{
|
||||||
|
|
||||||
public static final Logger LOG =
|
public static final Logger LOG =
|
||||||
LoggerFactory.getLogger(HealthyPipelineChillModeRule.class);
|
LoggerFactory.getLogger(HealthyPipelineSafeModeRule.class);
|
||||||
private final PipelineManager pipelineManager;
|
private final PipelineManager pipelineManager;
|
||||||
private final int healthyPipelineThresholdCount;
|
private final int healthyPipelineThresholdCount;
|
||||||
private int currentHealthyPipelineCount = 0;
|
private int currentHealthyPipelineCount = 0;
|
||||||
private final Set<DatanodeDetails> processedDatanodeDetails =
|
private final Set<DatanodeDetails> processedDatanodeDetails =
|
||||||
new HashSet<>();
|
new HashSet<>();
|
||||||
|
|
||||||
HealthyPipelineChillModeRule(String ruleName, EventQueue eventQueue,
|
HealthyPipelineSafeModeRule(String ruleName, EventQueue eventQueue,
|
||||||
PipelineManager pipelineManager,
|
PipelineManager pipelineManager,
|
||||||
SCMChillModeManager manager, Configuration configuration) {
|
SCMSafeModeManager manager, Configuration configuration) {
|
||||||
super(manager, ruleName, eventQueue);
|
super(manager, ruleName, eventQueue);
|
||||||
this.pipelineManager = pipelineManager;
|
this.pipelineManager = pipelineManager;
|
||||||
double healthyPipelinesPercent =
|
double healthyPipelinesPercent =
|
||||||
configuration.getDouble(HddsConfigKeys.
|
configuration.getDouble(HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT,
|
HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT,
|
||||||
HddsConfigKeys.
|
HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT);
|
HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT);
|
||||||
|
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
(healthyPipelinesPercent >= 0.0 && healthyPipelinesPercent <= 1.0),
|
(healthyPipelinesPercent >= 0.0 && healthyPipelinesPercent <= 1.0),
|
||||||
HddsConfigKeys.
|
HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT
|
HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT
|
||||||
+ " value should be >= 0.0 and <= 1.0");
|
+ " value should be >= 0.0 and <= 1.0");
|
||||||
|
|
||||||
// As we want to wait for 3 node pipelines
|
// As we want to wait for 3 node pipelines
|
||||||
@ -108,7 +108,7 @@ protected boolean validate() {
|
|||||||
protected void process(PipelineReportFromDatanode
|
protected void process(PipelineReportFromDatanode
|
||||||
pipelineReportFromDatanode) {
|
pipelineReportFromDatanode) {
|
||||||
|
|
||||||
// When SCM is in chill mode for long time, already registered
|
// When SCM is in safe mode for long time, already registered
|
||||||
// datanode can send pipeline report again, then pipeline handler fires
|
// datanode can send pipeline report again, then pipeline handler fires
|
||||||
// processed report event, we should not consider this pipeline report
|
// processed report event, we should not consider this pipeline report
|
||||||
// from datanode again during threshold calculation.
|
// from datanode again during threshold calculation.
|
||||||
@ -138,9 +138,9 @@ protected void process(PipelineReportFromDatanode
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scmInChillMode()) {
|
if (scmInSafeMode()) {
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMSafeModeManager.getLogger().info(
|
||||||
"SCM in chill mode. Healthy pipelines reported count is {}, " +
|
"SCM in safe mode. Healthy pipelines reported count is {}, " +
|
||||||
"required healthy pipeline reported count is {}",
|
"required healthy pipeline reported count is {}",
|
||||||
currentHealthyPipelineCount, healthyPipelineThresholdCount);
|
currentHealthyPipelineCount, healthyPipelineThresholdCount);
|
||||||
}
|
}
|
@ -15,7 +15,7 @@
|
|||||||
* the License.
|
* the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
@ -44,13 +44,13 @@
|
|||||||
/**
|
/**
|
||||||
* This rule checks whether at least one datanode has reported for each
|
* This rule checks whether at least one datanode has reported for each
|
||||||
* pipeline. This rule is for all open containers, we have at least one
|
* pipeline. This rule is for all open containers, we have at least one
|
||||||
* replica available for read when we exit chill mode.
|
* replica available for read when we exit safe mode.
|
||||||
*/
|
*/
|
||||||
public class OneReplicaPipelineChillModeRule extends
|
public class OneReplicaPipelineSafeModeRule extends
|
||||||
ChillModeExitRule<PipelineReportFromDatanode> {
|
SafeModeExitRule<PipelineReportFromDatanode> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(OneReplicaPipelineChillModeRule.class);
|
LoggerFactory.getLogger(OneReplicaPipelineSafeModeRule.class);
|
||||||
|
|
||||||
private int thresholdCount;
|
private int thresholdCount;
|
||||||
private Set<PipelineID> reportedPipelineIDSet = new HashSet<>();
|
private Set<PipelineID> reportedPipelineIDSet = new HashSet<>();
|
||||||
@ -58,21 +58,21 @@ public class OneReplicaPipelineChillModeRule extends
|
|||||||
private int currentReportedPipelineCount = 0;
|
private int currentReportedPipelineCount = 0;
|
||||||
|
|
||||||
|
|
||||||
public OneReplicaPipelineChillModeRule(String ruleName, EventQueue eventQueue,
|
public OneReplicaPipelineSafeModeRule(String ruleName, EventQueue eventQueue,
|
||||||
PipelineManager pipelineManager,
|
PipelineManager pipelineManager,
|
||||||
SCMChillModeManager chillModeManager, Configuration configuration) {
|
SCMSafeModeManager safeModeManager, Configuration configuration) {
|
||||||
super(chillModeManager, ruleName, eventQueue);
|
super(safeModeManager, ruleName, eventQueue);
|
||||||
this.pipelineManager = pipelineManager;
|
this.pipelineManager = pipelineManager;
|
||||||
|
|
||||||
double percent =
|
double percent =
|
||||||
configuration.getDouble(
|
configuration.getDouble(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT,
|
||||||
HddsConfigKeys.
|
HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT);
|
HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT);
|
||||||
|
|
||||||
Preconditions.checkArgument((percent >= 0.0 && percent <= 1.0),
|
Preconditions.checkArgument((percent >= 0.0 && percent <= 1.0),
|
||||||
HddsConfigKeys.
|
HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT +
|
HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT +
|
||||||
" value should be >= 0.0 and <= 1.0");
|
" value should be >= 0.0 and <= 1.0");
|
||||||
|
|
||||||
int totalPipelineCount =
|
int totalPipelineCount =
|
||||||
@ -125,9 +125,9 @@ protected void process(PipelineReportFromDatanode
|
|||||||
|
|
||||||
currentReportedPipelineCount = reportedPipelineIDSet.size();
|
currentReportedPipelineCount = reportedPipelineIDSet.size();
|
||||||
|
|
||||||
if (scmInChillMode()) {
|
if (scmInSafeMode()) {
|
||||||
SCMChillModeManager.getLogger().info(
|
SCMSafeModeManager.getLogger().info(
|
||||||
"SCM in chill mode. Pipelines with atleast one datanode reported " +
|
"SCM in safe mode. Pipelines with atleast one datanode reported " +
|
||||||
"count is {}, required atleast one datanode reported per " +
|
"count is {}, required atleast one datanode reported per " +
|
||||||
"pipeline count is {}",
|
"pipeline count is {}",
|
||||||
currentReportedPipelineCount, thresholdCount);
|
currentReportedPipelineCount, thresholdCount);
|
@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
|
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
@ -36,29 +36,29 @@
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* StorageContainerManager enters chill mode on startup to allow system to
|
* StorageContainerManager enters safe mode on startup to allow system to
|
||||||
* reach a stable state before becoming fully functional. SCM will wait
|
* reach a stable state before becoming fully functional. SCM will wait
|
||||||
* for certain resources to be reported before coming out of chill mode.
|
* for certain resources to be reported before coming out of safe mode.
|
||||||
*
|
*
|
||||||
* ChillModeExitRule defines format to define new rules which must be satisfied
|
* SafeModeExitRule defines format to define new rules which must be satisfied
|
||||||
* to exit Chill mode.
|
* to exit Safe mode.
|
||||||
*
|
*
|
||||||
* Current ChillMode rules:
|
* Current SafeMode rules:
|
||||||
* 1. ContainerChillModeRule:
|
* 1. ContainerSafeModeRule:
|
||||||
* On every new datanode registration, it fires
|
* On every new datanode registration, it fires
|
||||||
* {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
|
* {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
|
||||||
* event. This rule processes this report and increments the
|
* event. This rule processes this report and increments the
|
||||||
* containerWithMinReplicas count when this reported replica is in the
|
* containerWithMinReplicas count when this reported replica is in the
|
||||||
* containerMap. Then validates if cutoff threshold for containers is met.
|
* containerMap. Then validates if cutoff threshold for containers is met.
|
||||||
*
|
*
|
||||||
* 2. DataNodeChillModeRule:
|
* 2. DataNodeSafeModeRule:
|
||||||
* On every new datanode registration, it fires
|
* On every new datanode registration, it fires
|
||||||
* {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
|
* {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this
|
||||||
* event. This rule processes this report and, if this is a new node, adds it
|
* event. This rule processes this report and, if this is a new node, adds it
|
||||||
* to its reported node list. Then validates if cutoff threshold for minimum
|
* to its reported node list. Then validates if cutoff threshold for minimum
|
||||||
* number of datanodes registered is met or not.
|
* number of datanodes registered is met or not.
|
||||||
*
|
*
|
||||||
* 3. HealthyPipelineChillModeRule:
|
* 3. HealthyPipelineSafeModeRule:
|
||||||
* Once the pipelineReportHandler processes the
|
* Once the pipelineReportHandler processes the
|
||||||
* {@link SCMEvents#PIPELINE_REPORT}, it fires
|
* {@link SCMEvents#PIPELINE_REPORT}, it fires
|
||||||
* {@link SCMEvents#PROCESSED_PIPELINE_REPORT}. This rule handles this
|
* {@link SCMEvents#PROCESSED_PIPELINE_REPORT}. This rule handles this
|
||||||
@ -66,7 +66,7 @@
|
|||||||
* and increments current healthy pipeline count. Then validates if cutoff
|
* and increments current healthy pipeline count. Then validates if cutoff
|
||||||
* threshold for healthy pipeline is met or not.
|
* threshold for healthy pipeline is met or not.
|
||||||
*
|
*
|
||||||
* 4. OneReplicaPipelineChillModeRule:
|
* 4. OneReplicaPipelineSafeModeRule:
|
||||||
* Once the pipelineReportHandler processes the
|
* Once the pipelineReportHandler processes the
|
||||||
* {@link SCMEvents#PIPELINE_REPORT}, it fires
|
* {@link SCMEvents#PIPELINE_REPORT}, it fires
|
||||||
* {@link SCMEvents#PROCESSED_PIPELINE_REPORT}. This rule handles this
|
* {@link SCMEvents#PROCESSED_PIPELINE_REPORT}. This rule handles this
|
||||||
@ -75,19 +75,19 @@
|
|||||||
* per pipeline is met or not.
|
* per pipeline is met or not.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class SCMChillModeManager {
|
public class SCMSafeModeManager {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(SCMChillModeManager.class);
|
LoggerFactory.getLogger(SCMSafeModeManager.class);
|
||||||
private final boolean isChillModeEnabled;
|
private final boolean isSafeModeEnabled;
|
||||||
private AtomicBoolean inChillMode = new AtomicBoolean(true);
|
private AtomicBoolean inSafeMode = new AtomicBoolean(true);
|
||||||
|
|
||||||
private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
|
private Map<String, SafeModeExitRule> exitRules = new HashMap(1);
|
||||||
private Configuration config;
|
private Configuration config;
|
||||||
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
|
private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
|
||||||
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
|
private static final String DN_EXIT_RULE = "DataNodeSafeModeRule";
|
||||||
private static final String HEALTHY_PIPELINE_EXIT_RULE =
|
private static final String HEALTHY_PIPELINE_EXIT_RULE =
|
||||||
"HealthyPipelineChillModeRule";
|
"HealthyPipelineSafeModeRule";
|
||||||
private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE =
|
private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE =
|
||||||
"AtleastOneDatanodeReportedRule";
|
"AtleastOneDatanodeReportedRule";
|
||||||
|
|
||||||
@ -96,57 +96,57 @@ public class SCMChillModeManager {
|
|||||||
private final EventQueue eventPublisher;
|
private final EventQueue eventPublisher;
|
||||||
private final PipelineManager pipelineManager;
|
private final PipelineManager pipelineManager;
|
||||||
|
|
||||||
public SCMChillModeManager(Configuration conf,
|
public SCMSafeModeManager(Configuration conf,
|
||||||
List<ContainerInfo> allContainers, PipelineManager pipelineManager,
|
List<ContainerInfo> allContainers, PipelineManager pipelineManager,
|
||||||
EventQueue eventQueue) {
|
EventQueue eventQueue) {
|
||||||
this.config = conf;
|
this.config = conf;
|
||||||
this.pipelineManager = pipelineManager;
|
this.pipelineManager = pipelineManager;
|
||||||
this.eventPublisher = eventQueue;
|
this.eventPublisher = eventQueue;
|
||||||
this.isChillModeEnabled = conf.getBoolean(
|
this.isSafeModeEnabled = conf.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT);
|
||||||
|
|
||||||
if (isChillModeEnabled) {
|
if (isSafeModeEnabled) {
|
||||||
ContainerChillModeRule containerChillModeRule =
|
ContainerSafeModeRule containerSafeModeRule =
|
||||||
new ContainerChillModeRule(CONT_EXIT_RULE, eventQueue, config,
|
new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
|
||||||
allContainers, this);
|
allContainers, this);
|
||||||
DataNodeChillModeRule dataNodeChillModeRule =
|
DataNodeSafeModeRule dataNodeSafeModeRule =
|
||||||
new DataNodeChillModeRule(DN_EXIT_RULE, eventQueue, config, this);
|
new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, this);
|
||||||
exitRules.put(CONT_EXIT_RULE, containerChillModeRule);
|
exitRules.put(CONT_EXIT_RULE, containerSafeModeRule);
|
||||||
exitRules.put(DN_EXIT_RULE, dataNodeChillModeRule);
|
exitRules.put(DN_EXIT_RULE, dataNodeSafeModeRule);
|
||||||
if (conf.getBoolean(
|
if (conf.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
|
||||||
&& pipelineManager != null) {
|
&& pipelineManager != null) {
|
||||||
HealthyPipelineChillModeRule healthyPipelineChillModeRule =
|
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
|
||||||
new HealthyPipelineChillModeRule(HEALTHY_PIPELINE_EXIT_RULE,
|
new HealthyPipelineSafeModeRule(HEALTHY_PIPELINE_EXIT_RULE,
|
||||||
eventQueue, pipelineManager,
|
eventQueue, pipelineManager,
|
||||||
this, config);
|
this, config);
|
||||||
OneReplicaPipelineChillModeRule oneReplicaPipelineChillModeRule =
|
OneReplicaPipelineSafeModeRule oneReplicaPipelineSafeModeRule =
|
||||||
new OneReplicaPipelineChillModeRule(
|
new OneReplicaPipelineSafeModeRule(
|
||||||
ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
|
ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
|
||||||
pipelineManager, this, conf);
|
pipelineManager, this, conf);
|
||||||
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, healthyPipelineChillModeRule);
|
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, healthyPipelineSafeModeRule);
|
||||||
exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE,
|
exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE,
|
||||||
oneReplicaPipelineChillModeRule);
|
oneReplicaPipelineSafeModeRule);
|
||||||
}
|
}
|
||||||
emitChillModeStatus();
|
emitSafeModeStatus();
|
||||||
} else {
|
} else {
|
||||||
exitChillMode(eventQueue);
|
exitSafeMode(eventQueue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Emit Chill mode status.
|
* Emit Safe mode status.
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void emitChillModeStatus() {
|
public void emitSafeModeStatus() {
|
||||||
eventPublisher.fireEvent(SCMEvents.CHILL_MODE_STATUS,
|
eventPublisher.fireEvent(SCMEvents.SAFE_MODE_STATUS,
|
||||||
new ChillModeStatus(getInChillMode()));
|
new SafeModeStatus(getInSafeMode()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public synchronized void validateChillModeExitRules(String ruleName,
|
public synchronized void validateSafeModeExitRules(String ruleName,
|
||||||
EventPublisher eventQueue) {
|
EventPublisher eventQueue) {
|
||||||
|
|
||||||
if (exitRules.get(ruleName) != null) {
|
if (exitRules.get(ruleName) != null) {
|
||||||
@ -158,47 +158,47 @@ public synchronized void validateChillModeExitRules(String ruleName,
|
|||||||
|
|
||||||
|
|
||||||
if (validatedRules.size() == exitRules.size()) {
|
if (validatedRules.size() == exitRules.size()) {
|
||||||
// All rules are satisfied, we can exit chill mode.
|
// All rules are satisfied, we can exit safe mode.
|
||||||
LOG.info("ScmChillModeManager, all rules are successfully validated");
|
LOG.info("ScmSafeModeManager, all rules are successfully validated");
|
||||||
exitChillMode(eventQueue);
|
exitSafeMode(eventQueue);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Exit chill mode. It does following actions:
|
* Exit safe mode. It does following actions:
|
||||||
* 1. Set chill mode status to false.
|
* 1. Set safe mode status to false.
|
||||||
* 2. Emits START_REPLICATION for ReplicationManager.
|
* 2. Emits START_REPLICATION for ReplicationManager.
|
||||||
* 3. Cleanup resources.
|
* 3. Cleanup resources.
|
||||||
* 4. Emit chill mode status.
|
* 4. Emit safe mode status.
|
||||||
* @param eventQueue
|
* @param eventQueue
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void exitChillMode(EventPublisher eventQueue) {
|
public void exitSafeMode(EventPublisher eventQueue) {
|
||||||
LOG.info("SCM exiting chill mode.");
|
LOG.info("SCM exiting safe mode.");
|
||||||
setInChillMode(false);
|
setInSafeMode(false);
|
||||||
|
|
||||||
// TODO: Remove handler registration as there is no need to listen to
|
// TODO: Remove handler registration as there is no need to listen to
|
||||||
// register events anymore.
|
// register events anymore.
|
||||||
|
|
||||||
emitChillModeStatus();
|
emitSafeModeStatus();
|
||||||
// TODO: #CLUTIL if we reenter chill mode the fixed interval pipeline
|
// TODO: #CLUTIL if we reenter safe mode the fixed interval pipeline
|
||||||
// creation job needs to stop
|
// creation job needs to stop
|
||||||
pipelineManager.startPipelineCreator();
|
pipelineManager.startPipelineCreator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getInChillMode() {
|
public boolean getInSafeMode() {
|
||||||
if (!isChillModeEnabled) {
|
if (!isSafeModeEnabled) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return inChillMode.get();
|
return inSafeMode.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set chill mode status.
|
* Set safe mode status.
|
||||||
*/
|
*/
|
||||||
public void setInChillMode(boolean inChillMode) {
|
public void setInSafeMode(boolean inSafeMode) {
|
||||||
this.inChillMode.set(inChillMode);
|
this.inSafeMode.set(inSafeMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Logger getLogger() {
|
public static Logger getLogger() {
|
||||||
@ -207,35 +207,35 @@ public static Logger getLogger() {
|
|||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public double getCurrentContainerThreshold() {
|
public double getCurrentContainerThreshold() {
|
||||||
return ((ContainerChillModeRule) exitRules.get(CONT_EXIT_RULE))
|
return ((ContainerSafeModeRule) exitRules.get(CONT_EXIT_RULE))
|
||||||
.getCurrentContainerThreshold();
|
.getCurrentContainerThreshold();
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public HealthyPipelineChillModeRule getHealthyPipelineChillModeRule() {
|
public HealthyPipelineSafeModeRule getHealthyPipelineSafeModeRule() {
|
||||||
return (HealthyPipelineChillModeRule)
|
return (HealthyPipelineSafeModeRule)
|
||||||
exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
|
exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public OneReplicaPipelineChillModeRule getOneReplicaPipelineChillModeRule() {
|
public OneReplicaPipelineSafeModeRule getOneReplicaPipelineSafeModeRule() {
|
||||||
return (OneReplicaPipelineChillModeRule)
|
return (OneReplicaPipelineSafeModeRule)
|
||||||
exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE);
|
exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class used during ChillMode status event.
|
* Class used during SafeMode status event.
|
||||||
*/
|
*/
|
||||||
public static class ChillModeStatus {
|
public static class SafeModeStatus {
|
||||||
|
|
||||||
private boolean chillModeStatus;
|
private boolean safeModeStatus;
|
||||||
public ChillModeStatus(boolean chillModeState) {
|
public SafeModeStatus(boolean safeModeState) {
|
||||||
this.chillModeStatus = chillModeState;
|
this.safeModeStatus = safeModeState;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getChillModeStatus() {
|
public boolean getSafeModeStatus() {
|
||||||
return chillModeStatus;
|
return safeModeStatus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.server.events.EventHandler;
|
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||||
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
@ -23,31 +23,31 @@
|
|||||||
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
import org.apache.hadoop.hdds.server.events.TypedEvent;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract class for ChillModeExitRules. When a new rule is added, the new
|
* Abstract class for SafeModeExitRules. When a new rule is added, the new
|
||||||
* rule should extend this abstract class.
|
* rule should extend this abstract class.
|
||||||
*
|
*
|
||||||
* Each rule Should do:
|
* Each rule Should do:
|
||||||
* 1. Should add a handler for the event it is looking for during the
|
* 1. Should add a handler for the event it is looking for during the
|
||||||
* initialization of the rule.
|
* initialization of the rule.
|
||||||
* 2. Add the rule in ScmChillModeManager to list of the rules.
|
* 2. Add the rule in ScmSafeModeManager to list of the rules.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* @param <T>
|
* @param <T>
|
||||||
*/
|
*/
|
||||||
public abstract class ChillModeExitRule<T> implements EventHandler<T> {
|
public abstract class SafeModeExitRule<T> implements EventHandler<T> {
|
||||||
|
|
||||||
private final SCMChillModeManager chillModeManager;
|
private final SCMSafeModeManager safeModeManager;
|
||||||
private final String ruleName;
|
private final String ruleName;
|
||||||
|
|
||||||
public ChillModeExitRule(SCMChillModeManager chillModeManager,
|
public SafeModeExitRule(SCMSafeModeManager safeModeManager,
|
||||||
String ruleName, EventQueue eventQueue) {
|
String ruleName, EventQueue eventQueue) {
|
||||||
this.chillModeManager = chillModeManager;
|
this.safeModeManager = safeModeManager;
|
||||||
this.ruleName = ruleName;
|
this.ruleName = ruleName;
|
||||||
eventQueue.addHandler(getEventType(), this);
|
eventQueue.addHandler(getEventType(), this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return's the name of this ChillModeExit Rule.
|
* Return's the name of this SafeModeExit Rule.
|
||||||
* @return ruleName
|
* @return ruleName
|
||||||
*/
|
*/
|
||||||
public String getRuleName() {
|
public String getRuleName() {
|
||||||
@ -55,7 +55,7 @@ public String getRuleName() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return's the event type this chillMode exit rule handles.
|
* Return's the event type this safeMode exit rule handles.
|
||||||
* @return TypedEvent
|
* @return TypedEvent
|
||||||
*/
|
*/
|
||||||
protected abstract TypedEvent<T> getEventType();
|
protected abstract TypedEvent<T> getEventType();
|
||||||
@ -81,11 +81,11 @@ public String getRuleName() {
|
|||||||
@Override
|
@Override
|
||||||
public final void onMessage(T report, EventPublisher publisher) {
|
public final void onMessage(T report, EventPublisher publisher) {
|
||||||
|
|
||||||
// TODO: when we have remove handlers, we can remove getInChillmode check
|
// TODO: when we have remove handlers, we can remove getInSafemode check
|
||||||
|
|
||||||
if (scmInChillMode()) {
|
if (scmInSafeMode()) {
|
||||||
if (validate()) {
|
if (validate()) {
|
||||||
chillModeManager.validateChillModeExitRules(ruleName, publisher);
|
safeModeManager.validateSafeModeExitRules(ruleName, publisher);
|
||||||
cleanup();
|
cleanup();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -93,18 +93,18 @@ public final void onMessage(T report, EventPublisher publisher) {
|
|||||||
process(report);
|
process(report);
|
||||||
|
|
||||||
if (validate()) {
|
if (validate()) {
|
||||||
chillModeManager.validateChillModeExitRules(ruleName, publisher);
|
safeModeManager.validateSafeModeExitRules(ruleName, publisher);
|
||||||
cleanup();
|
cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return true if SCM is in chill mode, else false.
|
* Return true if SCM is in safe mode, else false.
|
||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
protected boolean scmInChillMode() {
|
protected boolean scmInSafeMode() {
|
||||||
return chillModeManager.getInChillMode();
|
return safeModeManager.getInSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -15,7 +15,7 @@
|
|||||||
* the License.
|
* the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
@ -24,7 +24,7 @@
|
|||||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
|
import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager.ChillModeStatus;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus;
|
||||||
import org.apache.hadoop.hdds.server.events.EventHandler;
|
import org.apache.hadoop.hdds.server.events.EventHandler;
|
||||||
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
import org.apache.hadoop.hdds.server.events.EventPublisher;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -37,30 +37,30 @@
|
|||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class to handle the activities needed to be performed after exiting chill
|
* Class to handle the activities needed to be performed after exiting safe
|
||||||
* mode.
|
* mode.
|
||||||
*/
|
*/
|
||||||
public class ChillModeHandler implements EventHandler<ChillModeStatus> {
|
public class SafeModeHandler implements EventHandler<SafeModeStatus> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ChillModeHandler.class);
|
LoggerFactory.getLogger(SafeModeHandler.class);
|
||||||
|
|
||||||
private final SCMClientProtocolServer scmClientProtocolServer;
|
private final SCMClientProtocolServer scmClientProtocolServer;
|
||||||
private final BlockManager scmBlockManager;
|
private final BlockManager scmBlockManager;
|
||||||
private final long waitTime;
|
private final long waitTime;
|
||||||
private final AtomicBoolean isInChillMode = new AtomicBoolean(true);
|
private final AtomicBoolean isInSafeMode = new AtomicBoolean(true);
|
||||||
private final ReplicationManager replicationManager;
|
private final ReplicationManager replicationManager;
|
||||||
|
|
||||||
private final PipelineManager scmPipelineManager;
|
private final PipelineManager scmPipelineManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ChillModeHandler, to handle the logic once we exit chill mode.
|
* SafeModeHandler, to handle the logic once we exit safe mode.
|
||||||
* @param configuration
|
* @param configuration
|
||||||
* @param clientProtocolServer
|
* @param clientProtocolServer
|
||||||
* @param blockManager
|
* @param blockManager
|
||||||
* @param replicationManager
|
* @param replicationManager
|
||||||
*/
|
*/
|
||||||
public ChillModeHandler(Configuration configuration,
|
public SafeModeHandler(Configuration configuration,
|
||||||
SCMClientProtocolServer clientProtocolServer,
|
SCMClientProtocolServer clientProtocolServer,
|
||||||
BlockManager blockManager,
|
BlockManager blockManager,
|
||||||
ReplicationManager replicationManager, PipelineManager pipelineManager) {
|
ReplicationManager replicationManager, PipelineManager pipelineManager) {
|
||||||
@ -73,43 +73,43 @@ public ChillModeHandler(Configuration configuration,
|
|||||||
Objects.requireNonNull(pipelineManager, "PipelineManager object cannot " +
|
Objects.requireNonNull(pipelineManager, "PipelineManager object cannot " +
|
||||||
"be" + "null");
|
"be" + "null");
|
||||||
this.waitTime = configuration.getTimeDuration(
|
this.waitTime = configuration.getTimeDuration(
|
||||||
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT,
|
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
|
||||||
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT_DEFAULT,
|
HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT_DEFAULT,
|
||||||
TimeUnit.MILLISECONDS);
|
TimeUnit.MILLISECONDS);
|
||||||
this.scmClientProtocolServer = clientProtocolServer;
|
this.scmClientProtocolServer = clientProtocolServer;
|
||||||
this.scmBlockManager = blockManager;
|
this.scmBlockManager = blockManager;
|
||||||
this.replicationManager = replicationManager;
|
this.replicationManager = replicationManager;
|
||||||
this.scmPipelineManager = pipelineManager;
|
this.scmPipelineManager = pipelineManager;
|
||||||
|
|
||||||
final boolean chillModeEnabled = configuration.getBoolean(
|
final boolean safeModeEnabled = configuration.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT);
|
||||||
isInChillMode.set(chillModeEnabled);
|
isInSafeMode.set(safeModeEnabled);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set ChillMode status based on
|
* Set SafeMode status based on
|
||||||
* {@link org.apache.hadoop.hdds.scm.events.SCMEvents#CHILL_MODE_STATUS}.
|
* {@link org.apache.hadoop.hdds.scm.events.SCMEvents#SAFE_MODE_STATUS}.
|
||||||
*
|
*
|
||||||
* Inform BlockManager, ScmClientProtocolServer and replicationAcitivity
|
* Inform BlockManager, ScmClientProtocolServer and replicationAcitivity
|
||||||
* status about chillMode status.
|
* status about safeMode status.
|
||||||
*
|
*
|
||||||
* @param chillModeStatus
|
* @param safeModeStatus
|
||||||
* @param publisher
|
* @param publisher
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void onMessage(ChillModeStatus chillModeStatus,
|
public void onMessage(SafeModeStatus safeModeStatus,
|
||||||
EventPublisher publisher) {
|
EventPublisher publisher) {
|
||||||
|
|
||||||
isInChillMode.set(chillModeStatus.getChillModeStatus());
|
isInSafeMode.set(safeModeStatus.getSafeModeStatus());
|
||||||
scmClientProtocolServer.setChillModeStatus(isInChillMode.get());
|
scmClientProtocolServer.setSafeModeStatus(isInSafeMode.get());
|
||||||
scmBlockManager.setChillModeStatus(isInChillMode.get());
|
scmBlockManager.setSafeModeStatus(isInSafeMode.get());
|
||||||
|
|
||||||
if (!isInChillMode.get()) {
|
if (!isInSafeMode.get()) {
|
||||||
final Thread chillModeExitThread = new Thread(() -> {
|
final Thread safeModeExitThread = new Thread(() -> {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(waitTime);
|
Thread.sleep(waitTime);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
@ -119,8 +119,8 @@ public void onMessage(ChillModeStatus chillModeStatus,
|
|||||||
cleanupPipelines();
|
cleanupPipelines();
|
||||||
});
|
});
|
||||||
|
|
||||||
chillModeExitThread.setDaemon(true);
|
safeModeExitThread.setDaemon(true);
|
||||||
chillModeExitThread.start();
|
safeModeExitThread.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -139,8 +139,8 @@ private void cleanupPipelines() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getChillModeStatus() {
|
public boolean getSafeModeStatus() {
|
||||||
return isInChillMode.get();
|
return isInSafeMode.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -26,32 +26,32 @@
|
|||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chill mode pre-check for SCM operations.
|
* Safe mode pre-check for SCM operations.
|
||||||
* */
|
* */
|
||||||
public class ChillModePrecheck implements Precheck<ScmOps> {
|
public class SafeModePrecheck implements Precheck<ScmOps> {
|
||||||
|
|
||||||
private AtomicBoolean inChillMode;
|
private AtomicBoolean inSafeMode;
|
||||||
public static final String PRECHECK_TYPE = "ChillModePrecheck";
|
public static final String PRECHECK_TYPE = "SafeModePrecheck";
|
||||||
|
|
||||||
public ChillModePrecheck(Configuration conf) {
|
public SafeModePrecheck(Configuration conf) {
|
||||||
boolean chillModeEnabled = conf.getBoolean(
|
boolean safeModeEnabled = conf.getBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT);
|
||||||
if (chillModeEnabled) {
|
if (safeModeEnabled) {
|
||||||
inChillMode = new AtomicBoolean(true);
|
inSafeMode = new AtomicBoolean(true);
|
||||||
} else {
|
} else {
|
||||||
inChillMode = new AtomicBoolean(false);
|
inSafeMode = new AtomicBoolean(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean check(ScmOps op) throws SCMException {
|
public boolean check(ScmOps op) throws SCMException {
|
||||||
if (inChillMode.get() && ChillModeRestrictedOps
|
if (inSafeMode.get() && SafeModeRestrictedOps
|
||||||
.isRestrictedInChillMode(op)) {
|
.isRestrictedInSafeMode(op)) {
|
||||||
throw new SCMException("ChillModePrecheck failed for " + op,
|
throw new SCMException("SafeModePrecheck failed for " + op,
|
||||||
ResultCodes.CHILL_MODE_EXCEPTION);
|
ResultCodes.SAFE_MODE_EXCEPTION);
|
||||||
}
|
}
|
||||||
return inChillMode.get();
|
return inSafeMode.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -59,11 +59,11 @@ public String type() {
|
|||||||
return PRECHECK_TYPE;
|
return PRECHECK_TYPE;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isInChillMode() {
|
public boolean isInSafeMode() {
|
||||||
return inChillMode.get();
|
return inSafeMode.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setInChillMode(boolean inChillMode) {
|
public void setInSafeMode(boolean inSafeMode) {
|
||||||
this.inChillMode.set(inChillMode);
|
this.inSafeMode.set(inSafeMode);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -15,19 +15,19 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ScmOps;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Operations restricted in SCM chill mode.
|
* Operations restricted in SCM safe mode.
|
||||||
*/
|
*/
|
||||||
public final class ChillModeRestrictedOps {
|
public final class SafeModeRestrictedOps {
|
||||||
private static EnumSet restrictedOps = EnumSet.noneOf(ScmOps.class);
|
private static EnumSet restrictedOps = EnumSet.noneOf(ScmOps.class);
|
||||||
|
|
||||||
private ChillModeRestrictedOps() {
|
private SafeModeRestrictedOps() {
|
||||||
}
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
@ -35,7 +35,7 @@ private ChillModeRestrictedOps() {
|
|||||||
restrictedOps.add(ScmOps.allocateContainer);
|
restrictedOps.add(ScmOps.allocateContainer);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isRestrictedInChillMode(ScmOps opName) {
|
public static boolean isRestrictedInSafeMode(ScmOps opName) {
|
||||||
return restrictedOps.contains(opName);
|
return restrictedOps.contains(opName);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -15,4 +15,4 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
@ -223,9 +223,9 @@ public List<DeleteBlockGroupResult> deleteKeyBlocks(
|
|||||||
scmEx)
|
scmEx)
|
||||||
);
|
);
|
||||||
switch (scmEx.getResult()) {
|
switch (scmEx.getResult()) {
|
||||||
case CHILL_MODE_EXCEPTION:
|
case SAFE_MODE_EXCEPTION:
|
||||||
resultCode = ScmBlockLocationProtocolProtos.DeleteScmBlockResult
|
resultCode = ScmBlockLocationProtocolProtos.DeleteScmBlockResult
|
||||||
.Result.chillMode;
|
.Result.safeMode;
|
||||||
break;
|
break;
|
||||||
case FAILED_TO_FIND_BLOCK:
|
case FAILED_TO_FIND_BLOCK:
|
||||||
resultCode = ScmBlockLocationProtocolProtos.DeleteScmBlockResult
|
resultCode = ScmBlockLocationProtocolProtos.DeleteScmBlockResult
|
||||||
|
@ -35,7 +35,7 @@
|
|||||||
import org.apache.hadoop.hdds.scm.HddsServerUtil;
|
import org.apache.hadoop.hdds.scm.HddsServerUtil;
|
||||||
import org.apache.hadoop.hdds.scm.ScmInfo;
|
import org.apache.hadoop.hdds.scm.ScmInfo;
|
||||||
import org.apache.hadoop.hdds.scm.ScmUtils;
|
import org.apache.hadoop.hdds.scm.ScmUtils;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.ChillModePrecheck;
|
import org.apache.hadoop.hdds.scm.safemode.SafeModePrecheck;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
|
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
|
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
|
||||||
@ -101,13 +101,13 @@ public class SCMClientProtocolServer implements
|
|||||||
private final InetSocketAddress clientRpcAddress;
|
private final InetSocketAddress clientRpcAddress;
|
||||||
private final StorageContainerManager scm;
|
private final StorageContainerManager scm;
|
||||||
private final OzoneConfiguration conf;
|
private final OzoneConfiguration conf;
|
||||||
private ChillModePrecheck chillModePrecheck;
|
private SafeModePrecheck safeModePrecheck;
|
||||||
|
|
||||||
public SCMClientProtocolServer(OzoneConfiguration conf,
|
public SCMClientProtocolServer(OzoneConfiguration conf,
|
||||||
StorageContainerManager scm) throws IOException {
|
StorageContainerManager scm) throws IOException {
|
||||||
this.scm = scm;
|
this.scm = scm;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
chillModePrecheck = new ChillModePrecheck(conf);
|
safeModePrecheck = new SafeModePrecheck(conf);
|
||||||
final int handlerCount =
|
final int handlerCount =
|
||||||
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
conf.getInt(OZONE_SCM_HANDLER_COUNT_KEY,
|
||||||
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
OZONE_SCM_HANDLER_COUNT_DEFAULT);
|
||||||
@ -177,7 +177,7 @@ public String getRpcRemoteUsername() {
|
|||||||
public ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType
|
public ContainerWithPipeline allocateContainer(HddsProtos.ReplicationType
|
||||||
replicationType, HddsProtos.ReplicationFactor factor,
|
replicationType, HddsProtos.ReplicationFactor factor,
|
||||||
String owner) throws IOException {
|
String owner) throws IOException {
|
||||||
ScmUtils.preCheck(ScmOps.allocateContainer, chillModePrecheck);
|
ScmUtils.preCheck(ScmOps.allocateContainer, safeModePrecheck);
|
||||||
getScm().checkAdminAccess(getRpcRemoteUsername());
|
getScm().checkAdminAccess(getRpcRemoteUsername());
|
||||||
|
|
||||||
final ContainerInfo container = scm.getContainerManager()
|
final ContainerInfo container = scm.getContainerManager()
|
||||||
@ -220,14 +220,14 @@ public ContainerWithPipeline getContainerWithPipeline(long containerID)
|
|||||||
auditMap.put("containerID", String.valueOf(containerID));
|
auditMap.put("containerID", String.valueOf(containerID));
|
||||||
boolean auditSuccess = true;
|
boolean auditSuccess = true;
|
||||||
try {
|
try {
|
||||||
if (chillModePrecheck.isInChillMode()) {
|
if (safeModePrecheck.isInSafeMode()) {
|
||||||
ContainerInfo contInfo = scm.getContainerManager()
|
ContainerInfo contInfo = scm.getContainerManager()
|
||||||
.getContainer(ContainerID.valueof(containerID));
|
.getContainer(ContainerID.valueof(containerID));
|
||||||
if (contInfo.isOpen()) {
|
if (contInfo.isOpen()) {
|
||||||
if (!hasRequiredReplicas(contInfo)) {
|
if (!hasRequiredReplicas(contInfo)) {
|
||||||
throw new SCMException("Open container " + containerID + " doesn't"
|
throw new SCMException("Open container " + containerID + " doesn't"
|
||||||
+ " have enough replicas to service this operation in "
|
+ " have enough replicas to service this operation in "
|
||||||
+ "Chill mode.", ResultCodes.CHILL_MODE_EXCEPTION);
|
+ "Safe mode.", ResultCodes.SAFE_MODE_EXCEPTION);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -446,31 +446,31 @@ public ScmInfo getScmInfo() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if SCM is in chill mode.
|
* Check if SCM is in safe mode.
|
||||||
*
|
*
|
||||||
* @return Returns true if SCM is in chill mode else returns false.
|
* @return Returns true if SCM is in safe mode else returns false.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean inChillMode() throws IOException {
|
public boolean inSafeMode() throws IOException {
|
||||||
AUDIT.logReadSuccess(
|
AUDIT.logReadSuccess(
|
||||||
buildAuditMessageForSuccess(SCMAction.IN_CHILL_MODE, null)
|
buildAuditMessageForSuccess(SCMAction.IN_SAFE_MODE, null)
|
||||||
);
|
);
|
||||||
return scm.isInChillMode();
|
return scm.isInSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of Chill mode.
|
* Force SCM out of Safe mode.
|
||||||
*
|
*
|
||||||
* @return returns true if operation is successful.
|
* @return returns true if operation is successful.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean forceExitChillMode() throws IOException {
|
public boolean forceExitSafeMode() throws IOException {
|
||||||
AUDIT.logWriteSuccess(
|
AUDIT.logWriteSuccess(
|
||||||
buildAuditMessageForSuccess(SCMAction.FORCE_EXIT_CHILL_MODE, null)
|
buildAuditMessageForSuccess(SCMAction.FORCE_EXIT_SAFE_MODE, null)
|
||||||
);
|
);
|
||||||
return scm.exitChillMode();
|
return scm.exitSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -498,10 +498,10 @@ public StorageContainerManager getScm() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set chill mode status based on .
|
* Set safe mode status based on .
|
||||||
*/
|
*/
|
||||||
public boolean getChillModeStatus() {
|
public boolean getSafeModeStatus() {
|
||||||
return chillModePrecheck.isInChillMode();
|
return safeModePrecheck.isInSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -556,11 +556,11 @@ public void close() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set ChillMode status.
|
* Set SafeMode status.
|
||||||
*
|
*
|
||||||
* @param chillModeStatus
|
* @param safeModeStatus
|
||||||
*/
|
*/
|
||||||
public void setChillModeStatus(boolean chillModeStatus) {
|
public void setSafeModeStatus(boolean safeModeStatus) {
|
||||||
chillModePrecheck.setInChillMode(chillModeStatus);
|
safeModePrecheck.setInSafeMode(safeModeStatus);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerManager;
|
import org.apache.hadoop.hdds.scm.container.ContainerManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
|
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
|
||||||
import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore;
|
import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore;
|
||||||
@ -48,7 +48,7 @@
|
|||||||
* ContainerManager containerManager;
|
* ContainerManager containerManager;
|
||||||
* BlockManager scmBlockManager;
|
* BlockManager scmBlockManager;
|
||||||
* ReplicationManager replicationManager;
|
* ReplicationManager replicationManager;
|
||||||
* SCMChillModeManager scmChillModeManager;
|
* SCMSafeModeManager scmSafeModeManager;
|
||||||
* CertificateServer certificateServer;
|
* CertificateServer certificateServer;
|
||||||
* SCMMetadata scmMetadataStore.
|
* SCMMetadata scmMetadataStore.
|
||||||
*
|
*
|
||||||
@ -62,7 +62,7 @@ public final class SCMConfigurator {
|
|||||||
private ContainerManager containerManager;
|
private ContainerManager containerManager;
|
||||||
private BlockManager scmBlockManager;
|
private BlockManager scmBlockManager;
|
||||||
private ReplicationManager replicationManager;
|
private ReplicationManager replicationManager;
|
||||||
private SCMChillModeManager scmChillModeManager;
|
private SCMSafeModeManager scmSafeModeManager;
|
||||||
private CertificateServer certificateServer;
|
private CertificateServer certificateServer;
|
||||||
private SCMMetadataStore metadataStore;
|
private SCMMetadataStore metadataStore;
|
||||||
|
|
||||||
@ -111,12 +111,12 @@ public void setReplicationManager(ReplicationManager replicationManager) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows user to specify a custom version of Chill Mode Manager to use
|
* Allows user to specify a custom version of Safe Mode Manager to use
|
||||||
* with this SCM.
|
* with this SCM.
|
||||||
* @param scmChillModeManager - ChillMode Manager.
|
* @param scmSafeModeManager - SafeMode Manager.
|
||||||
*/
|
*/
|
||||||
public void setScmChillModeManager(SCMChillModeManager scmChillModeManager) {
|
public void setScmSafeModeManager(SCMSafeModeManager scmSafeModeManager) {
|
||||||
this.scmChillModeManager = scmChillModeManager;
|
this.scmSafeModeManager = scmSafeModeManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -178,11 +178,11 @@ public ReplicationManager getReplicationManager() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets Chill Mode Manager.
|
* Gets Safe Mode Manager.
|
||||||
* @return Chill Mode manager.
|
* @return Safe Mode manager.
|
||||||
*/
|
*/
|
||||||
public SCMChillModeManager getScmChillModeManager() {
|
public SCMSafeModeManager getScmSafeModeManager() {
|
||||||
return scmChillModeManager;
|
return scmSafeModeManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -49,16 +49,16 @@ public interface SCMMXBean extends ServiceRuntimeInfo {
|
|||||||
Map<String, String> getContainerReport();
|
Map<String, String> getContainerReport();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns chill mode status.
|
* Returns safe mode status.
|
||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
boolean isInChillMode();
|
boolean isInSafeMode();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns live chill mode container threshold.
|
* Returns live safe mode container threshold.
|
||||||
* @return String
|
* @return String
|
||||||
*/
|
*/
|
||||||
double getChillModeCurrentContainerThreshold();
|
double getSafeModeCurrentContainerThreshold();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the container count in all states.
|
* Returns the container count in all states.
|
||||||
|
@ -40,8 +40,8 @@
|
|||||||
import org.apache.hadoop.hdds.scm.block.BlockManagerImpl;
|
import org.apache.hadoop.hdds.scm.block.BlockManagerImpl;
|
||||||
import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl;
|
import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl;
|
||||||
import org.apache.hadoop.hdds.scm.block.PendingDeleteHandler;
|
import org.apache.hadoop.hdds.scm.block.PendingDeleteHandler;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.ChillModeHandler;
|
import org.apache.hadoop.hdds.scm.safemode.SafeModeHandler;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler;
|
import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler;
|
||||||
import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler;
|
import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerActionsHandler;
|
import org.apache.hadoop.hdds.scm.container.ContainerActionsHandler;
|
||||||
@ -196,12 +196,12 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
|
|||||||
|
|
||||||
private final LeaseManager<Long> commandWatcherLeaseManager;
|
private final LeaseManager<Long> commandWatcherLeaseManager;
|
||||||
|
|
||||||
private SCMChillModeManager scmChillModeManager;
|
private SCMSafeModeManager scmSafeModeManager;
|
||||||
private CertificateServer certificateServer;
|
private CertificateServer certificateServer;
|
||||||
|
|
||||||
private JvmPauseMonitor jvmPauseMonitor;
|
private JvmPauseMonitor jvmPauseMonitor;
|
||||||
private final OzoneConfiguration configuration;
|
private final OzoneConfiguration configuration;
|
||||||
private final ChillModeHandler chillModeHandler;
|
private final SafeModeHandler safeModeHandler;
|
||||||
private SCMContainerMetrics scmContainerMetrics;
|
private SCMContainerMetrics scmContainerMetrics;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -289,7 +289,7 @@ public StorageContainerManager(OzoneConfiguration conf,
|
|||||||
NodeReportHandler nodeReportHandler =
|
NodeReportHandler nodeReportHandler =
|
||||||
new NodeReportHandler(scmNodeManager);
|
new NodeReportHandler(scmNodeManager);
|
||||||
PipelineReportHandler pipelineReportHandler =
|
PipelineReportHandler pipelineReportHandler =
|
||||||
new PipelineReportHandler(scmChillModeManager, pipelineManager, conf);
|
new PipelineReportHandler(scmSafeModeManager, pipelineManager, conf);
|
||||||
CommandStatusReportHandler cmdStatusReportHandler =
|
CommandStatusReportHandler cmdStatusReportHandler =
|
||||||
new CommandStatusReportHandler();
|
new CommandStatusReportHandler();
|
||||||
|
|
||||||
@ -334,7 +334,7 @@ public StorageContainerManager(OzoneConfiguration conf,
|
|||||||
clientProtocolServer = new SCMClientProtocolServer(conf, this);
|
clientProtocolServer = new SCMClientProtocolServer(conf, this);
|
||||||
httpServer = new StorageContainerManagerHttpServer(conf);
|
httpServer = new StorageContainerManagerHttpServer(conf);
|
||||||
|
|
||||||
chillModeHandler = new ChillModeHandler(configuration,
|
safeModeHandler = new SafeModeHandler(configuration,
|
||||||
clientProtocolServer, scmBlockManager, replicationManager,
|
clientProtocolServer, scmBlockManager, replicationManager,
|
||||||
pipelineManager);
|
pipelineManager);
|
||||||
|
|
||||||
@ -358,7 +358,7 @@ public StorageContainerManager(OzoneConfiguration conf,
|
|||||||
(DeletedBlockLogImpl) scmBlockManager.getDeletedBlockLog());
|
(DeletedBlockLogImpl) scmBlockManager.getDeletedBlockLog());
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionHandler);
|
eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionHandler);
|
||||||
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
|
eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler);
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, chillModeHandler);
|
eventQueue.addHandler(SCMEvents.SAFE_MODE_STATUS, safeModeHandler);
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
registerMetricsSource(this);
|
registerMetricsSource(this);
|
||||||
}
|
}
|
||||||
@ -372,7 +372,7 @@ public StorageContainerManager(OzoneConfiguration conf,
|
|||||||
* Container Manager
|
* Container Manager
|
||||||
* Block Manager
|
* Block Manager
|
||||||
* Replication Manager
|
* Replication Manager
|
||||||
* Chill Mode Manager
|
* Safe Mode Manager
|
||||||
*
|
*
|
||||||
* @param conf - Ozone Configuration.
|
* @param conf - Ozone Configuration.
|
||||||
* @param configurator - A customizer which allows different managers to be
|
* @param configurator - A customizer which allows different managers to be
|
||||||
@ -418,10 +418,10 @@ private void initalizeSystemManagers(OzoneConfiguration conf,
|
|||||||
replicationManager = new ReplicationManager(conf,
|
replicationManager = new ReplicationManager(conf,
|
||||||
containerManager, containerPlacementPolicy, eventQueue);
|
containerManager, containerPlacementPolicy, eventQueue);
|
||||||
}
|
}
|
||||||
if(configurator.getScmChillModeManager() != null) {
|
if(configurator.getScmSafeModeManager() != null) {
|
||||||
scmChillModeManager = configurator.getScmChillModeManager();
|
scmSafeModeManager = configurator.getScmSafeModeManager();
|
||||||
} else {
|
} else {
|
||||||
scmChillModeManager = new SCMChillModeManager(conf,
|
scmSafeModeManager = new SCMSafeModeManager(conf,
|
||||||
containerManager.getContainers(), pipelineManager, eventQueue);
|
containerManager.getContainers(), pipelineManager, eventQueue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1074,13 +1074,13 @@ public BlockManager getScmBlockManager() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public ChillModeHandler getChillModeHandler() {
|
public SafeModeHandler getSafeModeHandler() {
|
||||||
return chillModeHandler;
|
return safeModeHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public SCMChillModeManager getScmChillModeManager() {
|
public SCMSafeModeManager getScmSafeModeManager() {
|
||||||
return scmChillModeManager;
|
return scmSafeModeManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
@ -1149,22 +1149,22 @@ public Map<String, String> getContainerReport() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns live chill mode container threshold.
|
* Returns live safe mode container threshold.
|
||||||
*
|
*
|
||||||
* @return String
|
* @return String
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public double getChillModeCurrentContainerThreshold() {
|
public double getSafeModeCurrentContainerThreshold() {
|
||||||
return getCurrentContainerThreshold();
|
return getCurrentContainerThreshold();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns chill mode status.
|
* Returns safe mode status.
|
||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isInChillMode() {
|
public boolean isInSafeMode() {
|
||||||
return scmChillModeManager.getInChillMode();
|
return scmSafeModeManager.getInSafeMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1175,16 +1175,16 @@ public EventPublisher getEventQueue() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Force SCM out of chill mode.
|
* Force SCM out of safe mode.
|
||||||
*/
|
*/
|
||||||
public boolean exitChillMode() {
|
public boolean exitSafeMode() {
|
||||||
scmChillModeManager.exitChillMode(eventQueue);
|
scmSafeModeManager.exitSafeMode(eventQueue);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public double getCurrentContainerThreshold() {
|
public double getCurrentContainerThreshold() {
|
||||||
return scmChillModeManager.getCurrentContainerThreshold();
|
return scmSafeModeManager.getCurrentContainerThreshold();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -37,8 +37,8 @@ <h2>Status</h2>
|
|||||||
<td>{{$ctrl.overview.jmx.DatanodeRpcPort}}</td>
|
<td>{{$ctrl.overview.jmx.DatanodeRpcPort}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Node Manager: Chill mode status</td>
|
<td>Node Manager: Safe mode status</td>
|
||||||
<td>{{$ctrl.scmmetrics.InChillMode}}</td>
|
<td>{{$ctrl.scmmetrics.InSafeMode}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
|
||||||
import org.apache.hadoop.hdds.scm.TestUtils;
|
import org.apache.hadoop.hdds.scm.TestUtils;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager.ChillModeStatus;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus;
|
||||||
import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler;
|
import org.apache.hadoop.hdds.scm.container.CloseContainerEventHandler;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
||||||
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
|
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
|
||||||
@ -71,7 +71,7 @@ public class TestBlockManager implements EventHandler<Boolean> {
|
|||||||
private static EventQueue eventQueue;
|
private static EventQueue eventQueue;
|
||||||
private int numContainerPerOwnerInPipeline;
|
private int numContainerPerOwnerInPipeline;
|
||||||
private OzoneConfiguration conf;
|
private OzoneConfiguration conf;
|
||||||
private ChillModeStatus chillModeStatus = new ChillModeStatus(false);
|
private SafeModeStatus safeModeStatus = new SafeModeStatus(false);
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public ExpectedException thrown = ExpectedException.none();
|
public ExpectedException thrown = ExpectedException.none();
|
||||||
@ -101,8 +101,8 @@ public void setUp() throws Exception {
|
|||||||
blockManager = (BlockManagerImpl) scm.getScmBlockManager();
|
blockManager = (BlockManagerImpl) scm.getScmBlockManager();
|
||||||
|
|
||||||
eventQueue = new EventQueue();
|
eventQueue = new EventQueue();
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS,
|
eventQueue.addHandler(SCMEvents.SAFE_MODE_STATUS,
|
||||||
scm.getChillModeHandler());
|
scm.getSafeModeHandler());
|
||||||
eventQueue.addHandler(SCMEvents.START_REPLICATION, this);
|
eventQueue.addHandler(SCMEvents.START_REPLICATION, this);
|
||||||
CloseContainerEventHandler closeContainerHandler =
|
CloseContainerEventHandler closeContainerHandler =
|
||||||
new CloseContainerEventHandler(pipelineManager, mapping);
|
new CloseContainerEventHandler(pipelineManager, mapping);
|
||||||
@ -124,9 +124,9 @@ public void cleanup() throws IOException {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllocateBlock() throws Exception {
|
public void testAllocateBlock() throws Exception {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !blockManager.isScmInChillMode();
|
return !blockManager.isScmInSafeMode();
|
||||||
}, 10, 1000 * 5);
|
}, 10, 1000 * 5);
|
||||||
AllocatedBlock block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
AllocatedBlock block = blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
||||||
type, factor, containerOwner, new ExcludeList());
|
type, factor, containerOwner, new ExcludeList());
|
||||||
@ -135,9 +135,9 @@ public void testAllocateBlock() throws Exception {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllocateOversizedBlock() throws Exception {
|
public void testAllocateOversizedBlock() throws Exception {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !blockManager.isScmInChillMode();
|
return !blockManager.isScmInSafeMode();
|
||||||
}, 10, 1000 * 5);
|
}, 10, 1000 * 5);
|
||||||
long size = 6 * GB;
|
long size = 6 * GB;
|
||||||
thrown.expectMessage("Unsupported block size");
|
thrown.expectMessage("Unsupported block size");
|
||||||
@ -147,25 +147,25 @@ public void testAllocateOversizedBlock() throws Exception {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllocateBlockFailureInChillMode() throws Exception {
|
public void testAllocateBlockFailureInSafeMode() throws Exception {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS,
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS,
|
||||||
new ChillModeStatus(true));
|
new SafeModeStatus(true));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return blockManager.isScmInChillMode();
|
return blockManager.isScmInSafeMode();
|
||||||
}, 10, 1000 * 5);
|
}, 10, 1000 * 5);
|
||||||
// Test1: In chill mode expect an SCMException.
|
// Test1: In safe mode expect an SCMException.
|
||||||
thrown.expectMessage("ChillModePrecheck failed for "
|
thrown.expectMessage("SafeModePrecheck failed for "
|
||||||
+ "allocateBlock");
|
+ "allocateBlock");
|
||||||
blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
||||||
type, factor, containerOwner, new ExcludeList());
|
type, factor, containerOwner, new ExcludeList());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllocateBlockSucInChillMode() throws Exception {
|
public void testAllocateBlockSucInSafeMode() throws Exception {
|
||||||
// Test2: Exit chill mode and then try allocateBock again.
|
// Test2: Exit safe mode and then try allocateBock again.
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !blockManager.isScmInChillMode();
|
return !blockManager.isScmInSafeMode();
|
||||||
}, 10, 1000 * 5);
|
}, 10, 1000 * 5);
|
||||||
Assert.assertNotNull(blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
Assert.assertNotNull(blockManager.allocateBlock(DEFAULT_BLOCK_SIZE,
|
||||||
type, factor, containerOwner, new ExcludeList()));
|
type, factor, containerOwner, new ExcludeList()));
|
||||||
@ -174,9 +174,9 @@ public void testAllocateBlockSucInChillMode() throws Exception {
|
|||||||
@Test(timeout = 10000)
|
@Test(timeout = 10000)
|
||||||
public void testMultipleBlockAllocation()
|
public void testMultipleBlockAllocation()
|
||||||
throws IOException, TimeoutException, InterruptedException {
|
throws IOException, TimeoutException, InterruptedException {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils
|
GenericTestUtils
|
||||||
.waitFor(() -> !blockManager.isScmInChillMode(), 10, 1000 * 5);
|
.waitFor(() -> !blockManager.isScmInSafeMode(), 10, 1000 * 5);
|
||||||
|
|
||||||
pipelineManager.createPipeline(type, factor);
|
pipelineManager.createPipeline(type, factor);
|
||||||
pipelineManager.createPipeline(type, factor);
|
pipelineManager.createPipeline(type, factor);
|
||||||
@ -216,9 +216,9 @@ private boolean verifyNumberOfContainersInPipelines(
|
|||||||
@Test(timeout = 10000)
|
@Test(timeout = 10000)
|
||||||
public void testMultipleBlockAllocationWithClosedContainer()
|
public void testMultipleBlockAllocationWithClosedContainer()
|
||||||
throws IOException, TimeoutException, InterruptedException {
|
throws IOException, TimeoutException, InterruptedException {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils
|
GenericTestUtils
|
||||||
.waitFor(() -> !blockManager.isScmInChillMode(), 10, 1000 * 5);
|
.waitFor(() -> !blockManager.isScmInSafeMode(), 10, 1000 * 5);
|
||||||
|
|
||||||
// create pipelines
|
// create pipelines
|
||||||
for (int i = 0;
|
for (int i = 0;
|
||||||
@ -268,9 +268,9 @@ public void testMultipleBlockAllocationWithClosedContainer()
|
|||||||
@Test(timeout = 10000)
|
@Test(timeout = 10000)
|
||||||
public void testBlockAllocationWithNoAvailablePipelines()
|
public void testBlockAllocationWithNoAvailablePipelines()
|
||||||
throws IOException, TimeoutException, InterruptedException {
|
throws IOException, TimeoutException, InterruptedException {
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
GenericTestUtils
|
GenericTestUtils
|
||||||
.waitFor(() -> !blockManager.isScmInChillMode(), 10, 1000 * 5);
|
.waitFor(() -> !blockManager.isScmInSafeMode(), 10, 1000 * 5);
|
||||||
|
|
||||||
for (Pipeline pipeline : pipelineManager.getPipelines()) {
|
for (Pipeline pipeline : pipelineManager.getPipelines()) {
|
||||||
pipelineManager.finalizeAndDestroyPipeline(pipeline, false);
|
pipelineManager.finalizeAndDestroyPipeline(pipeline, false);
|
||||||
|
@ -78,7 +78,7 @@ public class MockNodeManager implements NodeManager {
|
|||||||
private final List<DatanodeDetails> deadNodes;
|
private final List<DatanodeDetails> deadNodes;
|
||||||
private final Map<DatanodeDetails, SCMNodeStat> nodeMetricMap;
|
private final Map<DatanodeDetails, SCMNodeStat> nodeMetricMap;
|
||||||
private final SCMNodeStat aggregateStat;
|
private final SCMNodeStat aggregateStat;
|
||||||
private boolean chillmode;
|
private boolean safemode;
|
||||||
private final Map<UUID, List<SCMCommand>> commandMap;
|
private final Map<UUID, List<SCMCommand>> commandMap;
|
||||||
private final Node2PipelineMap node2PipelineMap;
|
private final Node2PipelineMap node2PipelineMap;
|
||||||
private final Node2ContainerMap node2ContainerMap;
|
private final Node2ContainerMap node2ContainerMap;
|
||||||
@ -98,7 +98,7 @@ public MockNodeManager(boolean initializeFakeNodes, int nodeCount) {
|
|||||||
populateNodeMetric(dd, x);
|
populateNodeMetric(dd, x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
chillmode = false;
|
safemode = false;
|
||||||
this.commandMap = new HashMap<>();
|
this.commandMap = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,11 +132,11 @@ private void populateNodeMetric(DatanodeDetails datanodeDetails, int x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the chill mode value.
|
* Sets the safe mode value.
|
||||||
* @param chillmode boolean
|
* @param safemode boolean
|
||||||
*/
|
*/
|
||||||
public void setChillmode(boolean chillmode) {
|
public void setSafemode(boolean safemode) {
|
||||||
this.chillmode = chillmode;
|
this.safemode = safemode;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -107,8 +107,8 @@ public static void cleanup() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void clearChillMode() {
|
public void clearSafeMode() {
|
||||||
nodeManager.setChillmode(false);
|
nodeManager.setSafemode(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -266,7 +266,7 @@ public void testCloseContainer() throws IOException {
|
|||||||
*/
|
*/
|
||||||
private ContainerInfo createContainer()
|
private ContainerInfo createContainer()
|
||||||
throws IOException {
|
throws IOException {
|
||||||
nodeManager.setChillmode(false);
|
nodeManager.setSafemode(false);
|
||||||
return containerManager
|
return containerManager
|
||||||
.allocateContainer(xceiverClientManager.getType(),
|
.allocateContainer(xceiverClientManager.getType(),
|
||||||
xceiverClientManager.getFactor(), containerOwner);
|
xceiverClientManager.getFactor(), containerOwner);
|
||||||
|
@ -132,7 +132,7 @@ SCMNodeManager createNodeManager(OzoneConfiguration config)
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that Node manager handles heartbeats correctly, and comes out of
|
* Tests that Node manager handles heartbeats correctly, and comes out of
|
||||||
* chill Mode.
|
* safe Mode.
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
@ -160,7 +160,7 @@ public void testScmHeartbeat()
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* asserts that if we send no heartbeats node manager stays in chillmode.
|
* asserts that if we send no heartbeats node manager stays in safemode.
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
@ -47,16 +47,16 @@
|
|||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class tests HealthyPipelineChillMode rule.
|
* This class tests HealthyPipelineSafeMode rule.
|
||||||
*/
|
*/
|
||||||
public class TestHealthyPipelineChillModeRule {
|
public class TestHealthyPipelineSafeModeRule {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHealthyPipelineChillModeRuleWithNoPipelines()
|
public void testHealthyPipelineSafeModeRuleWithNoPipelines()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
String storageDir = GenericTestUtils.getTempPath(
|
String storageDir = GenericTestUtils.getTempPath(
|
||||||
TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID());
|
TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());
|
||||||
try {
|
try {
|
||||||
EventQueue eventQueue = new EventQueue();
|
EventQueue eventQueue = new EventQueue();
|
||||||
List<ContainerInfo> containers = new ArrayList<>();
|
List<ContainerInfo> containers = new ArrayList<>();
|
||||||
@ -67,7 +67,7 @@ public void testHealthyPipelineChillModeRuleWithNoPipelines()
|
|||||||
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
||||||
// enable pipeline check
|
// enable pipeline check
|
||||||
config.setBoolean(
|
config.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||||
|
|
||||||
|
|
||||||
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
||||||
@ -77,14 +77,14 @@ public void testHealthyPipelineChillModeRuleWithNoPipelines()
|
|||||||
pipelineManager.getStateManager(), config);
|
pipelineManager.getStateManager(), config);
|
||||||
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
|
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
|
||||||
mockRatisProvider);
|
mockRatisProvider);
|
||||||
SCMChillModeManager scmChillModeManager = new SCMChillModeManager(
|
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, pipelineManager, eventQueue);
|
config, containers, pipelineManager, eventQueue);
|
||||||
|
|
||||||
HealthyPipelineChillModeRule healthyPipelineChillModeRule =
|
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
|
||||||
scmChillModeManager.getHealthyPipelineChillModeRule();
|
scmSafeModeManager.getHealthyPipelineSafeModeRule();
|
||||||
|
|
||||||
// This should be immediately satisfied, as no pipelines are there yet.
|
// This should be immediately satisfied, as no pipelines are there yet.
|
||||||
Assert.assertTrue(healthyPipelineChillModeRule.validate());
|
Assert.assertTrue(healthyPipelineSafeModeRule.validate());
|
||||||
} finally {
|
} finally {
|
||||||
FileUtil.fullyDelete(new File(storageDir));
|
FileUtil.fullyDelete(new File(storageDir));
|
||||||
}
|
}
|
||||||
@ -93,10 +93,10 @@ public void testHealthyPipelineChillModeRuleWithNoPipelines()
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception {
|
public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
|
||||||
|
|
||||||
String storageDir = GenericTestUtils.getTempPath(
|
String storageDir = GenericTestUtils.getTempPath(
|
||||||
TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID());
|
TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
EventQueue eventQueue = new EventQueue();
|
EventQueue eventQueue = new EventQueue();
|
||||||
@ -112,7 +112,7 @@ public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception {
|
|||||||
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
||||||
// enable pipeline check
|
// enable pipeline check
|
||||||
config.setBoolean(
|
config.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||||
|
|
||||||
|
|
||||||
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
||||||
@ -136,15 +136,15 @@ public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception {
|
|||||||
HddsProtos.ReplicationFactor.THREE);
|
HddsProtos.ReplicationFactor.THREE);
|
||||||
|
|
||||||
|
|
||||||
SCMChillModeManager scmChillModeManager = new SCMChillModeManager(
|
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, pipelineManager, eventQueue);
|
config, containers, pipelineManager, eventQueue);
|
||||||
|
|
||||||
HealthyPipelineChillModeRule healthyPipelineChillModeRule =
|
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
|
||||||
scmChillModeManager.getHealthyPipelineChillModeRule();
|
scmSafeModeManager.getHealthyPipelineSafeModeRule();
|
||||||
|
|
||||||
|
|
||||||
// No datanodes have sent pipelinereport from datanode
|
// No datanodes have sent pipelinereport from datanode
|
||||||
Assert.assertFalse(healthyPipelineChillModeRule.validate());
|
Assert.assertFalse(healthyPipelineSafeModeRule.validate());
|
||||||
|
|
||||||
// Fire pipeline report from all datanodes in first pipeline, as here we
|
// Fire pipeline report from all datanodes in first pipeline, as here we
|
||||||
// have 3 pipelines, 10% is 0.3, when doing ceil it is 1. So, we should
|
// have 3 pipelines, 10% is 0.3, when doing ceil it is 1. So, we should
|
||||||
@ -157,7 +157,7 @@ public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception {
|
|||||||
// manager in open state for test case simplicity.
|
// manager in open state for test case simplicity.
|
||||||
|
|
||||||
firePipelineEvent(pipeline1, eventQueue);
|
firePipelineEvent(pipeline1, eventQueue);
|
||||||
GenericTestUtils.waitFor(() -> healthyPipelineChillModeRule.validate(),
|
GenericTestUtils.waitFor(() -> healthyPipelineSafeModeRule.validate(),
|
||||||
1000, 5000);
|
1000, 5000);
|
||||||
} finally {
|
} finally {
|
||||||
FileUtil.fullyDelete(new File(storageDir));
|
FileUtil.fullyDelete(new File(storageDir));
|
||||||
@ -167,11 +167,11 @@ public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHealthyPipelineChillModeRuleWithMixedPipelines()
|
public void testHealthyPipelineSafeModeRuleWithMixedPipelines()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
String storageDir = GenericTestUtils.getTempPath(
|
String storageDir = GenericTestUtils.getTempPath(
|
||||||
TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID());
|
TestHealthyPipelineSafeModeRule.class.getName() + UUID.randomUUID());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
EventQueue eventQueue = new EventQueue();
|
EventQueue eventQueue = new EventQueue();
|
||||||
@ -187,7 +187,7 @@ public void testHealthyPipelineChillModeRuleWithMixedPipelines()
|
|||||||
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
||||||
// enable pipeline check
|
// enable pipeline check
|
||||||
config.setBoolean(
|
config.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||||
|
|
||||||
|
|
||||||
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
||||||
@ -210,20 +210,20 @@ public void testHealthyPipelineChillModeRuleWithMixedPipelines()
|
|||||||
HddsProtos.ReplicationFactor.THREE);
|
HddsProtos.ReplicationFactor.THREE);
|
||||||
|
|
||||||
|
|
||||||
SCMChillModeManager scmChillModeManager = new SCMChillModeManager(
|
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, pipelineManager, eventQueue);
|
config, containers, pipelineManager, eventQueue);
|
||||||
|
|
||||||
HealthyPipelineChillModeRule healthyPipelineChillModeRule =
|
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
|
||||||
scmChillModeManager.getHealthyPipelineChillModeRule();
|
scmSafeModeManager.getHealthyPipelineSafeModeRule();
|
||||||
|
|
||||||
|
|
||||||
// No datanodes have sent pipelinereport from datanode
|
// No datanodes have sent pipelinereport from datanode
|
||||||
Assert.assertFalse(healthyPipelineChillModeRule.validate());
|
Assert.assertFalse(healthyPipelineSafeModeRule.validate());
|
||||||
|
|
||||||
|
|
||||||
GenericTestUtils.LogCapturer logCapturer =
|
GenericTestUtils.LogCapturer logCapturer =
|
||||||
GenericTestUtils.LogCapturer.captureLogs(LoggerFactory.getLogger(
|
GenericTestUtils.LogCapturer.captureLogs(LoggerFactory.getLogger(
|
||||||
SCMChillModeManager.class));
|
SCMSafeModeManager.class));
|
||||||
|
|
||||||
// fire event with pipeline report with ratis type and factor 1
|
// fire event with pipeline report with ratis type and factor 1
|
||||||
// pipeline, validate() should return false
|
// pipeline, validate() should return false
|
||||||
@ -232,12 +232,12 @@ public void testHealthyPipelineChillModeRuleWithMixedPipelines()
|
|||||||
GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
|
GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
|
||||||
"reported count is 0"),
|
"reported count is 0"),
|
||||||
1000, 5000);
|
1000, 5000);
|
||||||
Assert.assertFalse(healthyPipelineChillModeRule.validate());
|
Assert.assertFalse(healthyPipelineSafeModeRule.validate());
|
||||||
|
|
||||||
firePipelineEvent(pipeline2, eventQueue);
|
firePipelineEvent(pipeline2, eventQueue);
|
||||||
firePipelineEvent(pipeline3, eventQueue);
|
firePipelineEvent(pipeline3, eventQueue);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> healthyPipelineChillModeRule.validate(),
|
GenericTestUtils.waitFor(() -> healthyPipelineSafeModeRule.validate(),
|
||||||
1000, 5000);
|
1000, 5000);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
@ -15,7 +15,7 @@
|
|||||||
* the License.
|
* the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
@ -45,13 +45,13 @@
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class tests OneReplicaPipelineChillModeRule.
|
* This class tests OneReplicaPipelineSafeModeRule.
|
||||||
*/
|
*/
|
||||||
public class TestOneReplicaPipelineChillModeRule {
|
public class TestOneReplicaPipelineSafeModeRule {
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public TemporaryFolder folder = new TemporaryFolder();
|
public TemporaryFolder folder = new TemporaryFolder();
|
||||||
private OneReplicaPipelineChillModeRule rule;
|
private OneReplicaPipelineSafeModeRule rule;
|
||||||
private SCMPipelineManager pipelineManager;
|
private SCMPipelineManager pipelineManager;
|
||||||
private EventQueue eventQueue;
|
private EventQueue eventQueue;
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ private void setup(int nodes, int pipelineFactorThreeCount,
|
|||||||
int pipelineFactorOneCount) throws Exception {
|
int pipelineFactorOneCount) throws Exception {
|
||||||
OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
|
OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
|
||||||
ozoneConfiguration.setBoolean(
|
ozoneConfiguration.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||||
ozoneConfiguration.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
ozoneConfiguration.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
||||||
folder.newFolder().toString());
|
folder.newFolder().toString());
|
||||||
|
|
||||||
@ -84,11 +84,11 @@ private void setup(int nodes, int pipelineFactorThreeCount,
|
|||||||
createPipelines(pipelineFactorOneCount,
|
createPipelines(pipelineFactorOneCount,
|
||||||
HddsProtos.ReplicationFactor.ONE);
|
HddsProtos.ReplicationFactor.ONE);
|
||||||
|
|
||||||
SCMChillModeManager scmChillModeManager =
|
SCMSafeModeManager scmSafeModeManager =
|
||||||
new SCMChillModeManager(ozoneConfiguration, containers,
|
new SCMSafeModeManager(ozoneConfiguration, containers,
|
||||||
pipelineManager, eventQueue);
|
pipelineManager, eventQueue);
|
||||||
|
|
||||||
rule = scmChillModeManager.getOneReplicaPipelineChillModeRule();
|
rule = scmSafeModeManager.getOneReplicaPipelineSafeModeRule();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -104,7 +104,7 @@ public void testOneReplicaPipelineRule() throws Exception {
|
|||||||
|
|
||||||
GenericTestUtils.LogCapturer logCapturer =
|
GenericTestUtils.LogCapturer logCapturer =
|
||||||
GenericTestUtils.LogCapturer.captureLogs(
|
GenericTestUtils.LogCapturer.captureLogs(
|
||||||
LoggerFactory.getLogger(SCMChillModeManager.class));
|
LoggerFactory.getLogger(SCMSafeModeManager.class));
|
||||||
|
|
||||||
List<Pipeline> pipelines = pipelineManager.getPipelines();
|
List<Pipeline> pipelines = pipelineManager.getPipelines();
|
||||||
for (int i = 0; i < pipelineFactorThreeCount -1; i++) {
|
for (int i = 0; i < pipelineFactorThreeCount -1; i++) {
|
||||||
@ -141,7 +141,7 @@ public void testOneReplicaPipelineRuleMixedPipelines() throws Exception {
|
|||||||
|
|
||||||
GenericTestUtils.LogCapturer logCapturer =
|
GenericTestUtils.LogCapturer logCapturer =
|
||||||
GenericTestUtils.LogCapturer.captureLogs(
|
GenericTestUtils.LogCapturer.captureLogs(
|
||||||
LoggerFactory.getLogger(SCMChillModeManager.class));
|
LoggerFactory.getLogger(SCMSafeModeManager.class));
|
||||||
|
|
||||||
List<Pipeline> pipelines =
|
List<Pipeline> pipelines =
|
||||||
pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS,
|
pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS,
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
@ -52,12 +52,12 @@
|
|||||||
import org.junit.rules.Timeout;
|
import org.junit.rules.Timeout;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
/** Test class for SCMChillModeManager.
|
/** Test class for SCMSafeModeManager.
|
||||||
*/
|
*/
|
||||||
public class TestSCMChillModeManager {
|
public class TestSCMSafeModeManager {
|
||||||
|
|
||||||
private static EventQueue queue;
|
private static EventQueue queue;
|
||||||
private SCMChillModeManager scmChillModeManager;
|
private SCMSafeModeManager scmSafeModeManager;
|
||||||
private static Configuration config;
|
private static Configuration config;
|
||||||
private List<ContainerInfo> containers;
|
private List<ContainerInfo> containers;
|
||||||
|
|
||||||
@ -74,62 +74,62 @@ public static void setUp() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeState() throws Exception {
|
public void testSafeModeState() throws Exception {
|
||||||
// Test 1: test for 0 containers
|
// Test 1: test for 0 containers
|
||||||
testChillMode(0);
|
testSafeMode(0);
|
||||||
|
|
||||||
// Test 2: test for 20 containers
|
// Test 2: test for 20 containers
|
||||||
testChillMode(20);
|
testSafeMode(20);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeStateWithNullContainers() {
|
public void testSafeModeStateWithNullContainers() {
|
||||||
new SCMChillModeManager(config, null, null, queue);
|
new SCMSafeModeManager(config, null, null, queue);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testChillMode(int numContainers) throws Exception {
|
private void testSafeMode(int numContainers) throws Exception {
|
||||||
containers = new ArrayList<>();
|
containers = new ArrayList<>();
|
||||||
containers.addAll(HddsTestUtils.getContainerInfo(numContainers));
|
containers.addAll(HddsTestUtils.getContainerInfo(numContainers));
|
||||||
// Assign open state to containers to be included in the chill mode
|
// Assign open state to containers to be included in the safe mode
|
||||||
// container list
|
// container list
|
||||||
for (ContainerInfo container : containers) {
|
for (ContainerInfo container : containers) {
|
||||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||||
}
|
}
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 100, 1000 * 5);
|
}, 100, 1000 * 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeExitRule() throws Exception {
|
public void testSafeModeExitRule() throws Exception {
|
||||||
containers = new ArrayList<>();
|
containers = new ArrayList<>();
|
||||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||||
// Assign open state to containers to be included in the chill mode
|
// Assign open state to containers to be included in the safe mode
|
||||||
// container list
|
// container list
|
||||||
for (ContainerInfo container : containers) {
|
for (ContainerInfo container : containers) {
|
||||||
container.setState(HddsProtos.LifeCycleState.CLOSED);
|
container.setState(HddsProtos.LifeCycleState.CLOSED);
|
||||||
}
|
}
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
testContainerThreshold(containers.subList(0, 25), 0.25);
|
testContainerThreshold(containers.subList(0, 25), 0.25);
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
testContainerThreshold(containers.subList(25, 50), 0.50);
|
testContainerThreshold(containers.subList(25, 50), 0.50);
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
testContainerThreshold(containers.subList(50, 75), 0.75);
|
testContainerThreshold(containers.subList(50, 75), 0.75);
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
testContainerThreshold(containers.subList(75, 100), 1.0);
|
testContainerThreshold(containers.subList(75, 100), 1.0);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 100, 1000 * 5);
|
}, 100, 1000 * 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,24 +140,24 @@ private OzoneConfiguration createConf(double healthyPercent,
|
|||||||
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
||||||
tempDir.newFolder().toString());
|
tempDir.newFolder().toString());
|
||||||
conf.setBoolean(
|
conf.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
true);
|
true);
|
||||||
conf.setDouble(HddsConfigKeys.
|
conf.setDouble(HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT, healthyPercent);
|
HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT, healthyPercent);
|
||||||
conf.setDouble(HddsConfigKeys.
|
conf.setDouble(HddsConfigKeys.
|
||||||
HDDS_SCM_CHILLMODE_ONE_NODE_REPORTED_PIPELINE_PCT, oneReplicaPercent);
|
HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT, oneReplicaPercent);
|
||||||
|
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeExitRuleWithPipelineAvailabilityCheck()
|
public void testSafeModeExitRuleWithPipelineAvailabilityCheck()
|
||||||
throws Exception{
|
throws Exception{
|
||||||
testChillModeExitRuleWithPipelineAvailabilityCheck(100, 30, 8, 0.90, 1);
|
testSafeModeExitRuleWithPipelineAvailabilityCheck(100, 30, 8, 0.90, 1);
|
||||||
testChillModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0.10, 0.9);
|
testSafeModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0.10, 0.9);
|
||||||
testChillModeExitRuleWithPipelineAvailabilityCheck(100, 30, 8, 0, 0.9);
|
testSafeModeExitRuleWithPipelineAvailabilityCheck(100, 30, 8, 0, 0.9);
|
||||||
testChillModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0, 0);
|
testSafeModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0, 0);
|
||||||
testChillModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0, 0.5);
|
testSafeModeExitRuleWithPipelineAvailabilityCheck(100, 90, 22, 0, 0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -169,7 +169,7 @@ public void testFailWithIncorrectValueForHealthyPipelinePercent()
|
|||||||
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
||||||
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
||||||
mockNodeManager, queue);
|
mockNodeManager, queue);
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
conf, containers, pipelineManager, queue);
|
conf, containers, pipelineManager, queue);
|
||||||
fail("testFailWithIncorrectValueForHealthyPipelinePercent");
|
fail("testFailWithIncorrectValueForHealthyPipelinePercent");
|
||||||
} catch (IllegalArgumentException ex) {
|
} catch (IllegalArgumentException ex) {
|
||||||
@ -187,7 +187,7 @@ public void testFailWithIncorrectValueForOneReplicaPipelinePercent()
|
|||||||
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
||||||
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
||||||
mockNodeManager, queue);
|
mockNodeManager, queue);
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
conf, containers, pipelineManager, queue);
|
conf, containers, pipelineManager, queue);
|
||||||
fail("testFailWithIncorrectValueForOneReplicaPipelinePercent");
|
fail("testFailWithIncorrectValueForOneReplicaPipelinePercent");
|
||||||
} catch (IllegalArgumentException ex) {
|
} catch (IllegalArgumentException ex) {
|
||||||
@ -197,16 +197,16 @@ public void testFailWithIncorrectValueForOneReplicaPipelinePercent()
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFailWithIncorrectValueForChillModePercent() throws Exception {
|
public void testFailWithIncorrectValueForSafeModePercent() throws Exception {
|
||||||
try {
|
try {
|
||||||
OzoneConfiguration conf = createConf(0.9, 0.1);
|
OzoneConfiguration conf = createConf(0.9, 0.1);
|
||||||
conf.setDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT, -1.0);
|
conf.setDouble(HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT, -1.0);
|
||||||
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
MockNodeManager mockNodeManager = new MockNodeManager(true, 10);
|
||||||
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
PipelineManager pipelineManager = new SCMPipelineManager(conf,
|
||||||
mockNodeManager, queue);
|
mockNodeManager, queue);
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
conf, containers, pipelineManager, queue);
|
conf, containers, pipelineManager, queue);
|
||||||
fail("testFailWithIncorrectValueForChillModePercent");
|
fail("testFailWithIncorrectValueForSafeModePercent");
|
||||||
} catch (IllegalArgumentException ex) {
|
} catch (IllegalArgumentException ex) {
|
||||||
GenericTestUtils.assertExceptionContains("value should be >= 0.0 and <=" +
|
GenericTestUtils.assertExceptionContains("value should be >= 0.0 and <=" +
|
||||||
" 1.0", ex);
|
" 1.0", ex);
|
||||||
@ -214,7 +214,7 @@ public void testFailWithIncorrectValueForChillModePercent() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testChillModeExitRuleWithPipelineAvailabilityCheck(
|
public void testSafeModeExitRuleWithPipelineAvailabilityCheck(
|
||||||
int containerCount, int nodeCount, int pipelineCount,
|
int containerCount, int nodeCount, int pipelineCount,
|
||||||
double healthyPipelinePercent, double oneReplicaPercent)
|
double healthyPipelinePercent, double oneReplicaPercent)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
@ -244,20 +244,20 @@ public void testChillModeExitRuleWithPipelineAvailabilityCheck(
|
|||||||
container.setState(HddsProtos.LifeCycleState.CLOSED);
|
container.setState(HddsProtos.LifeCycleState.CLOSED);
|
||||||
}
|
}
|
||||||
|
|
||||||
scmChillModeManager = new SCMChillModeManager(conf, containers,
|
scmSafeModeManager = new SCMSafeModeManager(conf, containers,
|
||||||
pipelineManager, queue);
|
pipelineManager, queue);
|
||||||
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
testContainerThreshold(containers, 1.0);
|
testContainerThreshold(containers, 1.0);
|
||||||
|
|
||||||
List<Pipeline> pipelines = pipelineManager.getPipelines();
|
List<Pipeline> pipelines = pipelineManager.getPipelines();
|
||||||
|
|
||||||
int healthyPipelineThresholdCount =
|
int healthyPipelineThresholdCount =
|
||||||
scmChillModeManager.getHealthyPipelineChillModeRule()
|
scmSafeModeManager.getHealthyPipelineSafeModeRule()
|
||||||
.getHealthyPipelineThresholdCount();
|
.getHealthyPipelineThresholdCount();
|
||||||
int oneReplicaThresholdCount =
|
int oneReplicaThresholdCount =
|
||||||
scmChillModeManager.getOneReplicaPipelineChillModeRule()
|
scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
|
||||||
.getThresholdCount();
|
.getThresholdCount();
|
||||||
|
|
||||||
// Because even if no pipelines are there, and threshold we set to zero,
|
// Because even if no pipelines are there, and threshold we set to zero,
|
||||||
@ -282,20 +282,20 @@ public void testChillModeExitRuleWithPipelineAvailabilityCheck(
|
|||||||
|
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 100, 1000 * 5);
|
}, 100, 1000 * 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkHealthy(int expectedCount) throws Exception{
|
private void checkHealthy(int expectedCount) throws Exception{
|
||||||
GenericTestUtils.waitFor(() -> scmChillModeManager
|
GenericTestUtils.waitFor(() -> scmSafeModeManager
|
||||||
.getHealthyPipelineChillModeRule()
|
.getHealthyPipelineSafeModeRule()
|
||||||
.getCurrentHealthyPipelineCount() == expectedCount,
|
.getCurrentHealthyPipelineCount() == expectedCount,
|
||||||
100, 5000);
|
100, 5000);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkOpen(int expectedCount) throws Exception {
|
private void checkOpen(int expectedCount) throws Exception {
|
||||||
GenericTestUtils.waitFor(() -> scmChillModeManager
|
GenericTestUtils.waitFor(() -> scmSafeModeManager
|
||||||
.getOneReplicaPipelineChillModeRule()
|
.getOneReplicaPipelineSafeModeRule()
|
||||||
.getCurrentReportedPipelineCount() == expectedCount,
|
.getCurrentReportedPipelineCount() == expectedCount,
|
||||||
1000, 5000);
|
1000, 5000);
|
||||||
}
|
}
|
||||||
@ -313,31 +313,31 @@ private void firePipelineEvent(Pipeline pipeline) throws Exception {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDisableChillMode() {
|
public void testDisableSafeMode() {
|
||||||
OzoneConfiguration conf = new OzoneConfiguration(config);
|
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||||
conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
|
conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED, false);
|
||||||
PipelineManager pipelineManager = Mockito.mock(PipelineManager.class);
|
PipelineManager pipelineManager = Mockito.mock(PipelineManager.class);
|
||||||
Mockito.doNothing().when(pipelineManager).startPipelineCreator();
|
Mockito.doNothing().when(pipelineManager).startPipelineCreator();
|
||||||
scmChillModeManager =
|
scmSafeModeManager =
|
||||||
new SCMChillModeManager(conf, containers, pipelineManager, queue);
|
new SCMSafeModeManager(conf, containers, pipelineManager, queue);
|
||||||
assertFalse(scmChillModeManager.getInChillMode());
|
assertFalse(scmSafeModeManager.getInSafeMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeDataNodeExitRule() throws Exception {
|
public void testSafeModeDataNodeExitRule() throws Exception {
|
||||||
containers = new ArrayList<>();
|
containers = new ArrayList<>();
|
||||||
testChillModeDataNodes(0);
|
testSafeModeDataNodes(0);
|
||||||
testChillModeDataNodes(3);
|
testSafeModeDataNodes(3);
|
||||||
testChillModeDataNodes(5);
|
testSafeModeDataNodes(5);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check that containers in Allocated state are not considered while
|
* Check that containers in Allocated state are not considered while
|
||||||
* computing percentage of containers with at least 1 reported replica in
|
* computing percentage of containers with at least 1 reported replica in
|
||||||
* chill mode exit rule.
|
* safe mode exit rule.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testContainerChillModeRule() throws Exception {
|
public void testContainerSafeModeRule() throws Exception {
|
||||||
containers = new ArrayList<>();
|
containers = new ArrayList<>();
|
||||||
// Add 100 containers to the list of containers in SCM
|
// Add 100 containers to the list of containers in SCM
|
||||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||||
@ -350,55 +350,55 @@ public void testContainerChillModeRule() throws Exception {
|
|||||||
container.setState(HddsProtos.LifeCycleState.OPEN);
|
container.setState(HddsProtos.LifeCycleState.OPEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, null, queue);
|
config, containers, null, queue);
|
||||||
|
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
// When 10 CLOSED containers are reported by DNs, the computed container
|
// When 10 CLOSED containers are reported by DNs, the computed container
|
||||||
// threshold should be 10/25 as there are only 25 CLOSED containers.
|
// threshold should be 10/25 as there are only 25 CLOSED containers.
|
||||||
// Containers in OPEN state should not contribute towards list of
|
// Containers in OPEN state should not contribute towards list of
|
||||||
// containers while calculating container threshold in SCMChillNodeManager
|
// containers while calculating container threshold in SCMSafeNodeManager
|
||||||
testContainerThreshold(containers.subList(0, 10), 0.4);
|
testContainerThreshold(containers.subList(0, 10), 0.4);
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
// When remaining 15 OPEN containers are reported by DNs, the container
|
// When remaining 15 OPEN containers are reported by DNs, the container
|
||||||
// threshold should be (10+15)/25.
|
// threshold should be (10+15)/25.
|
||||||
testContainerThreshold(containers.subList(10, 25), 1.0);
|
testContainerThreshold(containers.subList(10, 25), 1.0);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 100, 1000 * 5);
|
}, 100, 1000 * 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testChillModeDataNodes(int numOfDns) throws Exception {
|
private void testSafeModeDataNodes(int numOfDns) throws Exception {
|
||||||
OzoneConfiguration conf = new OzoneConfiguration(config);
|
OzoneConfiguration conf = new OzoneConfiguration(config);
|
||||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
|
conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, numOfDns);
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
conf, containers, null, queue);
|
conf, containers, null, queue);
|
||||||
|
|
||||||
// Assert SCM is in Chill mode.
|
// Assert SCM is in Safe mode.
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
// Register all DataNodes except last one and assert SCM is in chill mode.
|
// Register all DataNodes except last one and assert SCM is in safe mode.
|
||||||
for (int i = 0; i < numOfDns-1; i++) {
|
for (int i = 0; i < numOfDns-1; i++) {
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
assertTrue(scmChillModeManager.getCurrentContainerThreshold() == 1);
|
assertTrue(scmSafeModeManager.getCurrentContainerThreshold() == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(numOfDns == 0){
|
if(numOfDns == 0){
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return scmChillModeManager.getInChillMode();
|
return scmSafeModeManager.getInSafeMode();
|
||||||
}, 10, 1000 * 10);
|
}, 10, 1000 * 10);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Register last DataNode and check that SCM is out of Chill mode.
|
// Register last DataNode and check that SCM is out of Safe mode.
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 10, 1000 * 10);
|
}, 10, 1000 * 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -408,23 +408,23 @@ private void testContainerThreshold(List<ContainerInfo> dnContainers,
|
|||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(dnContainers));
|
HddsTestUtils.createNodeRegistrationContainerReport(dnContainers));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
double threshold = scmChillModeManager.getCurrentContainerThreshold();
|
double threshold = scmSafeModeManager.getCurrentContainerThreshold();
|
||||||
return threshold == expectedThreshold;
|
return threshold == expectedThreshold;
|
||||||
}, 100, 2000 * 9);
|
}, 100, 2000 * 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModePipelineExitRule() throws Exception {
|
public void testSafeModePipelineExitRule() throws Exception {
|
||||||
containers = new ArrayList<>();
|
containers = new ArrayList<>();
|
||||||
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
containers.addAll(HddsTestUtils.getContainerInfo(25 * 4));
|
||||||
String storageDir = GenericTestUtils.getTempPath(
|
String storageDir = GenericTestUtils.getTempPath(
|
||||||
TestSCMChillModeManager.class.getName() + UUID.randomUUID());
|
TestSCMSafeModeManager.class.getName() + UUID.randomUUID());
|
||||||
try{
|
try{
|
||||||
MockNodeManager nodeManager = new MockNodeManager(true, 3);
|
MockNodeManager nodeManager = new MockNodeManager(true, 3);
|
||||||
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
|
||||||
// enable pipeline check
|
// enable pipeline check
|
||||||
config.setBoolean(
|
config.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK, true);
|
||||||
|
|
||||||
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
SCMPipelineManager pipelineManager = new SCMPipelineManager(config,
|
||||||
nodeManager, queue);
|
nodeManager, queue);
|
||||||
@ -443,12 +443,12 @@ public void testChillModePipelineExitRule() throws Exception {
|
|||||||
reportBuilder.addPipelineReport(PipelineReport.newBuilder()
|
reportBuilder.addPipelineReport(PipelineReport.newBuilder()
|
||||||
.setPipelineID(pipeline.getId().getProtobuf()));
|
.setPipelineID(pipeline.getId().getProtobuf()));
|
||||||
|
|
||||||
scmChillModeManager = new SCMChillModeManager(
|
scmSafeModeManager = new SCMSafeModeManager(
|
||||||
config, containers, pipelineManager, queue);
|
config, containers, pipelineManager, queue);
|
||||||
|
|
||||||
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
|
||||||
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
HddsTestUtils.createNodeRegistrationContainerReport(containers));
|
||||||
assertTrue(scmChillModeManager.getInChillMode());
|
assertTrue(scmSafeModeManager.getInSafeMode());
|
||||||
|
|
||||||
// Trigger the processed pipeline report event
|
// Trigger the processed pipeline report event
|
||||||
queue.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
|
queue.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
|
||||||
@ -456,12 +456,12 @@ public void testChillModePipelineExitRule() throws Exception {
|
|||||||
reportBuilder.build()));
|
reportBuilder.build()));
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return !scmChillModeManager.getInChillMode();
|
return !scmSafeModeManager.getInSafeMode();
|
||||||
}, 100, 1000 * 10);
|
}, 100, 1000 * 10);
|
||||||
pipelineManager.close();
|
pipelineManager.close();
|
||||||
} finally {
|
} finally {
|
||||||
config.setBoolean(
|
config.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
false);
|
false);
|
||||||
FileUtil.fullyDelete(new File(storageDir));
|
FileUtil.fullyDelete(new File(storageDir));
|
||||||
}
|
}
|
@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
@ -39,25 +39,25 @@
|
|||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests ChillModeHandler behavior.
|
* Tests SafeModeHandler behavior.
|
||||||
*/
|
*/
|
||||||
public class TestChillModeHandler {
|
public class TestSafeModeHandler {
|
||||||
|
|
||||||
|
|
||||||
private OzoneConfiguration configuration;
|
private OzoneConfiguration configuration;
|
||||||
private SCMClientProtocolServer scmClientProtocolServer;
|
private SCMClientProtocolServer scmClientProtocolServer;
|
||||||
private ReplicationManager replicationManager;
|
private ReplicationManager replicationManager;
|
||||||
private BlockManager blockManager;
|
private BlockManager blockManager;
|
||||||
private ChillModeHandler chillModeHandler;
|
private SafeModeHandler safeModeHandler;
|
||||||
private EventQueue eventQueue;
|
private EventQueue eventQueue;
|
||||||
private SCMChillModeManager.ChillModeStatus chillModeStatus;
|
private SCMSafeModeManager.SafeModeStatus safeModeStatus;
|
||||||
private PipelineManager scmPipelineManager;
|
private PipelineManager scmPipelineManager;
|
||||||
|
|
||||||
public void setup(boolean enabled) {
|
public void setup(boolean enabled) {
|
||||||
configuration = new OzoneConfiguration();
|
configuration = new OzoneConfiguration();
|
||||||
configuration.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
|
configuration.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED,
|
||||||
enabled);
|
enabled);
|
||||||
configuration.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT,
|
configuration.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
|
||||||
"3s");
|
"3s");
|
||||||
scmClientProtocolServer =
|
scmClientProtocolServer =
|
||||||
Mockito.mock(SCMClientProtocolServer.class);
|
Mockito.mock(SCMClientProtocolServer.class);
|
||||||
@ -71,45 +71,45 @@ public void setup(boolean enabled) {
|
|||||||
eventQueue);
|
eventQueue);
|
||||||
scmPipelineManager = Mockito.mock(SCMPipelineManager.class);
|
scmPipelineManager = Mockito.mock(SCMPipelineManager.class);
|
||||||
blockManager = Mockito.mock(BlockManagerImpl.class);
|
blockManager = Mockito.mock(BlockManagerImpl.class);
|
||||||
chillModeHandler =
|
safeModeHandler =
|
||||||
new ChillModeHandler(configuration, scmClientProtocolServer,
|
new SafeModeHandler(configuration, scmClientProtocolServer,
|
||||||
blockManager, replicationManager, scmPipelineManager);
|
blockManager, replicationManager, scmPipelineManager);
|
||||||
|
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, chillModeHandler);
|
eventQueue.addHandler(SCMEvents.SAFE_MODE_STATUS, safeModeHandler);
|
||||||
chillModeStatus = new SCMChillModeManager.ChillModeStatus(false);
|
safeModeStatus = new SCMSafeModeManager.SafeModeStatus(false);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeHandlerWithChillModeEnabled() throws Exception {
|
public void testSafeModeHandlerWithSafeModeEnabled() throws Exception {
|
||||||
setup(true);
|
setup(true);
|
||||||
|
|
||||||
Assert.assertTrue(chillModeHandler.getChillModeStatus());
|
Assert.assertTrue(safeModeHandler.getSafeModeStatus());
|
||||||
|
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> !chillModeHandler.getChillModeStatus(),
|
GenericTestUtils.waitFor(() -> !safeModeHandler.getSafeModeStatus(),
|
||||||
1000, 5000);
|
1000, 5000);
|
||||||
|
|
||||||
Assert.assertFalse(scmClientProtocolServer.getChillModeStatus());
|
Assert.assertFalse(scmClientProtocolServer.getSafeModeStatus());
|
||||||
Assert.assertFalse(((BlockManagerImpl) blockManager).isScmInChillMode());
|
Assert.assertFalse(((BlockManagerImpl) blockManager).isScmInSafeMode());
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
replicationManager.isRunning(), 1000, 5000);
|
replicationManager.isRunning(), 1000, 5000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChillModeHandlerWithChillModeDisbaled() throws Exception{
|
public void testSafeModeHandlerWithSafeModeDisbaled() throws Exception{
|
||||||
|
|
||||||
setup(false);
|
setup(false);
|
||||||
|
|
||||||
Assert.assertFalse(chillModeHandler.getChillModeStatus());
|
Assert.assertFalse(safeModeHandler.getSafeModeStatus());
|
||||||
|
|
||||||
eventQueue.fireEvent(SCMEvents.CHILL_MODE_STATUS, chillModeStatus);
|
eventQueue.fireEvent(SCMEvents.SAFE_MODE_STATUS, safeModeStatus);
|
||||||
|
|
||||||
Assert.assertFalse(chillModeHandler.getChillModeStatus());
|
Assert.assertFalse(safeModeHandler.getSafeModeStatus());
|
||||||
Assert.assertFalse(scmClientProtocolServer.getChillModeStatus());
|
Assert.assertFalse(scmClientProtocolServer.getSafeModeStatus());
|
||||||
Assert.assertFalse(((BlockManagerImpl) blockManager).isScmInChillMode());
|
Assert.assertFalse(((BlockManagerImpl) blockManager).isScmInSafeMode());
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
replicationManager.isRunning(), 1000, 5000);
|
replicationManager.isRunning(), 1000, 5000);
|
||||||
}
|
}
|
@ -15,7 +15,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
/**
|
/**
|
||||||
* SCM Chill mode tests.
|
* SCM Safe mode tests.
|
||||||
*/
|
*/
|
@ -23,7 +23,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
||||||
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
||||||
import org.apache.hadoop.hdds.scm.block.BlockManagerImpl;
|
import org.apache.hadoop.hdds.scm.block.BlockManagerImpl;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.ChillModeHandler;
|
import org.apache.hadoop.hdds.scm.safemode.SafeModeHandler;
|
||||||
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
|
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
@ -53,10 +53,10 @@ public void setUp() throws Exception {
|
|||||||
ReplicationManager replicationManager =
|
ReplicationManager replicationManager =
|
||||||
Mockito.mock(ReplicationManager.class);
|
Mockito.mock(ReplicationManager.class);
|
||||||
PipelineManager pipelineManager = Mockito.mock(SCMPipelineManager.class);
|
PipelineManager pipelineManager = Mockito.mock(SCMPipelineManager.class);
|
||||||
ChillModeHandler chillModeHandler = new ChillModeHandler(config,
|
SafeModeHandler safeModeHandler = new SafeModeHandler(config,
|
||||||
scmClientProtocolServer, blockManager, replicationManager,
|
scmClientProtocolServer, blockManager, replicationManager,
|
||||||
pipelineManager);
|
pipelineManager);
|
||||||
eventQueue.addHandler(SCMEvents.CHILL_MODE_STATUS, chillModeHandler);
|
eventQueue.addHandler(SCMEvents.SAFE_MODE_STATUS, safeModeHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
@ -64,9 +64,9 @@ public void tearDown() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAllocateContainerFailureInChillMode() throws Exception {
|
public void testAllocateContainerFailureInSafeMode() throws Exception {
|
||||||
LambdaTestUtils.intercept(SCMException.class,
|
LambdaTestUtils.intercept(SCMException.class,
|
||||||
"hillModePrecheck failed for allocateContainer", () -> {
|
"SafeModePrecheck failed for allocateContainer", () -> {
|
||||||
scmClientProtocolServer.allocateContainer(
|
scmClientProtocolServer.allocateContainer(
|
||||||
ReplicationType.STAND_ALONE, ReplicationFactor.ONE, "");
|
ReplicationType.STAND_ALONE, ReplicationFactor.ONE, "");
|
||||||
});
|
});
|
||||||
|
@ -77,7 +77,7 @@
|
|||||||
+ "operations.",
|
+ "operations.",
|
||||||
versionProvider = HddsVersionProvider.class,
|
versionProvider = HddsVersionProvider.class,
|
||||||
subcommands = {
|
subcommands = {
|
||||||
ChillModeCommands.class,
|
SafeModeCommands.class,
|
||||||
ListSubcommand.class,
|
ListSubcommand.class,
|
||||||
InfoSubcommand.class,
|
InfoSubcommand.class,
|
||||||
DeleteSubcommand.class,
|
DeleteSubcommand.class,
|
||||||
|
@ -28,32 +28,32 @@
|
|||||||
import picocli.CommandLine.ParentCommand;
|
import picocli.CommandLine.ParentCommand;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is the handler that process chill mode check command.
|
* This is the handler that process safe mode check command.
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "status",
|
name = "status",
|
||||||
description = "Check if SCM is in chill mode",
|
description = "Check if SCM is in safe mode",
|
||||||
mixinStandardHelpOptions = true,
|
mixinStandardHelpOptions = true,
|
||||||
versionProvider = HddsVersionProvider.class)
|
versionProvider = HddsVersionProvider.class)
|
||||||
public class ChillModeCheckSubcommand implements Callable<Void> {
|
public class SafeModeCheckSubcommand implements Callable<Void> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ChillModeCheckSubcommand.class);
|
LoggerFactory.getLogger(SafeModeCheckSubcommand.class);
|
||||||
|
|
||||||
@ParentCommand
|
@ParentCommand
|
||||||
private ChillModeCommands parent;
|
private SafeModeCommands parent;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Void call() throws Exception {
|
public Void call() throws Exception {
|
||||||
try (ScmClient scmClient = parent.getParent().createScmClient()) {
|
try (ScmClient scmClient = parent.getParent().createScmClient()) {
|
||||||
|
|
||||||
boolean execReturn = scmClient.inChillMode();
|
boolean execReturn = scmClient.inSafeMode();
|
||||||
|
|
||||||
// Output data list
|
// Output data list
|
||||||
if(execReturn){
|
if(execReturn){
|
||||||
LOG.info("SCM is in chill mode.");
|
LOG.info("SCM is in safe mode.");
|
||||||
} else {
|
} else {
|
||||||
LOG.info("SCM is out of chill mode.");
|
LOG.info("SCM is out of safe mode.");
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
@ -28,21 +28,21 @@
|
|||||||
import picocli.CommandLine.ParentCommand;
|
import picocli.CommandLine.ParentCommand;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Subcommand to group chill mode related operations.
|
* Subcommand to group safe mode related operations.
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "chillmode",
|
name = "safemode",
|
||||||
description = "Chill mode specific operations",
|
description = "Safe mode specific operations",
|
||||||
mixinStandardHelpOptions = true,
|
mixinStandardHelpOptions = true,
|
||||||
versionProvider = HddsVersionProvider.class,
|
versionProvider = HddsVersionProvider.class,
|
||||||
subcommands = {
|
subcommands = {
|
||||||
ChillModeCheckSubcommand.class,
|
SafeModeCheckSubcommand.class,
|
||||||
ChillModeExitSubcommand.class,
|
SafeModeExitSubcommand.class,
|
||||||
})
|
})
|
||||||
public class ChillModeCommands implements Callable<Void> {
|
public class SafeModeCommands implements Callable<Void> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ChillModeCommands.class);
|
LoggerFactory.getLogger(SafeModeCommands.class);
|
||||||
|
|
||||||
@ParentCommand
|
@ParentCommand
|
||||||
private SCMCLI parent;
|
private SCMCLI parent;
|
||||||
@ -54,7 +54,7 @@ public SCMCLI getParent() {
|
|||||||
@Override
|
@Override
|
||||||
public Void call() throws Exception {
|
public Void call() throws Exception {
|
||||||
throw new MissingSubcommandException(
|
throw new MissingSubcommandException(
|
||||||
this.parent.getCmd().getSubcommands().get("chillmode").
|
this.parent.getCmd().getSubcommands().get("safemode").
|
||||||
getUsageMessage());
|
getUsageMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -28,28 +28,28 @@
|
|||||||
import picocli.CommandLine.ParentCommand;
|
import picocli.CommandLine.ParentCommand;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is the handler that process chill mode exit command.
|
* This is the handler that process safe mode exit command.
|
||||||
*/
|
*/
|
||||||
@Command(
|
@Command(
|
||||||
name = "exit",
|
name = "exit",
|
||||||
description = "Force SCM out of chill mode",
|
description = "Force SCM out of safe mode",
|
||||||
mixinStandardHelpOptions = true,
|
mixinStandardHelpOptions = true,
|
||||||
versionProvider = HddsVersionProvider.class)
|
versionProvider = HddsVersionProvider.class)
|
||||||
public class ChillModeExitSubcommand implements Callable<Void> {
|
public class SafeModeExitSubcommand implements Callable<Void> {
|
||||||
|
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ChillModeExitSubcommand.class);
|
LoggerFactory.getLogger(SafeModeExitSubcommand.class);
|
||||||
|
|
||||||
@ParentCommand
|
@ParentCommand
|
||||||
private ChillModeCommands parent;
|
private SafeModeCommands parent;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Void call() throws Exception {
|
public Void call() throws Exception {
|
||||||
try (ScmClient scmClient = parent.getParent().createScmClient()) {
|
try (ScmClient scmClient = parent.getParent().createScmClient()) {
|
||||||
|
|
||||||
boolean execReturn = scmClient.forceExitChillMode();
|
boolean execReturn = scmClient.forceExitSafeMode();
|
||||||
if(execReturn){
|
if(execReturn){
|
||||||
LOG.info("SCM exit chill mode successfully.");
|
LOG.info("SCM exit safe mode successfully.");
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
@ -177,7 +177,7 @@ public enum ResultCodes {
|
|||||||
|
|
||||||
LIST_MULTIPART_UPLOAD_PARTS_FAILED,
|
LIST_MULTIPART_UPLOAD_PARTS_FAILED,
|
||||||
|
|
||||||
SCM_IN_CHILL_MODE,
|
SCM_IN_SAFE_MODE,
|
||||||
|
|
||||||
INVALID_REQUEST,
|
INVALID_REQUEST,
|
||||||
|
|
||||||
|
@ -243,7 +243,7 @@ enum Status {
|
|||||||
TOKEN_EXPIRED = 35;
|
TOKEN_EXPIRED = 35;
|
||||||
TOKEN_ERROR_OTHER = 36;
|
TOKEN_ERROR_OTHER = 36;
|
||||||
LIST_MULTIPART_UPLOAD_PARTS_FAILED = 37;
|
LIST_MULTIPART_UPLOAD_PARTS_FAILED = 37;
|
||||||
SCM_IN_CHILL_MODE = 38;
|
SCM_IN_SAFE_MODE = 38;
|
||||||
INVALID_REQUEST = 39;
|
INVALID_REQUEST = 39;
|
||||||
|
|
||||||
BUCKET_ENCRYPTION_KEY_NOT_FOUND = 40;
|
BUCKET_ENCRYPTION_KEY_NOT_FOUND = 40;
|
||||||
|
@ -75,7 +75,7 @@ public void setup() throws Exception {
|
|||||||
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
|
ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
|
||||||
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
|
cluster = MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1).build();
|
||||||
cluster.waitForClusterToBeReady();
|
cluster.waitForClusterToBeReady();
|
||||||
cluster.waitTobeOutOfChillMode();
|
cluster.waitTobeOutOfSafeMode();
|
||||||
xceiverClientManager = new XceiverClientManager(conf);
|
xceiverClientManager = new XceiverClientManager(conf);
|
||||||
scm = cluster.getStorageContainerManager();
|
scm = cluster.getStorageContainerManager();
|
||||||
containerManager = scm.getContainerManager();
|
containerManager = scm.getContainerManager();
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
import org.apache.hadoop.hdds.scm.TestUtils;
|
import org.apache.hadoop.hdds.scm.TestUtils;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
import org.apache.hadoop.hdds.scm.container.ContainerID;
|
||||||
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
|
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
|
||||||
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
|
||||||
@ -158,8 +158,8 @@ public void testPipelineReport() throws IOException {
|
|||||||
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
|
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
|
||||||
mockRatisProvider);
|
mockRatisProvider);
|
||||||
|
|
||||||
SCMChillModeManager scmChillModeManager =
|
SCMSafeModeManager scmSafeModeManager =
|
||||||
new SCMChillModeManager(new OzoneConfiguration(),
|
new SCMSafeModeManager(new OzoneConfiguration(),
|
||||||
new ArrayList<>(), pipelineManager, eventQueue);
|
new ArrayList<>(), pipelineManager, eventQueue);
|
||||||
|
|
||||||
// create a pipeline in allocated state with no dns yet reported
|
// create a pipeline in allocated state with no dns yet reported
|
||||||
@ -173,7 +173,7 @@ public void testPipelineReport() throws IOException {
|
|||||||
|
|
||||||
// get pipeline report from each dn in the pipeline
|
// get pipeline report from each dn in the pipeline
|
||||||
PipelineReportHandler pipelineReportHandler =
|
PipelineReportHandler pipelineReportHandler =
|
||||||
new PipelineReportHandler(scmChillModeManager, pipelineManager, conf);
|
new PipelineReportHandler(scmSafeModeManager, pipelineManager, conf);
|
||||||
for (DatanodeDetails dn: pipeline.getNodes()) {
|
for (DatanodeDetails dn: pipeline.getNodes()) {
|
||||||
PipelineReportFromDatanode pipelineReportFromDatanode =
|
PipelineReportFromDatanode pipelineReportFromDatanode =
|
||||||
TestUtils.getPipelineReportFromDatanode(dn, pipeline.getId());
|
TestUtils.getPipelineReportFromDatanode(dn, pipeline.getId());
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.chillmode;
|
package org.apache.hadoop.hdds.scm.safemode;
|
||||||
|
|
||||||
import org.apache.hadoop.hdds.HddsConfigKeys;
|
import org.apache.hadoop.hdds.HddsConfigKeys;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
@ -41,10 +41,10 @@
|
|||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class tests SCM Chill mode with pipeline rules.
|
* This class tests SCM Safe mode with pipeline rules.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TestSCMChillModeWithPipelineRules {
|
public class TestSCMSafeModeWithPipelineRules {
|
||||||
|
|
||||||
private static MiniOzoneCluster cluster;
|
private static MiniOzoneCluster cluster;
|
||||||
private OzoneConfiguration conf = new OzoneConfiguration();
|
private OzoneConfiguration conf = new OzoneConfiguration();
|
||||||
@ -58,9 +58,9 @@ public void setup(int numDatanodes) throws Exception {
|
|||||||
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS,
|
||||||
temporaryFolder.newFolder().toString());
|
temporaryFolder.newFolder().toString());
|
||||||
conf.setBoolean(
|
conf.setBoolean(
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
|
||||||
true);
|
true);
|
||||||
conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_CHILL_MODE_EXIT, "10s");
|
conf.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "10s");
|
||||||
conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s");
|
conf.set(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL, "10s");
|
||||||
clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
clusterBuilder = MiniOzoneCluster.newBuilder(conf)
|
||||||
.setNumDatanodes(numDatanodes)
|
.setNumDatanodes(numDatanodes)
|
||||||
@ -75,7 +75,7 @@ public void setup(int numDatanodes) throws Exception {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testScmChillMode() throws Exception {
|
public void testScmSafeMode() throws Exception {
|
||||||
|
|
||||||
int datanodeCount = 6;
|
int datanodeCount = 6;
|
||||||
setup(datanodeCount);
|
setup(datanodeCount);
|
||||||
@ -106,24 +106,24 @@ public void testScmChillMode() throws Exception {
|
|||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
SCMChillModeManager scmChillModeManager =
|
SCMSafeModeManager scmSafeModeManager =
|
||||||
cluster.getStorageContainerManager().getScmChillModeManager();
|
cluster.getStorageContainerManager().getScmSafeModeManager();
|
||||||
|
|
||||||
|
|
||||||
// Ceil(0.1 * 2) is 1, as one pipeline is healthy healthy pipeline rule is
|
// Ceil(0.1 * 2) is 1, as one pipeline is healthy healthy pipeline rule is
|
||||||
// satisfied
|
// satisfied
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
scmChillModeManager.getHealthyPipelineChillModeRule()
|
scmSafeModeManager.getHealthyPipelineSafeModeRule()
|
||||||
.validate(), 1000, 60000);
|
.validate(), 1000, 60000);
|
||||||
|
|
||||||
// As Ceil(0.9 * 2) is 2, and from second pipeline no datanodes's are
|
// As Ceil(0.9 * 2) is 2, and from second pipeline no datanodes's are
|
||||||
// reported this rule is not met yet.
|
// reported this rule is not met yet.
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
!scmChillModeManager.getOneReplicaPipelineChillModeRule()
|
!scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
|
||||||
.validate(), 1000, 60000);
|
.validate(), 1000, 60000);
|
||||||
|
|
||||||
Assert.assertTrue(cluster.getStorageContainerManager().isInChillMode());
|
Assert.assertTrue(cluster.getStorageContainerManager().isInSafeMode());
|
||||||
|
|
||||||
DatanodeDetails restartedDatanode = pipelineList.get(1).getFirstNode();
|
DatanodeDetails restartedDatanode = pipelineList.get(1).getFirstNode();
|
||||||
// Now restart one datanode from the 2nd pipeline
|
// Now restart one datanode from the 2nd pipeline
|
||||||
@ -135,13 +135,13 @@ public void testScmChillMode() throws Exception {
|
|||||||
|
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() ->
|
GenericTestUtils.waitFor(() ->
|
||||||
scmChillModeManager.getOneReplicaPipelineChillModeRule()
|
scmSafeModeManager.getOneReplicaPipelineSafeModeRule()
|
||||||
.validate(), 1000, 60000);
|
.validate(), 1000, 60000);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(() -> !scmChillModeManager.getInChillMode(), 1000,
|
GenericTestUtils.waitFor(() -> !scmSafeModeManager.getInSafeMode(), 1000,
|
||||||
60000);
|
60000);
|
||||||
|
|
||||||
// As after chillmode wait time is not completed, we should have total
|
// As after safemode wait time is not completed, we should have total
|
||||||
// pipeline's as original count 6(1 node pipelines) + 2 (3 node pipeline)
|
// pipeline's as original count 6(1 node pipelines) + 2 (3 node pipeline)
|
||||||
Assert.assertEquals(totalPipelineCount,
|
Assert.assertEquals(totalPipelineCount,
|
||||||
pipelineManager.getPipelines().size());
|
pipelineManager.getPipelines().size());
|
@ -89,12 +89,12 @@ static Builder newHABuilder(OzoneConfiguration conf) {
|
|||||||
void setWaitForClusterToBeReadyTimeout(int timeoutInMs);
|
void setWaitForClusterToBeReadyTimeout(int timeoutInMs);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits/blocks till the cluster is out of chill mode.
|
* Waits/blocks till the cluster is out of safe mode.
|
||||||
*
|
*
|
||||||
* @throws TimeoutException TimeoutException In case of timeout
|
* @throws TimeoutException TimeoutException In case of timeout
|
||||||
* @throws InterruptedException In case of interrupt while waiting
|
* @throws InterruptedException In case of interrupt while waiting
|
||||||
*/
|
*/
|
||||||
void waitTobeOutOfChillMode() throws TimeoutException, InterruptedException;
|
void waitTobeOutOfSafeMode() throws TimeoutException, InterruptedException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns {@link StorageContainerManager} associated with this
|
* Returns {@link StorageContainerManager} associated with this
|
||||||
|
@ -159,17 +159,17 @@ public void setWaitForClusterToBeReadyTimeout(int timeoutInMs) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits for SCM to be out of Chill Mode. Many tests can be run iff we are out
|
* Waits for SCM to be out of Safe Mode. Many tests can be run iff we are out
|
||||||
* of Chill mode.
|
* of Safe mode.
|
||||||
*
|
*
|
||||||
* @throws TimeoutException
|
* @throws TimeoutException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void waitTobeOutOfChillMode()
|
public void waitTobeOutOfSafeMode()
|
||||||
throws TimeoutException, InterruptedException {
|
throws TimeoutException, InterruptedException {
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
if (!scm.isInChillMode()) {
|
if (!scm.isInSafeMode()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
LOG.info("Waiting for cluster to be ready. No datanodes found");
|
LOG.info("Waiting for cluster to be ready. No datanodes found");
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
||||||
import org.apache.hadoop.hdds.scm.container.SCMContainerManager;
|
import org.apache.hadoop.hdds.scm.container.SCMContainerManager;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
|
||||||
@ -62,10 +62,10 @@
|
|||||||
/**
|
/**
|
||||||
* Test Ozone Manager operation in distributed handler scenario.
|
* Test Ozone Manager operation in distributed handler scenario.
|
||||||
*/
|
*/
|
||||||
public class TestScmChillMode {
|
public class TestScmSafeMode {
|
||||||
|
|
||||||
private final static Logger LOG = LoggerFactory
|
private final static Logger LOG = LoggerFactory
|
||||||
.getLogger(TestScmChillMode.class);
|
.getLogger(TestScmSafeMode.class);
|
||||||
private static MiniOzoneCluster cluster = null;
|
private static MiniOzoneCluster cluster = null;
|
||||||
private static MiniOzoneCluster.Builder builder = null;
|
private static MiniOzoneCluster.Builder builder = null;
|
||||||
private static OzoneConfiguration conf;
|
private static OzoneConfiguration conf;
|
||||||
@ -115,7 +115,7 @@ public void shutdown() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testChillModeOperations() throws Exception {
|
public void testSafeModeOperations() throws Exception {
|
||||||
// Create {numKeys} random names keys.
|
// Create {numKeys} random names keys.
|
||||||
TestStorageContainerManagerHelper helper =
|
TestStorageContainerManagerHelper helper =
|
||||||
new TestStorageContainerManagerHelper(cluster, conf);
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
@ -164,23 +164,23 @@ public void testChillModeOperations() throws Exception {
|
|||||||
StorageContainerManager scm;
|
StorageContainerManager scm;
|
||||||
|
|
||||||
scm = cluster.getStorageContainerManager();
|
scm = cluster.getStorageContainerManager();
|
||||||
Assert.assertTrue(scm.isInChillMode());
|
Assert.assertTrue(scm.isInSafeMode());
|
||||||
|
|
||||||
om = cluster.getOzoneManager();
|
om = cluster.getOzoneManager();
|
||||||
|
|
||||||
// As cluster is restarted with out datanodes restart
|
// As cluster is restarted with out datanodes restart
|
||||||
LambdaTestUtils.intercept(IOException.class,
|
LambdaTestUtils.intercept(IOException.class,
|
||||||
"ChillModePrecheck failed for allocateBlock",
|
"SafeModePrecheck failed for allocateBlock",
|
||||||
() -> om.openKey(keyArgs));
|
() -> om.openKey(keyArgs));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests inChillMode & forceExitChillMode api calls.
|
* Tests inSafeMode & forceExitSafeMode api calls.
|
||||||
*/
|
*/
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testIsScmInChillModeAndForceExit() throws Exception {
|
public void testIsScmInSafeModeAndForceExit() throws Exception {
|
||||||
// Test 1: SCM should be out of chill mode.
|
// Test 1: SCM should be out of safe mode.
|
||||||
Assert.assertFalse(storageContainerLocationClient.inChillMode());
|
Assert.assertFalse(storageContainerLocationClient.inSafeMode());
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
// Restart the cluster with same metadata dir.
|
// Restart the cluster with same metadata dir.
|
||||||
|
|
||||||
@ -190,18 +190,18 @@ public void testIsScmInChillModeAndForceExit() throws Exception {
|
|||||||
Assert.fail("Cluster startup failed.");
|
Assert.fail("Cluster startup failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test 2: Scm should be in chill mode as datanodes are not started yet.
|
// Test 2: Scm should be in safe mode as datanodes are not started yet.
|
||||||
storageContainerLocationClient = cluster
|
storageContainerLocationClient = cluster
|
||||||
.getStorageContainerLocationClient();
|
.getStorageContainerLocationClient();
|
||||||
Assert.assertTrue(storageContainerLocationClient.inChillMode());
|
Assert.assertTrue(storageContainerLocationClient.inSafeMode());
|
||||||
// Force scm out of chill mode.
|
// Force scm out of safe mode.
|
||||||
cluster.getStorageContainerManager().getClientProtocolServer()
|
cluster.getStorageContainerManager().getClientProtocolServer()
|
||||||
.forceExitChillMode();
|
.forceExitSafeMode();
|
||||||
// Test 3: SCM should be out of chill mode.
|
// Test 3: SCM should be out of safe mode.
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
try {
|
try {
|
||||||
return !cluster.getStorageContainerManager().getClientProtocolServer()
|
return !cluster.getStorageContainerManager().getClientProtocolServer()
|
||||||
.inChillMode();
|
.inSafeMode();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Assert.fail("Cluster");
|
Assert.fail("Cluster");
|
||||||
return false;
|
return false;
|
||||||
@ -211,8 +211,8 @@ public void testIsScmInChillModeAndForceExit() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testSCMChillMode() throws Exception {
|
public void testSCMSafeMode() throws Exception {
|
||||||
// Test1: Test chill mode when there are no containers in system.
|
// Test1: Test safe mode when there are no containers in system.
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -220,12 +220,12 @@ public void testSCMChillMode() throws Exception {
|
|||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Assert.fail("Cluster startup failed.");
|
Assert.fail("Cluster startup failed.");
|
||||||
}
|
}
|
||||||
assertTrue(cluster.getStorageContainerManager().isInChillMode());
|
assertTrue(cluster.getStorageContainerManager().isInSafeMode());
|
||||||
cluster.startHddsDatanodes();
|
cluster.startHddsDatanodes();
|
||||||
cluster.waitForClusterToBeReady();
|
cluster.waitForClusterToBeReady();
|
||||||
assertFalse(cluster.getStorageContainerManager().isInChillMode());
|
assertFalse(cluster.getStorageContainerManager().isInSafeMode());
|
||||||
|
|
||||||
// Test2: Test chill mode when containers are there in system.
|
// Test2: Test safe mode when containers are there in system.
|
||||||
// Create {numKeys} random names keys.
|
// Create {numKeys} random names keys.
|
||||||
TestStorageContainerManagerHelper helper =
|
TestStorageContainerManagerHelper helper =
|
||||||
new TestStorageContainerManagerHelper(cluster, conf);
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
@ -254,7 +254,7 @@ public void testSCMChillMode() throws Exception {
|
|||||||
cluster.stop();
|
cluster.stop();
|
||||||
|
|
||||||
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
|
GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
|
||||||
.captureLogs(SCMChillModeManager.getLogger());
|
.captureLogs(SCMSafeModeManager.getLogger());
|
||||||
logCapturer.clearOutput();
|
logCapturer.clearOutput();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -266,8 +266,8 @@ public void testSCMChillMode() throws Exception {
|
|||||||
StorageContainerManager scm;
|
StorageContainerManager scm;
|
||||||
|
|
||||||
scm = cluster.getStorageContainerManager();
|
scm = cluster.getStorageContainerManager();
|
||||||
assertTrue(scm.isInChillMode());
|
assertTrue(scm.isInSafeMode());
|
||||||
assertFalse(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
assertFalse(logCapturer.getOutput().contains("SCM exiting safe mode."));
|
||||||
assertTrue(scm.getCurrentContainerThreshold() == 0);
|
assertTrue(scm.getCurrentContainerThreshold() == 0);
|
||||||
for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
|
for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
|
||||||
dn.start(null);
|
dn.start(null);
|
||||||
@ -275,25 +275,25 @@ public void testSCMChillMode() throws Exception {
|
|||||||
GenericTestUtils
|
GenericTestUtils
|
||||||
.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
|
.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);
|
||||||
|
|
||||||
double chillModeCutoff = conf
|
double safeModeCutoff = conf
|
||||||
.getDouble(HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT,
|
.getDouble(HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
|
||||||
HddsConfigKeys.HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT);
|
HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
|
||||||
assertTrue(scm.getCurrentContainerThreshold() >= chillModeCutoff);
|
assertTrue(scm.getCurrentContainerThreshold() >= safeModeCutoff);
|
||||||
assertTrue(logCapturer.getOutput().contains("SCM exiting chill mode."));
|
assertTrue(logCapturer.getOutput().contains("SCM exiting safe mode."));
|
||||||
assertFalse(scm.isInChillMode());
|
assertFalse(scm.isInSafeMode());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testSCMChillModeRestrictedOp() throws Exception {
|
public void testSCMSafeModeRestrictedOp() throws Exception {
|
||||||
conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL,
|
conf.set(OzoneConfigKeys.OZONE_METADATA_STORE_IMPL,
|
||||||
OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB);
|
OzoneConfigKeys.OZONE_METADATA_STORE_IMPL_LEVELDB);
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
cluster = builder.build();
|
cluster = builder.build();
|
||||||
StorageContainerManager scm = cluster.getStorageContainerManager();
|
StorageContainerManager scm = cluster.getStorageContainerManager();
|
||||||
assertTrue(scm.isInChillMode());
|
assertTrue(scm.isInSafeMode());
|
||||||
|
|
||||||
LambdaTestUtils.intercept(SCMException.class,
|
LambdaTestUtils.intercept(SCMException.class,
|
||||||
"ChillModePrecheck failed for allocateContainer", () -> {
|
"SafeModePrecheck failed for allocateContainer", () -> {
|
||||||
scm.getClientProtocolServer()
|
scm.getClientProtocolServer()
|
||||||
.allocateContainer(ReplicationType.STAND_ALONE,
|
.allocateContainer(ReplicationType.STAND_ALONE,
|
||||||
ReplicationFactor.ONE, "");
|
ReplicationFactor.ONE, "");
|
||||||
@ -301,48 +301,48 @@ public void testSCMChillModeRestrictedOp() throws Exception {
|
|||||||
|
|
||||||
cluster.startHddsDatanodes();
|
cluster.startHddsDatanodes();
|
||||||
cluster.waitForClusterToBeReady();
|
cluster.waitForClusterToBeReady();
|
||||||
assertFalse(scm.isInChillMode());
|
assertFalse(scm.isInSafeMode());
|
||||||
|
|
||||||
TestStorageContainerManagerHelper helper =
|
TestStorageContainerManagerHelper helper =
|
||||||
new TestStorageContainerManagerHelper(cluster, conf);
|
new TestStorageContainerManagerHelper(cluster, conf);
|
||||||
helper.createKeys(10, 4096);
|
helper.createKeys(10, 4096);
|
||||||
SCMClientProtocolServer clientProtocolServer = cluster
|
SCMClientProtocolServer clientProtocolServer = cluster
|
||||||
.getStorageContainerManager().getClientProtocolServer();
|
.getStorageContainerManager().getClientProtocolServer();
|
||||||
assertFalse((scm.getClientProtocolServer()).getChillModeStatus());
|
assertFalse((scm.getClientProtocolServer()).getSafeModeStatus());
|
||||||
final List<ContainerInfo> containers = scm.getContainerManager()
|
final List<ContainerInfo> containers = scm.getContainerManager()
|
||||||
.getContainers();
|
.getContainers();
|
||||||
scm.getEventQueue().fireEvent(SCMEvents.CHILL_MODE_STATUS,
|
scm.getEventQueue().fireEvent(SCMEvents.SAFE_MODE_STATUS,
|
||||||
new SCMChillModeManager.ChillModeStatus(true));
|
new SCMSafeModeManager.SafeModeStatus(true));
|
||||||
GenericTestUtils.waitFor(() -> {
|
GenericTestUtils.waitFor(() -> {
|
||||||
return clientProtocolServer.getChillModeStatus();
|
return clientProtocolServer.getSafeModeStatus();
|
||||||
}, 50, 1000 * 30);
|
}, 50, 1000 * 30);
|
||||||
assertTrue(clientProtocolServer.getChillModeStatus());
|
assertTrue(clientProtocolServer.getSafeModeStatus());
|
||||||
|
|
||||||
LambdaTestUtils.intercept(SCMException.class,
|
LambdaTestUtils.intercept(SCMException.class,
|
||||||
"Open container " + containers.get(0).getContainerID() + " "
|
"Open container " + containers.get(0).getContainerID() + " "
|
||||||
+ "doesn't have enough replicas to service this operation in Chill"
|
+ "doesn't have enough replicas to service this operation in Safe"
|
||||||
+ " mode.", () -> clientProtocolServer
|
+ " mode.", () -> clientProtocolServer
|
||||||
.getContainerWithPipeline(containers.get(0).getContainerID()));
|
.getContainerWithPipeline(containers.get(0).getContainerID()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300_000)
|
@Test(timeout = 300_000)
|
||||||
public void testSCMChillModeDisabled() throws Exception {
|
public void testSCMSafeModeDisabled() throws Exception {
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
|
|
||||||
// If chill mode is disabled, cluster should not be in chill mode even if
|
// If safe mode is disabled, cluster should not be in safe mode even if
|
||||||
// min number of datanodes are not started.
|
// min number of datanodes are not started.
|
||||||
conf.setBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, false);
|
conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED, false);
|
||||||
conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, 3);
|
conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, 3);
|
||||||
builder = MiniOzoneCluster.newBuilder(conf)
|
builder = MiniOzoneCluster.newBuilder(conf)
|
||||||
.setHbInterval(1000)
|
.setHbInterval(1000)
|
||||||
.setHbProcessorInterval(500)
|
.setHbProcessorInterval(500)
|
||||||
.setNumDatanodes(1);
|
.setNumDatanodes(1);
|
||||||
cluster = builder.build();
|
cluster = builder.build();
|
||||||
StorageContainerManager scm = cluster.getStorageContainerManager();
|
StorageContainerManager scm = cluster.getStorageContainerManager();
|
||||||
assertFalse(scm.isInChillMode());
|
assertFalse(scm.isInSafeMode());
|
||||||
|
|
||||||
// Even on SCM restart, cluster should be out of chill mode immediately.
|
// Even on SCM restart, cluster should be out of safe mode immediately.
|
||||||
cluster.restartStorageContainerManager(true);
|
cluster.restartStorageContainerManager(true);
|
||||||
assertFalse(scm.isInChillMode());
|
assertFalse(scm.isInSafeMode());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -116,12 +116,12 @@ public void testSCMMXBean() throws Exception {
|
|||||||
assertEquals(stat.toJsonString(), value);
|
assertEquals(stat.toJsonString(), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean inChillMode = (boolean) mbs.getAttribute(bean,
|
boolean inSafeMode = (boolean) mbs.getAttribute(bean,
|
||||||
"InChillMode");
|
"InSafeMode");
|
||||||
assertEquals(scm.isInChillMode(), inChillMode);
|
assertEquals(scm.isInSafeMode(), inSafeMode);
|
||||||
|
|
||||||
double containerThreshold = (double) mbs.getAttribute(bean,
|
double containerThreshold = (double) mbs.getAttribute(bean,
|
||||||
"ChillModeCurrentContainerThreshold");
|
"SafeModeCurrentContainerThreshold");
|
||||||
assertEquals(scm.getCurrentContainerThreshold(), containerThreshold, 0);
|
assertEquals(scm.getCurrentContainerThreshold(), containerThreshold, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -331,8 +331,8 @@ private List<OmKeyLocationInfo> allocateBlock(OmKeyInfo keyInfo,
|
|||||||
keyInfo.getFactor(), omId, excludeList);
|
keyInfo.getFactor(), omId, excludeList);
|
||||||
} catch (SCMException ex) {
|
} catch (SCMException ex) {
|
||||||
if (ex.getResult()
|
if (ex.getResult()
|
||||||
.equals(SCMException.ResultCodes.CHILL_MODE_EXCEPTION)) {
|
.equals(SCMException.ResultCodes.SAFE_MODE_EXCEPTION)) {
|
||||||
throw new OMException(ex.getMessage(), ResultCodes.SCM_IN_CHILL_MODE);
|
throw new OMException(ex.getMessage(), ResultCodes.SCM_IN_SAFE_MODE);
|
||||||
}
|
}
|
||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
@ -344,7 +344,7 @@ private TransactionContext handleAllocateBlock(
|
|||||||
// If request is failed, no need to proceed further.
|
// If request is failed, no need to proceed further.
|
||||||
// Setting the exception with omResponse message and code.
|
// Setting the exception with omResponse message and code.
|
||||||
|
|
||||||
// TODO: the allocate block fails when scm is in chill mode or when scm is
|
// TODO: the allocate block fails when scm is in safe mode or when scm is
|
||||||
// down, but that error is not correctly received in OM end, once that
|
// down, but that error is not correctly received in OM end, once that
|
||||||
// is fixed, we need to see how to handle this failure case or how we
|
// is fixed, we need to see how to handle this failure case or how we
|
||||||
// need to retry or how to handle this scenario. For other errors like
|
// need to retry or how to handle this scenario. For other errors like
|
||||||
|
@ -85,7 +85,7 @@ public static void setUp() throws Exception {
|
|||||||
configurator.setScmNodeManager(nodeManager);
|
configurator.setScmNodeManager(nodeManager);
|
||||||
scm = TestUtils.getScm(conf, configurator);
|
scm = TestUtils.getScm(conf, configurator);
|
||||||
scm.start();
|
scm.start();
|
||||||
scm.exitChillMode();
|
scm.exitSafeMode();
|
||||||
scmBlockSize = (long) conf
|
scmBlockSize = (long) conf
|
||||||
.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT,
|
.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT,
|
||||||
StorageUnit.BYTES);
|
StorageUnit.BYTES);
|
||||||
@ -99,8 +99,8 @@ public static void setUp() throws Exception {
|
|||||||
Mockito.any(ReplicationType.class),
|
Mockito.any(ReplicationType.class),
|
||||||
Mockito.any(ReplicationFactor.class), Mockito.anyString(),
|
Mockito.any(ReplicationFactor.class), Mockito.anyString(),
|
||||||
Mockito.any(ExcludeList.class))).thenThrow(
|
Mockito.any(ExcludeList.class))).thenThrow(
|
||||||
new SCMException("ChillModePrecheck failed for allocateBlock",
|
new SCMException("SafeModePrecheck failed for allocateBlock",
|
||||||
ResultCodes.CHILL_MODE_EXCEPTION));
|
ResultCodes.SAFE_MODE_EXCEPTION));
|
||||||
createVolume(VOLUME_NAME);
|
createVolume(VOLUME_NAME);
|
||||||
createBucket(VOLUME_NAME, BUCKET_NAME);
|
createBucket(VOLUME_NAME, BUCKET_NAME);
|
||||||
}
|
}
|
||||||
@ -133,7 +133,7 @@ private static void createVolume(String volumeName) throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void allocateBlockFailureInChillMode() throws Exception {
|
public void allocateBlockFailureInSafeMode() throws Exception {
|
||||||
KeyManager keyManager1 = new KeyManagerImpl(mockScmBlockLocationProtocol,
|
KeyManager keyManager1 = new KeyManagerImpl(mockScmBlockLocationProtocol,
|
||||||
metadataManager, conf, "om1", null);
|
metadataManager, conf, "om1", null);
|
||||||
OmKeyArgs keyArgs = createBuilder()
|
OmKeyArgs keyArgs = createBuilder()
|
||||||
@ -141,14 +141,14 @@ public void allocateBlockFailureInChillMode() throws Exception {
|
|||||||
.build();
|
.build();
|
||||||
OpenKeySession keySession = keyManager1.openKey(keyArgs);
|
OpenKeySession keySession = keyManager1.openKey(keyArgs);
|
||||||
LambdaTestUtils.intercept(OMException.class,
|
LambdaTestUtils.intercept(OMException.class,
|
||||||
"ChillModePrecheck failed for allocateBlock", () -> {
|
"SafeModePrecheck failed for allocateBlock", () -> {
|
||||||
keyManager1
|
keyManager1
|
||||||
.allocateBlock(keyArgs, keySession.getId(), new ExcludeList());
|
.allocateBlock(keyArgs, keySession.getId(), new ExcludeList());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void openKeyFailureInChillMode() throws Exception {
|
public void openKeyFailureInSafeMode() throws Exception {
|
||||||
KeyManager keyManager1 = new KeyManagerImpl(mockScmBlockLocationProtocol,
|
KeyManager keyManager1 = new KeyManagerImpl(mockScmBlockLocationProtocol,
|
||||||
metadataManager, conf, "om1", null);
|
metadataManager, conf, "om1", null);
|
||||||
OmKeyArgs keyArgs = createBuilder()
|
OmKeyArgs keyArgs = createBuilder()
|
||||||
@ -156,7 +156,7 @@ public void openKeyFailureInChillMode() throws Exception {
|
|||||||
.setDataSize(1000)
|
.setDataSize(1000)
|
||||||
.build();
|
.build();
|
||||||
LambdaTestUtils.intercept(OMException.class,
|
LambdaTestUtils.intercept(OMException.class,
|
||||||
"ChillModePrecheck failed for allocateBlock", () -> {
|
"SafeModePrecheck failed for allocateBlock", () -> {
|
||||||
keyManager1.openKey(keyArgs);
|
keyManager1.openKey(keyArgs);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -206,8 +206,8 @@ private OMResponse createOmResponseForAllocateBlock(boolean status) {
|
|||||||
return OzoneManagerProtocolProtos.OMResponse.newBuilder().setSuccess(true)
|
return OzoneManagerProtocolProtos.OMResponse.newBuilder().setSuccess(true)
|
||||||
.setAllocateBlockResponse(resp)
|
.setAllocateBlockResponse(resp)
|
||||||
.setCmdType(OzoneManagerProtocolProtos.Type.AllocateBlock)
|
.setCmdType(OzoneManagerProtocolProtos.Type.AllocateBlock)
|
||||||
.setStatus(OzoneManagerProtocolProtos.Status.SCM_IN_CHILL_MODE)
|
.setStatus(OzoneManagerProtocolProtos.Status.SCM_IN_SAFE_MODE)
|
||||||
.setMessage("Scm in Chill mode")
|
.setMessage("Scm in Safe mode")
|
||||||
.setSuccess(status).build();
|
.setSuccess(status).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,8 +251,8 @@ public void testAllocateBlockWithFailure() throws Exception{
|
|||||||
// As the request failed, check for keyLocation and the transaction
|
// As the request failed, check for keyLocation and the transaction
|
||||||
// context error message
|
// context error message
|
||||||
Assert.assertFalse(newOmRequest.getAllocateBlockRequest().hasKeyLocation());
|
Assert.assertFalse(newOmRequest.getAllocateBlockRequest().hasKeyLocation());
|
||||||
Assert.assertEquals("Scm in Chill mode " + OMException.STATUS_CODE
|
Assert.assertEquals("Scm in Safe mode " + OMException.STATUS_CODE
|
||||||
+ OMException.ResultCodes.SCM_IN_CHILL_MODE,
|
+ OMException.ResultCodes.SCM_IN_SAFE_MODE,
|
||||||
transactionContext.getException().getMessage());
|
transactionContext.getException().getMessage());
|
||||||
Assert.assertTrue(transactionContext.getException() instanceof IOException);
|
Assert.assertTrue(transactionContext.getException() instanceof IOException);
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||||
@ -94,8 +94,8 @@ public static void initialize()
|
|||||||
.getPipelines(ReplicationType.RATIS, ReplicationFactor.THREE)) {
|
.getPipelines(ReplicationType.RATIS, ReplicationFactor.THREE)) {
|
||||||
pipelineManager.openPipeline(pipeline.getId());
|
pipelineManager.openPipeline(pipeline.getId());
|
||||||
}
|
}
|
||||||
scm.getEventQueue().fireEvent(SCMEvents.CHILL_MODE_STATUS,
|
scm.getEventQueue().fireEvent(SCMEvents.SAFE_MODE_STATUS,
|
||||||
new SCMChillModeManager.ChillModeStatus(false));
|
new SCMSafeModeManager.SafeModeStatus(false));
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
|
|
||||||
// prepare OM
|
// prepare OM
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType;
|
||||||
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
import org.apache.hadoop.hdds.scm.block.BlockManager;
|
||||||
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
|
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
|
||||||
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
|
||||||
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
import org.apache.hadoop.hdds.scm.events.SCMEvents;
|
||||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||||
@ -85,8 +85,8 @@ public static void initialize()
|
|||||||
.getPipelines(ReplicationType.RATIS, ReplicationFactor.THREE)) {
|
.getPipelines(ReplicationType.RATIS, ReplicationFactor.THREE)) {
|
||||||
pipelineManager.openPipeline(pipeline.getId());
|
pipelineManager.openPipeline(pipeline.getId());
|
||||||
}
|
}
|
||||||
scm.getEventQueue().fireEvent(SCMEvents.CHILL_MODE_STATUS,
|
scm.getEventQueue().fireEvent(SCMEvents.SAFE_MODE_STATUS,
|
||||||
new SCMChillModeManager.ChillModeStatus(false));
|
new SCMSafeModeManager.SafeModeStatus(false));
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
Loading…
Reference in New Issue
Block a user