diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 38eec61db8..cedcc43cbb 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -85,6 +85,14 @@ public final class ScmConfigKeys { public static final TimeDuration DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = TimeDuration.valueOf(10, TimeUnit.SECONDS); + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES = + "dfs.container.ratis.statemachinedata.sync.retries"; + public static final int + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = -1; + public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE = + "dfs.container.ratis.log.queue.size"; + public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT = 128; public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY = "dfs.ratis.client.request.timeout.duration"; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 54b1cf8d44..97768173f9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -261,6 +261,16 @@ public final class OzoneConfigKeys { public static final TimeDuration DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT = ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT; + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES; + public static final int + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT; + public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE = + ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE; + public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT = + ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT; public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY = ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 5ff60ebc45..2ffc2abb7c 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -59,6 +59,21 @@ Timeout for StateMachine data writes by Ratis. + + dfs.container.ratis.statemachinedata.sync.retries + -1 + OZONE, DEBUG, CONTAINER, RATIS + Number of times the WriteStateMachineData op will be tried + before failing, if this value is -1, then this retries indefinitely. + + + + dfs.container.ratis.log.queue.size + 128 + OZONE, DEBUG, CONTAINER, RATIS + Number of operation pending with Raft's Log Worker. + + dfs.container.ratis.datanode.storage.dir diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 2a4a227806..38b789e8db 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.helpers.BlockData; -import org.apache.ratis.proto.RaftProtos.StateMachineEntryProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.server.RaftServer; @@ -55,7 +55,6 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.TransactionContextImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,7 +205,6 @@ public TransactionContext startTransaction(RaftClientRequest request) final ContainerCommandRequestProto proto = getRequestProto(request.getMessage().getContent()); Preconditions.checkArgument(request.getRaftGroupId().equals(gid)); - final StateMachineLogEntryProto log; if (proto.getCmdType() == Type.WriteChunk) { final WriteChunkRequestProto write = proto.getWriteChunk(); // create the state machine data proto @@ -236,33 +234,29 @@ public TransactionContext startTransaction(RaftClientRequest request) .setWriteChunk(commitWriteChunkProto) .build(); - log = createSMLogEntryProto(request, - commitContainerCommandProto.toByteString(), - dataContainerCommandProto.toByteString()); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setStateMachineData(dataContainerCommandProto.toByteString()) + .setLogData(commitContainerCommandProto.toByteString()) + .build(); } else if (proto.getCmdType() == Type.CreateContainer) { - log = createSMLogEntryProto(request, - request.getMessage().getContent(), request.getMessage().getContent()); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setStateMachineData(request.getMessage().getContent()) + .setLogData(request.getMessage().getContent()) + .build(); } else { - log = createSMLogEntryProto(request, request.getMessage().getContent(), - null); + return TransactionContext.newBuilder() + .setClientRequest(request) + .setStateMachine(this) + .setServerRole(RaftPeerRole.LEADER) + .setLogData(request.getMessage().getContent()) + .build(); } - return new TransactionContextImpl(this, request, log); - } - - private StateMachineLogEntryProto createSMLogEntryProto(RaftClientRequest r, - ByteString logData, ByteString smData) { - StateMachineLogEntryProto.Builder builder = - StateMachineLogEntryProto.newBuilder(); - - builder.setCallId(r.getCallId()) - .setClientId(r.getClientId().toByteString()) - .setLogData(logData); - - if (smData != null) { - builder.setStateMachineEntry(StateMachineEntryProto.newBuilder() - .setStateMachineData(smData).build()); - } - return builder.build(); } private ByteString getStateMachineData(StateMachineLogEntryProto entryProto) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index a679e5eab9..7bf4da9b69 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -287,6 +287,17 @@ private RaftProperties newRaftProperties(Configuration conf) { setAutoTriggerEnabled(properties, true); RaftServerConfigKeys.Snapshot. setAutoTriggerThreshold(properties, snapshotThreshold); + int logQueueSize = + conf.getInt(OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE, + OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_SIZE_DEFAULT); + RaftServerConfigKeys.Log.setQueueSize(properties, logQueueSize); + + int numSyncRetries = conf.getInt( + OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES, + OzoneConfigKeys. + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT); + RaftServerConfigKeys.Log.StateMachineData.setSyncTimeoutRetry(properties, + numSyncRetries); return properties; } diff --git a/hadoop-hdds/pom.xml b/hadoop-hdds/pom.xml index 090a53772b..7a1704c553 100644 --- a/hadoop-hdds/pom.xml +++ b/hadoop-hdds/pom.xml @@ -45,7 +45,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> 0.4.0-SNAPSHOT - 0.3.0-1d2ebee-SNAPSHOT + 0.3.0-1d07b18-SNAPSHOT 1.60 diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index 33af31b34d..671421ef82 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -33,7 +33,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> 3.2.1-SNAPSHOT 0.4.0-SNAPSHOT 0.4.0-SNAPSHOT - 0.3.0-1d2ebee-SNAPSHOT + 0.3.0-1d07b18-SNAPSHOT 1.60 Badlands ${ozone.version}