diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 56692afaa8..38eec61db8 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -79,6 +79,12 @@ public final class ScmConfigKeys { "dfs.container.ratis.segment.preallocated.size"; public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = 128 * 1024 * 1024; + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT = + "dfs.container.ratis.statemachinedata.sync.timeout"; + public static final TimeDuration + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = + TimeDuration.valueOf(10, TimeUnit.SECONDS); public static final String DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY = "dfs.ratis.client.request.timeout.duration"; public static final TimeDuration diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 3b4f017c19..54b1cf8d44 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -229,6 +229,15 @@ public final class OzoneConfigKeys { = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY; public static final int DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT = ScmConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT; + + // config key and default value for the stateMachineData sync timeout + public static final String + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT = + ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT; + public static final TimeDuration + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT = + 
ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT; + public static final int DFS_CONTAINER_CHUNK_MAX_SIZE = ScmConfigKeys.OZONE_SCM_CHUNK_MAX_SIZE; public static final String DFS_CONTAINER_RATIS_DATANODE_STORAGE_DIR = diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index eb686626f4..5ff60ebc45 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -52,6 +52,13 @@ running unit tests. + + dfs.container.ratis.statemachinedata.sync.timeout + 10s + OZONE, DEBUG, CONTAINER, RATIS + Timeout for StateMachine data writes by Ratis. + + dfs.container.ratis.datanode.storage.dir diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index d5762bcbbf..2a4a227806 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -307,9 +307,18 @@ private CompletableFuture handleWriteChunk( () -> runCommand(requestProto), chunkExecutor); } writeChunkFutureMap.put(entryIndex, writeChunkFuture); + LOG.debug("writeChunk writeStateMachineData : blockId {} logIndex {}" + + " chunkName {}", write.getBlockID(), entryIndex, + write.getChunkData().getChunkName()); // Remove the future once it finishes execution from the // writeChunkFutureMap. 
- writeChunkFuture.thenApply(r -> writeChunkFutureMap.remove(entryIndex)); + writeChunkFuture.thenApply(r -> { + writeChunkFutureMap.remove(entryIndex); + LOG.debug("writeChunk writeStateMachineData completed: blockId {} " + + "logIndex {} chunkName {}", write.getBlockID(), entryIndex, + write.getChunkData().getChunkName()); + return r; + }); return writeChunkFuture; } @@ -531,7 +540,12 @@ public CompletableFuture applyTransaction(TransactionContext trx) { if (cmdType == Type.CreateContainer) { long containerID = requestProto.getContainerID(); future.thenApply( - r -> createContainerFutureMap.remove(containerID).complete(null)); + r -> { + createContainerFutureMap.remove(containerID).complete(null); + LOG.info("create Container Transaction completed for container {}" + + " log index {}", containerID, index); + return r; + }); } future.thenAccept(m -> { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 599f821b8e..a679e5eab9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -186,6 +186,20 @@ private RaftProperties newRaftProperties(Configuration conf) { RaftClientConfigKeys.Rpc .setRequestTimeout(properties, clientRequestTimeout); + // Enable stateMachineData sync and set the stateMachineData sync timeout + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true); + timeUnit = OzoneConfigKeys. + DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit(); + duration = conf.getTimeDuration( + OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT, + OzoneConfigKeys. 
+ DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT + .getDuration(), timeUnit); + final TimeDuration dataSyncTimeout = + TimeDuration.valueOf(duration, timeUnit); + RaftServerConfigKeys.Log.StateMachineData + .setSyncTimeout(properties, dataSyncTimeout); + // Set the server Request timeout timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT .getUnit(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java index 718f5ded6e..8f9d589217 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/ChunkUtils.java @@ -138,6 +138,8 @@ public static void writeData(File chunkFile, ChunkInfo chunkInfo, } } } + log.debug("Write Chunk completed for chunkFile: {}, size {}", chunkFile, + data.length); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java index c630e1990e..cdd19dff0b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/ChunkManagerImpl.java @@ -82,7 +82,8 @@ public void writeChunk(Container container, BlockID blockID, ChunkInfo info, chunkFile, info); File tmpChunkFile = getTmpChunkFile(chunkFile, info); - LOG.debug("writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file", + LOG.debug( + "writing chunk:{} chunk stage:{} chunk file:{} tmp chunk file:{}", info.getChunkName(), stage, chunkFile, tmpChunkFile); switch (stage) {