HDDS-1783 : Latency metric for applyTransaction in ContainerStateMachine (#1363).

avijayanhwx 2019-09-03 02:48:50 -07:00 committed by bshashikant
parent 5ff76cb8bc
commit b53d19a343
4 changed files with 43 additions and 4 deletions
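The patch follows a simple measure-and-record pattern: capture a monotonic start timestamp before the operation, then feed the elapsed nanoseconds into a MutableRate once the operation finishes. Below is a minimal, self-contained sketch of that pattern, assuming Hadoop's metrics2 MetricsRegistry/MutableRate and Time.monotonicNowNanos(); the class and metric names are illustrative, not taken from the patch.

import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.util.Time;

public class LatencyRecordingSketch {
  // Illustrative registry and rate metric; in this patch the rate lives in CSMMetrics.
  private final MetricsRegistry registry = new MetricsRegistry("LatencyRecordingSketch");
  private final MutableRate applyTransaction = registry.newRate("applyTransaction");

  public void runTimed(Runnable op) {
    // Monotonic nanosecond clock, unaffected by wall-clock adjustments.
    long startTime = Time.monotonicNowNanos();
    try {
      op.run();
    } finally {
      // Each sample updates both the call count and the running average latency.
      applyTransaction.add(Time.monotonicNowNanos() - startTime);
    }
  }
}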

View File

@@ -60,6 +60,9 @@ public class CSMMetrics {
private @Metric MutableCounterLong numStartTransactionVerifyFailures;
private @Metric MutableCounterLong numContainerNotOpenVerifyFailures;
private @Metric MutableRate applyTransaction;
private @Metric MutableRate writeStateMachineData;
public CSMMetrics() {
int numCmdTypes = ContainerProtos.Type.values().length;
this.opsLatency = new MutableRate[numCmdTypes];
@@ -186,6 +189,10 @@ public long getNumBytesCommittedCount() {
return numBytesCommittedCount.value();
}
public MutableRate getApplyTransactionLatency() {
return applyTransaction;
}
public void incPipelineLatency(ContainerProtos.Type type, long latencyNanos) {
opsLatency[type.ordinal()].add(latencyNanos);
transactionLatency.add(latencyNanos);
@@ -199,6 +206,13 @@ public void incNumContainerNotOpenVerifyFailures() {
numContainerNotOpenVerifyFailures.incr();
}
public void recordApplyTransactionCompletion(long latencyNanos) {
applyTransaction.add(latencyNanos);
}
public void recordWriteStateMachineCompletion(long latencyNanos) {
writeStateMachineData.add(latencyNanos);
}
public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
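For context on how these readings surface: metrics2 discovers @Metric-annotated fields when the source object is registered, and a MutableRate whose field is named applyTransaction is published as the pair ApplyTransactionNumOps / ApplyTransactionAvgTime, which is what the test change below asserts. A simplified sketch of that registration with illustrative names (the CSMMetrics register/unregister code is only partly visible in this hunk):

import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableRate;

@Metrics(about = "Example state machine metrics", context = "dfs")
public class ExampleStateMachineMetrics {
  private @Metric MutableRate applyTransaction;

  public static ExampleStateMachineMetrics create() {
    MetricsSystem ms = DefaultMetricsSystem.instance();
    // Registering the annotated object lets metrics2 pick up the @Metric fields.
    return ms.register("ExampleStateMachineMetrics",
        "Example state machine metrics", new ExampleStateMachineMetrics());
  }

  public void recordApplyTransactionCompletion(long latencyNanos) {
    // Surfaces as ApplyTransactionNumOps (count) and ApplyTransactionAvgTime (gauge).
    applyTransaction.add(latencyNanos);
  }
}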

View File

@@ -411,7 +411,8 @@ private ExecutorService getCommandExecutor(
}
private CompletableFuture<Message> handleWriteChunk(
ContainerCommandRequestProto requestProto, long entryIndex, long term) {
ContainerCommandRequestProto requestProto, long entryIndex, long term,
long startTime) {
final WriteChunkRequestProto write = requestProto.getWriteChunk();
RaftServer server = ratisServer.getServer();
Preconditions.checkState(server instanceof RaftServerProxy);
@@ -461,6 +462,8 @@ private CompletableFuture<Message> handleWriteChunk(
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
write.getChunkData().getChunkName());
raftFuture.complete(r::toByteString);
metrics.recordWriteStateMachineCompletion(
Time.monotonicNowNanos() - startTime);
}
writeChunkFutureMap.remove(entryIndex);
@@ -477,6 +480,7 @@ private CompletableFuture<Message> handleWriteChunk(
public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
try {
metrics.incNumWriteStateMachineOps();
long writeStateMachineStartTime = Time.monotonicNowNanos();
ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(
entry.getStateMachineLogEntry().getLogData());
@@ -493,7 +497,7 @@ public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
switch (cmdType) {
case WriteChunk:
return handleWriteChunk(requestProto, entry.getIndex(),
entry.getTerm());
entry.getTerm(), writeStateMachineStartTime);
default:
throw new IllegalStateException("Cmd Type:" + cmdType
+ " should not have state machine data");
@@ -673,6 +677,7 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
.setTerm(trx.getLogEntry().getTerm())
.setLogIndex(index);
long applyTxnStartTime = Time.monotonicNowNanos();
try {
applyTransactionSemaphore.acquire();
metrics.incNumApplyTransactionsOps();
@@ -740,7 +745,11 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
}
}
return applyTransactionFuture;
}).whenComplete((r, t) -> applyTransactionSemaphore.release());
}).whenComplete((r, t) -> {
applyTransactionSemaphore.release();
metrics.recordApplyTransactionCompletion(
Time.monotonicNowNanos() - applyTxnStartTime);
});
return applyTransactionFuture;
} catch (IOException | InterruptedException e) {
metrics.incNumApplyTransactionsFails();
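Note that the applyTransaction sample is taken inside whenComplete rather than before the future is returned, so the elapsed time covers asynchronous completion and is recorded on both the success and the failure path, after the back-pressure semaphore has been released. A small stand-alone sketch of that idiom follows; the helper and its parameter names are hypothetical, and it takes the start timestamp when the future is wrapped, whereas the patch captures it slightly earlier, before acquiring the semaphore.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Semaphore;
import java.util.function.LongConsumer;

import org.apache.hadoop.util.Time;

public final class AsyncLatencySketch {
  private AsyncLatencySketch() { }

  // Wraps an in-flight future so that a latency sample is recorded once it
  // completes, whether it completed normally or exceptionally.
  public static <T> CompletableFuture<T> recordOnCompletion(
      CompletableFuture<T> future, Semaphore permits,
      LongConsumer recordLatencyNanos) {
    final long startTime = Time.monotonicNowNanos();
    return future.whenComplete((result, error) -> {
      permits.release();
      recordLatencyNanos.accept(Time.monotonicNowNanos() - startTime);
    });
  }
}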

View File

@@ -47,7 +47,7 @@ HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019
HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012
CORE-SITE.XML_dfs.data.transfer.protection=authentication
CORE-SITE.XML_hadoop.security.authentication=kerberos
COER-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
CORE-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms
#temporary disable authorization as org.apache.hadoop.yarn.server.api.ResourceTrackerPB is not properly annotated to support it

View File

@@ -19,6 +19,7 @@
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getDoubleGauge;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import java.io.File;
@@ -49,6 +50,8 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import static org.apache.ratis.rpc.SupportedRpcType.GRPC;
import static org.junit.Assert.assertTrue;
import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.util.function.CheckedBiConsumer;
@@ -118,6 +121,12 @@ static void runContainerStateMachineMetrics(
assertCounter("NumStartTransactionVerifyFailures", 0L, metric);
assertCounter("NumContainerNotOpenVerifyFailures", 0L, metric);
assertCounter("WriteChunkNumOps", 0L, metric);
double applyTransactionLatency = getDoubleGauge(
"ApplyTransactionAvgTime", metric);
assertTrue(applyTransactionLatency == 0.0);
double writeStateMachineLatency = getDoubleGauge(
"WriteStateMachineDataAvgTime", metric);
assertTrue(writeStateMachineLatency == 0.0);
// Write Chunk
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
@@ -152,6 +161,13 @@ static void runContainerStateMachineMetrics(
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumQueryStateMachineOps", 1L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
applyTransactionLatency = getDoubleGauge(
"ApplyTransactionAvgTime", metric);
assertTrue(applyTransactionLatency > 0.0);
writeStateMachineLatency = getDoubleGauge(
"WriteStateMachineDataAvgTime", metric);
assertTrue(writeStateMachineLatency > 0.0);
} finally {
if (client != null) {
client.close();