HDDS-1502. Add metrics for Ozone Ratis performance. Contributed by Shashikant Banerjee (#833).

This commit is contained in:
Shashikant Banerjee 2019-05-30 16:17:45 +05:30
parent 2b303e9d5f
commit 18c1eebc08
3 changed files with 82 additions and 20 deletions

View File

@ -37,13 +37,18 @@ public class CSMMetrics {
// ratis op metrics
private @Metric MutableCounterLong numWriteStateMachineOps;
private @Metric MutableCounterLong numReadStateMachineOps;
private @Metric MutableCounterLong numQueryStateMachineOps;
private @Metric MutableCounterLong numApplyTransactionOps;
private @Metric MutableCounterLong numReadStateMachineOps;
private @Metric MutableCounterLong numBytesWrittenCount;
private @Metric MutableCounterLong numBytesCommittedCount;
// Failure Metrics
private @Metric MutableCounterLong numWriteStateMachineFails;
private @Metric MutableCounterLong numReadStateMachineFails;
private @Metric MutableCounterLong numQueryStateMachineFails;
private @Metric MutableCounterLong numApplyTransactionFails;
private @Metric MutableCounterLong numReadStateMachineFails;
private @Metric MutableCounterLong numReadStateMachineMissCount;
/**
 * Creates a metrics holder. The constructor body is empty; it performs no
 * initialization of its own.
 */
public CSMMetrics() {
}
@ -59,6 +64,10 @@ public void incNumWriteStateMachineOps() {
numWriteStateMachineOps.incr();
}
/**
 * Increments the {@code numQueryStateMachineOps} counter, which tracks
 * query operations on the state machine.
 */
public void incNumQueryStateMachineOps() {
numQueryStateMachineOps.incr();
}
/**
 * Increments the {@code numReadStateMachineOps} counter, which tracks
 * read operations on the state machine.
 */
public void incNumReadStateMachineOps() {
numReadStateMachineOps.incr();
}
@ -71,10 +80,26 @@ public void incNumWriteStateMachineFails() {
numWriteStateMachineFails.incr();
}
/**
 * Increments the {@code numQueryStateMachineFails} counter, which tracks
 * failed query operations on the state machine.
 */
public void incNumQueryStateMachineFails() {
numQueryStateMachineFails.incr();
}
/**
 * Adds the given amount to the {@code numBytesWrittenCount} counter.
 *
 * @param value the amount to add to the counter
 */
public void incNumBytesWrittenCount(long value) {
numBytesWrittenCount.incr(value);
}
/**
 * Adds the given amount to the {@code numBytesCommittedCount} counter.
 *
 * @param value the amount to add to the counter
 */
public void incNumBytesCommittedCount(long value) {
numBytesCommittedCount.incr(value);
}
/**
 * Increments the {@code numReadStateMachineFails} counter, which tracks
 * failed reads of state machine data.
 */
public void incNumReadStateMachineFails() {
numReadStateMachineFails.incr();
}
/**
 * Increments the {@code numReadStateMachineMissCount} counter, which
 * tracks reads where the state machine data was not found in the
 * state machine cache.
 */
public void incNumReadStateMachineMissCount() {
numReadStateMachineMissCount.incr();
}
/**
 * Increments the {@code numApplyTransactionFails} counter, which tracks
 * failed applyTransaction operations.
 */
public void incNumApplyTransactionsFails() {
numApplyTransactionFails.incr();
}
@ -85,8 +110,8 @@ public long getNumWriteStateMachineOps() {
}
@VisibleForTesting
public long getNumReadStateMachineOps() {
return numReadStateMachineOps.value();
public long getNumQueryStateMachineOps() {
return numQueryStateMachineOps.value();
}
@VisibleForTesting
@ -100,8 +125,8 @@ public long getNumWriteStateMachineFails() {
}
@VisibleForTesting
public long getNumReadStateMachineFails() {
return numReadStateMachineFails.value();
public long getNumQueryStateMachineFails() {
return numQueryStateMachineFails.value();
}
@VisibleForTesting
@ -109,6 +134,27 @@ public long getNumApplyTransactionsFails() {
return numApplyTransactionFails.value();
}
/**
 * Returns the current value of the {@code numReadStateMachineFails}
 * counter. Exposed for tests.
 *
 * @return the current counter value
 */
@VisibleForTesting
public long getNumReadStateMachineFails() {
return numReadStateMachineFails.value();
}
/**
 * Returns the current value of the {@code numReadStateMachineMissCount}
 * counter. Exposed for tests.
 *
 * @return the current counter value
 */
@VisibleForTesting
public long getNumReadStateMachineMissCount() {
return numReadStateMachineMissCount.value();
}
/**
 * Returns the current value of the {@code numBytesWrittenCount} counter.
 * Exposed for tests.
 *
 * @return the current counter value
 */
@VisibleForTesting
public long getNumBytesWrittenCount() {
return numBytesWrittenCount.value();
}
/**
 * Returns the current value of the {@code numBytesCommittedCount} counter.
 * Exposed for tests.
 *
 * @return the current counter value
 */
@VisibleForTesting
public long getNumBytesCommittedCount() {
return numBytesCommittedCount.value();
}
public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
ms.unregisterSource(SOURCE_NAME);

View File

@ -391,6 +391,8 @@ private CompletableFuture<Message> handleWriteChunk(
// Remove the future once it finishes execution from the
// writeChunkFutureMap.
writeChunkFuture.thenApply(r -> {
metrics.incNumBytesWrittenCount(
requestProto.getWriteChunk().getChunkData().getLen());
writeChunkFutureMap.remove(entryIndex);
LOG.debug("writeChunk writeStateMachineData completed: blockId " + write
.getBlockID() + " logIndex " + entryIndex + " chunkName " + write
@ -438,12 +440,12 @@ public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
@Override
public CompletableFuture<Message> query(Message request) {
try {
metrics.incNumReadStateMachineOps();
metrics.incNumQueryStateMachineOps();
final ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(request.getContent());
return CompletableFuture.completedFuture(runCommand(requestProto, null));
} catch (IOException e) {
metrics.incNumReadStateMachineFails();
metrics.incNumQueryStateMachineFails();
return completeExceptionally(e);
}
}
@ -520,10 +522,14 @@ public CompletableFuture<Void> flushStateMachineData(long index) {
public CompletableFuture<ByteString> readStateMachineData(
LogEntryProto entry) {
StateMachineLogEntryProto smLogEntryProto = entry.getStateMachineLogEntry();
metrics.incNumReadStateMachineOps();
if (!getStateMachineData(smLogEntryProto).isEmpty()) {
return CompletableFuture.completedFuture(ByteString.EMPTY);
}
try {
// the stateMachine data is not present in the stateMachine cache,
// increment the stateMachine cache miss count
metrics.incNumReadStateMachineMissCount();
final ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(
entry.getStateMachineLogEntry().getLogData());
@ -537,6 +543,7 @@ public CompletableFuture<ByteString> readStateMachineData(
getCachedStateMachineData(entry.getIndex(), entry.getTerm(),
requestProto));
} catch (ExecutionException e) {
metrics.incNumReadStateMachineFails();
future.completeExceptionally(e);
}
return future;
@ -547,6 +554,7 @@ public CompletableFuture<ByteString> readStateMachineData(
+ " cannot have state machine data");
}
} catch (Exception e) {
metrics.incNumReadStateMachineFails();
LOG.error("unable to read stateMachineData:" + e);
return completeExceptionally(e);
}
@ -618,6 +626,10 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
applyTransactionCompletionMap
.put(index, trx.getLogEntry().getTerm());
Preconditions.checkState(previous == null);
if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
metrics.incNumBytesCommittedCount(
requestProto.getWriteChunk().getChunkData().getLen());
}
updateLastApplied();
});
return future;

View File

@ -14,8 +14,10 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;
*/
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
@ -29,9 +31,9 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
.ContainerCommandRequestProto;
.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
.ContainerCommandResponseProto;
.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.scm.*;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@ -42,7 +44,7 @@
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
import org.apache.hadoop.ozone.container.common.transport.server
.XceiverServerSpi;
.XceiverServerSpi;
import org.apache.hadoop.ozone.web.utils.OzoneUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@ -57,13 +59,11 @@
import org.junit.Test;
import org.junit.Assert;
/**
* This class tests the metrics of ContainerStateMachine.
*/
public class TestCSMMetrics {
static final String TEST_DIR
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;
/**
* This class tests the metrics of ContainerStateMachine.
*/
public class TestCSMMetrics {
static final String TEST_DIR
@FunctionalInterface
interface CheckedBiFunction<LEFT, RIGHT, OUT, THROWABLE extends Throwable> {
OUT apply(LEFT left, RIGHT right) throws THROWABLE;
@ -112,6 +112,8 @@ static void runContainerStateMachineMetrics(
assertCounter("NumWriteStateMachineOps", 0L, metric);
assertCounter("NumReadStateMachineOps", 0L, metric);
assertCounter("NumApplyTransactionOps", 0L, metric);
assertCounter("NumBytesWrittenCount", 0L, metric);
assertCounter("NumBytesCommittedCount", 0L, metric);
// Write Chunk
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
@ -127,7 +129,9 @@ static void runContainerStateMachineMetrics(
metric = getMetrics(CSMMetrics.SOURCE_NAME +
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumWriteStateMachineOps", 1L, metric);
assertCounter("NumBytesWrittenCount", 1024L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
assertCounter("NumBytesCommittedCount", 1024L, metric);
//Read Chunk
ContainerProtos.ContainerCommandRequestProto readChunkRequest =
@ -139,7 +143,7 @@ static void runContainerStateMachineMetrics(
metric = getMetrics(CSMMetrics.SOURCE_NAME +
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumReadStateMachineOps", 1L, metric);
assertCounter("NumQueryStateMachineOps", 1L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
} finally {
if (client != null) {