HDDS-1502. Add metrics for Ozone Ratis performance.Contributed by Shashikant Banerjee(#833).
This commit is contained in:
parent
2b303e9d5f
commit
18c1eebc08
@ -37,13 +37,18 @@ public class CSMMetrics {
|
||||
|
||||
// ratis op metrics metrics
|
||||
private @Metric MutableCounterLong numWriteStateMachineOps;
|
||||
private @Metric MutableCounterLong numReadStateMachineOps;
|
||||
private @Metric MutableCounterLong numQueryStateMachineOps;
|
||||
private @Metric MutableCounterLong numApplyTransactionOps;
|
||||
private @Metric MutableCounterLong numReadStateMachineOps;
|
||||
private @Metric MutableCounterLong numBytesWrittenCount;
|
||||
private @Metric MutableCounterLong numBytesCommittedCount;
|
||||
|
||||
// Failure Metrics
|
||||
private @Metric MutableCounterLong numWriteStateMachineFails;
|
||||
private @Metric MutableCounterLong numReadStateMachineFails;
|
||||
private @Metric MutableCounterLong numQueryStateMachineFails;
|
||||
private @Metric MutableCounterLong numApplyTransactionFails;
|
||||
private @Metric MutableCounterLong numReadStateMachineFails;
|
||||
private @Metric MutableCounterLong numReadStateMachineMissCount;
|
||||
|
||||
public CSMMetrics() {
|
||||
}
|
||||
@ -59,6 +64,10 @@ public void incNumWriteStateMachineOps() {
|
||||
numWriteStateMachineOps.incr();
|
||||
}
|
||||
|
||||
public void incNumQueryStateMachineOps() {
|
||||
numQueryStateMachineOps.incr();
|
||||
}
|
||||
|
||||
public void incNumReadStateMachineOps() {
|
||||
numReadStateMachineOps.incr();
|
||||
}
|
||||
@ -71,10 +80,26 @@ public void incNumWriteStateMachineFails() {
|
||||
numWriteStateMachineFails.incr();
|
||||
}
|
||||
|
||||
public void incNumQueryStateMachineFails() {
|
||||
numQueryStateMachineFails.incr();
|
||||
}
|
||||
|
||||
public void incNumBytesWrittenCount(long value) {
|
||||
numBytesWrittenCount.incr(value);
|
||||
}
|
||||
|
||||
public void incNumBytesCommittedCount(long value) {
|
||||
numBytesCommittedCount.incr(value);
|
||||
}
|
||||
|
||||
public void incNumReadStateMachineFails() {
|
||||
numReadStateMachineFails.incr();
|
||||
}
|
||||
|
||||
public void incNumReadStateMachineMissCount() {
|
||||
numReadStateMachineMissCount.incr();
|
||||
}
|
||||
|
||||
public void incNumApplyTransactionsFails() {
|
||||
numApplyTransactionFails.incr();
|
||||
}
|
||||
@ -85,8 +110,8 @@ public long getNumWriteStateMachineOps() {
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumReadStateMachineOps() {
|
||||
return numReadStateMachineOps.value();
|
||||
public long getNumQueryStateMachineOps() {
|
||||
return numQueryStateMachineOps.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
@ -100,8 +125,8 @@ public long getNumWriteStateMachineFails() {
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumReadStateMachineFails() {
|
||||
return numReadStateMachineFails.value();
|
||||
public long getNumQueryStateMachineFails() {
|
||||
return numQueryStateMachineFails.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
@ -109,6 +134,27 @@ public long getNumApplyTransactionsFails() {
|
||||
return numApplyTransactionFails.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumReadStateMachineFails() {
|
||||
return numReadStateMachineFails.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumReadStateMachineMissCount() {
|
||||
return numReadStateMachineMissCount.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumBytesWrittenCount() {
|
||||
return numBytesWrittenCount.value();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public long getNumBytesCommittedCount() {
|
||||
return numBytesCommittedCount.value();
|
||||
}
|
||||
|
||||
|
||||
public void unRegister() {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
ms.unregisterSource(SOURCE_NAME);
|
||||
|
@ -391,6 +391,8 @@ private CompletableFuture<Message> handleWriteChunk(
|
||||
// Remove the future once it finishes execution from the
|
||||
// writeChunkFutureMap.
|
||||
writeChunkFuture.thenApply(r -> {
|
||||
metrics.incNumBytesWrittenCount(
|
||||
requestProto.getWriteChunk().getChunkData().getLen());
|
||||
writeChunkFutureMap.remove(entryIndex);
|
||||
LOG.debug("writeChunk writeStateMachineData completed: blockId " + write
|
||||
.getBlockID() + " logIndex " + entryIndex + " chunkName " + write
|
||||
@ -438,12 +440,12 @@ public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
|
||||
@Override
|
||||
public CompletableFuture<Message> query(Message request) {
|
||||
try {
|
||||
metrics.incNumReadStateMachineOps();
|
||||
metrics.incNumQueryStateMachineOps();
|
||||
final ContainerCommandRequestProto requestProto =
|
||||
getContainerCommandRequestProto(request.getContent());
|
||||
return CompletableFuture.completedFuture(runCommand(requestProto, null));
|
||||
} catch (IOException e) {
|
||||
metrics.incNumReadStateMachineFails();
|
||||
metrics.incNumQueryStateMachineFails();
|
||||
return completeExceptionally(e);
|
||||
}
|
||||
}
|
||||
@ -520,10 +522,14 @@ public CompletableFuture<Void> flushStateMachineData(long index) {
|
||||
public CompletableFuture<ByteString> readStateMachineData(
|
||||
LogEntryProto entry) {
|
||||
StateMachineLogEntryProto smLogEntryProto = entry.getStateMachineLogEntry();
|
||||
metrics.incNumReadStateMachineOps();
|
||||
if (!getStateMachineData(smLogEntryProto).isEmpty()) {
|
||||
return CompletableFuture.completedFuture(ByteString.EMPTY);
|
||||
}
|
||||
try {
|
||||
// the stateMachine data is not present in the stateMachine cache,
|
||||
// increment the stateMachine cache miss count
|
||||
metrics.incNumReadStateMachineMissCount();
|
||||
final ContainerCommandRequestProto requestProto =
|
||||
getContainerCommandRequestProto(
|
||||
entry.getStateMachineLogEntry().getLogData());
|
||||
@ -537,6 +543,7 @@ public CompletableFuture<ByteString> readStateMachineData(
|
||||
getCachedStateMachineData(entry.getIndex(), entry.getTerm(),
|
||||
requestProto));
|
||||
} catch (ExecutionException e) {
|
||||
metrics.incNumReadStateMachineFails();
|
||||
future.completeExceptionally(e);
|
||||
}
|
||||
return future;
|
||||
@ -547,6 +554,7 @@ public CompletableFuture<ByteString> readStateMachineData(
|
||||
+ " cannot have state machine data");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
metrics.incNumReadStateMachineFails();
|
||||
LOG.error("unable to read stateMachineData:" + e);
|
||||
return completeExceptionally(e);
|
||||
}
|
||||
@ -618,6 +626,10 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
|
||||
applyTransactionCompletionMap
|
||||
.put(index, trx.getLogEntry().getTerm());
|
||||
Preconditions.checkState(previous == null);
|
||||
if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
|
||||
metrics.incNumBytesCommittedCount(
|
||||
requestProto.getWriteChunk().getChunkData().getLen());
|
||||
}
|
||||
updateLastApplied();
|
||||
});
|
||||
return future;
|
||||
|
@ -14,8 +14,10 @@
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;
|
||||
|
||||
*/
|
||||
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
||||
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
||||
|
||||
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||
@ -29,9 +31,9 @@
|
||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ContainerCommandRequestProto;
|
||||
.ContainerCommandRequestProto;
|
||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
|
||||
.ContainerCommandResponseProto;
|
||||
.ContainerCommandResponseProto;
|
||||
import org.apache.hadoop.hdds.scm.*;
|
||||
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
|
||||
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
|
||||
@ -42,7 +44,7 @@
|
||||
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
|
||||
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
|
||||
import org.apache.hadoop.ozone.container.common.transport.server
|
||||
.XceiverServerSpi;
|
||||
.XceiverServerSpi;
|
||||
import org.apache.hadoop.ozone.web.utils.OzoneUtils;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||
@ -57,13 +59,11 @@
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
|
||||
/**
|
||||
* This class tests the metrics of ContainerStateMachine.
|
||||
*/
|
||||
public class TestCSMMetrics {
|
||||
static final String TEST_DIR
|
||||
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;
|
||||
|
||||
/**
|
||||
* This class tests the metrics of ContainerStateMachine.
|
||||
*/
|
||||
public class TestCSMMetrics {
|
||||
static final String TEST_DIR
|
||||
@FunctionalInterface
|
||||
interface CheckedBiFunction<LEFT, RIGHT, OUT, THROWABLE extends Throwable> {
|
||||
OUT apply(LEFT left, RIGHT right) throws THROWABLE;
|
||||
@ -112,6 +112,8 @@ static void runContainerStateMachineMetrics(
|
||||
assertCounter("NumWriteStateMachineOps", 0L, metric);
|
||||
assertCounter("NumReadStateMachineOps", 0L, metric);
|
||||
assertCounter("NumApplyTransactionOps", 0L, metric);
|
||||
assertCounter("NumBytesWrittenCount", 0L, metric);
|
||||
assertCounter("NumBytesCommittedCount", 0L, metric);
|
||||
|
||||
// Write Chunk
|
||||
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
|
||||
@ -127,7 +129,9 @@ static void runContainerStateMachineMetrics(
|
||||
metric = getMetrics(CSMMetrics.SOURCE_NAME +
|
||||
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
|
||||
assertCounter("NumWriteStateMachineOps", 1L, metric);
|
||||
assertCounter("NumBytesWrittenCount", 1024L, metric);
|
||||
assertCounter("NumApplyTransactionOps", 1L, metric);
|
||||
assertCounter("NumBytesCommittedCount", 1024L, metric);
|
||||
|
||||
//Read Chunk
|
||||
ContainerProtos.ContainerCommandRequestProto readChunkRequest =
|
||||
@ -139,7 +143,7 @@ static void runContainerStateMachineMetrics(
|
||||
|
||||
metric = getMetrics(CSMMetrics.SOURCE_NAME +
|
||||
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
|
||||
assertCounter("NumReadStateMachineOps", 1L, metric);
|
||||
assertCounter("NumQueryStateMachineOps", 1L, metric);
|
||||
assertCounter("NumApplyTransactionOps", 1L, metric);
|
||||
} finally {
|
||||
if (client != null) {
|
||||
|
Loading…
Reference in New Issue
Block a user