HADOOP-18840. Add enQueue time to RpcMetrics (#5926). Contributed by Liangjun He.

Reviewed-by: Shilun Fan <slfan1989@apache.org>
Reviewed-by: Xing Lin <linxingnku@gmail.com>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
Liangjun He 2023-08-10 10:38:48 +08:00 committed by GitHub
parent df0381f236
commit b6edcb9a84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 1 deletions

View File

@ -616,6 +616,9 @@ void updateMetrics(Call call, long processingStartTimeNanos, boolean connDropped
deltaNanos -= details.get(Timing.RESPONSE); deltaNanos -= details.get(Timing.RESPONSE);
details.set(Timing.HANDLER, deltaNanos); details.set(Timing.HANDLER, deltaNanos);
long enQueueTime = details.get(Timing.ENQUEUE, rpcMetrics.getMetricsTimeUnit());
rpcMetrics.addRpcEnQueueTime(enQueueTime);
long queueTime = details.get(Timing.QUEUE, rpcMetrics.getMetricsTimeUnit()); long queueTime = details.get(Timing.QUEUE, rpcMetrics.getMetricsTimeUnit());
rpcMetrics.addRpcQueueTime(queueTime); rpcMetrics.addRpcQueueTime(queueTime);

View File

@ -69,6 +69,8 @@ public class RpcMetrics {
CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE_DEFAULT); CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE_DEFAULT);
metricsTimeUnit = getMetricsTimeUnit(conf); metricsTimeUnit = getMetricsTimeUnit(conf);
if (rpcQuantileEnable) { if (rpcQuantileEnable) {
rpcEnQueueTimeQuantiles =
new MutableQuantiles[intervals.length];
rpcQueueTimeQuantiles = rpcQueueTimeQuantiles =
new MutableQuantiles[intervals.length]; new MutableQuantiles[intervals.length];
rpcLockWaitTimeQuantiles = rpcLockWaitTimeQuantiles =
@ -81,6 +83,9 @@ public class RpcMetrics {
new MutableQuantiles[intervals.length]; new MutableQuantiles[intervals.length];
for (int i = 0; i < intervals.length; i++) { for (int i = 0; i < intervals.length; i++) {
int interval = intervals[i]; int interval = intervals[i];
rpcEnQueueTimeQuantiles[i] = registry.newQuantiles("rpcEnQueueTime"
+ interval + "s", "rpc enqueue time in " + metricsTimeUnit, "ops",
"latency", interval);
rpcQueueTimeQuantiles[i] = registry.newQuantiles("rpcQueueTime" rpcQueueTimeQuantiles[i] = registry.newQuantiles("rpcQueueTime"
+ interval + "s", "rpc queue time in " + metricsTimeUnit, "ops", + interval + "s", "rpc queue time in " + metricsTimeUnit, "ops",
"latency", interval); "latency", interval);
@ -114,6 +119,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
@Metric("Number of received bytes") MutableCounterLong receivedBytes; @Metric("Number of received bytes") MutableCounterLong receivedBytes;
@Metric("Number of sent bytes") MutableCounterLong sentBytes; @Metric("Number of sent bytes") MutableCounterLong sentBytes;
@Metric("EQueue time") MutableRate rpcEnQueueTime;
MutableQuantiles[] rpcEnQueueTimeQuantiles;
@Metric("Queue time") MutableRate rpcQueueTime; @Metric("Queue time") MutableRate rpcQueueTime;
MutableQuantiles[] rpcQueueTimeQuantiles; MutableQuantiles[] rpcQueueTimeQuantiles;
@Metric("Lock wait time") MutableRate rpcLockWaitTime; @Metric("Lock wait time") MutableRate rpcLockWaitTime;
@ -257,6 +264,23 @@ public void incrReceivedBytes(int count) {
receivedBytes.incr(count); receivedBytes.incr(count);
} }
/**
* Sometimes, the request time observed by the client is much longer than
* the queue + process time on the RPC server.Perhaps the RPC request
* 'waiting enQueue' took too long on the RPC server, so we should add
* enQueue time to RpcMetrics. See HADOOP-18840 for details.
* Add an RPC enqueue time sample
* @param enQTime the queue time
*/
public void addRpcEnQueueTime(long enQTime) {
rpcEnQueueTime.add(enQTime);
if (rpcQuantileEnable) {
for (MutableQuantiles q : rpcEnQueueTimeQuantiles) {
q.add(enQTime);
}
}
}
/** /**
* Add an RPC queue time sample * Add an RPC queue time sample
* @param qTime the queue time * @param qTime the queue time

View File

@ -1334,6 +1334,8 @@ public TestRpcService run() {
} }
MetricsRecordBuilder rpcMetrics = MetricsRecordBuilder rpcMetrics =
getMetrics(server.getRpcMetrics().name()); getMetrics(server.getRpcMetrics().name());
assertEquals("Expected correct rpc en queue count",
3000, getLongCounter("RpcEnQueueTimeNumOps", rpcMetrics));
assertEquals("Expected correct rpc queue count", assertEquals("Expected correct rpc queue count",
3000, getLongCounter("RpcQueueTimeNumOps", rpcMetrics)); 3000, getLongCounter("RpcQueueTimeNumOps", rpcMetrics));
assertEquals("Expected correct rpc processing count", assertEquals("Expected correct rpc processing count",
@ -1344,6 +1346,8 @@ public TestRpcService run() {
3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics)); 3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics));
assertEquals("Expected zero rpc lock wait time", assertEquals("Expected zero rpc lock wait time",
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001); 0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
MetricsAsserts.assertQuantileGauges("RpcEnQueueTime" + interval + "s",
rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s", MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
rpcMetrics); rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s", MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
@ -2007,6 +2011,8 @@ public void testRpcMetricsInNanos() throws Exception {
getMetrics(server.getRpcMetrics().name()); getMetrics(server.getRpcMetrics().name());
assertEquals("Expected zero rpc lock wait time", assertEquals("Expected zero rpc lock wait time",
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001); 0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
MetricsAsserts.assertQuantileGauges("RpcEnQueueTime" + interval + "s",
rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s", MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
rpcMetrics); rpcMetrics);
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s", MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
@ -2017,12 +2023,15 @@ public void testRpcMetricsInNanos() throws Exception {
assertGauge("RpcLockWaitTimeAvgTime", assertGauge("RpcLockWaitTimeAvgTime",
(double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L, (double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L,
TimeUnit.SECONDS)), rpcMetrics); TimeUnit.SECONDS)), rpcMetrics);
LOG.info("RpcProcessingTimeAvgTime: {} , RpcQueueTimeAvgTime: {}", LOG.info("RpcProcessingTimeAvgTime: {} , RpcEnQueueTimeAvgTime: {} , RpcQueueTimeAvgTime: {}",
getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics), getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics),
getDoubleGauge("RpcEnQueueTimeAvgTime", rpcMetrics),
getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics)); getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics));
assertTrue(getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics) assertTrue(getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics)
> 4000000D); > 4000000D);
assertTrue(getDoubleGauge("RpcEnQueueTimeAvgTime", rpcMetrics)
> 4000D);
assertTrue(getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics) assertTrue(getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics)
> 4000D); > 4000D);
} finally { } finally {