HADOOP-18840. Add enQueue time to RpcMetrics (#5926). Contributed by Liangjun He.
Reviewed-by: Shilun Fan <slfan1989@apache.org> Reviewed-by: Xing Lin <linxingnku@gmail.com> Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
parent
df0381f236
commit
b6edcb9a84
@ -616,6 +616,9 @@ void updateMetrics(Call call, long processingStartTimeNanos, boolean connDropped
|
|||||||
deltaNanos -= details.get(Timing.RESPONSE);
|
deltaNanos -= details.get(Timing.RESPONSE);
|
||||||
details.set(Timing.HANDLER, deltaNanos);
|
details.set(Timing.HANDLER, deltaNanos);
|
||||||
|
|
||||||
|
long enQueueTime = details.get(Timing.ENQUEUE, rpcMetrics.getMetricsTimeUnit());
|
||||||
|
rpcMetrics.addRpcEnQueueTime(enQueueTime);
|
||||||
|
|
||||||
long queueTime = details.get(Timing.QUEUE, rpcMetrics.getMetricsTimeUnit());
|
long queueTime = details.get(Timing.QUEUE, rpcMetrics.getMetricsTimeUnit());
|
||||||
rpcMetrics.addRpcQueueTime(queueTime);
|
rpcMetrics.addRpcQueueTime(queueTime);
|
||||||
|
|
||||||
|
@ -69,6 +69,8 @@ public class RpcMetrics {
|
|||||||
CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE_DEFAULT);
|
CommonConfigurationKeys.RPC_METRICS_QUANTILE_ENABLE_DEFAULT);
|
||||||
metricsTimeUnit = getMetricsTimeUnit(conf);
|
metricsTimeUnit = getMetricsTimeUnit(conf);
|
||||||
if (rpcQuantileEnable) {
|
if (rpcQuantileEnable) {
|
||||||
|
rpcEnQueueTimeQuantiles =
|
||||||
|
new MutableQuantiles[intervals.length];
|
||||||
rpcQueueTimeQuantiles =
|
rpcQueueTimeQuantiles =
|
||||||
new MutableQuantiles[intervals.length];
|
new MutableQuantiles[intervals.length];
|
||||||
rpcLockWaitTimeQuantiles =
|
rpcLockWaitTimeQuantiles =
|
||||||
@ -81,6 +83,9 @@ public class RpcMetrics {
|
|||||||
new MutableQuantiles[intervals.length];
|
new MutableQuantiles[intervals.length];
|
||||||
for (int i = 0; i < intervals.length; i++) {
|
for (int i = 0; i < intervals.length; i++) {
|
||||||
int interval = intervals[i];
|
int interval = intervals[i];
|
||||||
|
rpcEnQueueTimeQuantiles[i] = registry.newQuantiles("rpcEnQueueTime"
|
||||||
|
+ interval + "s", "rpc enqueue time in " + metricsTimeUnit, "ops",
|
||||||
|
"latency", interval);
|
||||||
rpcQueueTimeQuantiles[i] = registry.newQuantiles("rpcQueueTime"
|
rpcQueueTimeQuantiles[i] = registry.newQuantiles("rpcQueueTime"
|
||||||
+ interval + "s", "rpc queue time in " + metricsTimeUnit, "ops",
|
+ interval + "s", "rpc queue time in " + metricsTimeUnit, "ops",
|
||||||
"latency", interval);
|
"latency", interval);
|
||||||
@ -114,6 +119,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
|
|||||||
|
|
||||||
@Metric("Number of received bytes") MutableCounterLong receivedBytes;
|
@Metric("Number of received bytes") MutableCounterLong receivedBytes;
|
||||||
@Metric("Number of sent bytes") MutableCounterLong sentBytes;
|
@Metric("Number of sent bytes") MutableCounterLong sentBytes;
|
||||||
|
@Metric("EQueue time") MutableRate rpcEnQueueTime;
|
||||||
|
MutableQuantiles[] rpcEnQueueTimeQuantiles;
|
||||||
@Metric("Queue time") MutableRate rpcQueueTime;
|
@Metric("Queue time") MutableRate rpcQueueTime;
|
||||||
MutableQuantiles[] rpcQueueTimeQuantiles;
|
MutableQuantiles[] rpcQueueTimeQuantiles;
|
||||||
@Metric("Lock wait time") MutableRate rpcLockWaitTime;
|
@Metric("Lock wait time") MutableRate rpcLockWaitTime;
|
||||||
@ -257,6 +264,23 @@ public void incrReceivedBytes(int count) {
|
|||||||
receivedBytes.incr(count);
|
receivedBytes.incr(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sometimes, the request time observed by the client is much longer than
|
||||||
|
* the queue + process time on the RPC server. Perhaps the RPC request
|
||||||
|
* 'waiting enQueue' took too long on the RPC server, so we should add
|
||||||
|
* enQueue time to RpcMetrics. See HADOOP-18840 for details.
|
||||||
|
* Add an RPC enqueue time sample
|
||||||
|
* @param enQTime the enqueue time
|
||||||
|
*/
|
||||||
|
public void addRpcEnQueueTime(long enQTime) {
|
||||||
|
rpcEnQueueTime.add(enQTime);
|
||||||
|
if (rpcQuantileEnable) {
|
||||||
|
for (MutableQuantiles q : rpcEnQueueTimeQuantiles) {
|
||||||
|
q.add(enQTime);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an RPC queue time sample
|
* Add an RPC queue time sample
|
||||||
* @param qTime the queue time
|
* @param qTime the queue time
|
||||||
|
@ -1334,6 +1334,8 @@ public TestRpcService run() {
|
|||||||
}
|
}
|
||||||
MetricsRecordBuilder rpcMetrics =
|
MetricsRecordBuilder rpcMetrics =
|
||||||
getMetrics(server.getRpcMetrics().name());
|
getMetrics(server.getRpcMetrics().name());
|
||||||
|
assertEquals("Expected correct rpc en queue count",
|
||||||
|
3000, getLongCounter("RpcEnQueueTimeNumOps", rpcMetrics));
|
||||||
assertEquals("Expected correct rpc queue count",
|
assertEquals("Expected correct rpc queue count",
|
||||||
3000, getLongCounter("RpcQueueTimeNumOps", rpcMetrics));
|
3000, getLongCounter("RpcQueueTimeNumOps", rpcMetrics));
|
||||||
assertEquals("Expected correct rpc processing count",
|
assertEquals("Expected correct rpc processing count",
|
||||||
@ -1344,6 +1346,8 @@ public TestRpcService run() {
|
|||||||
3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics));
|
3000, getLongCounter("RpcResponseTimeNumOps", rpcMetrics));
|
||||||
assertEquals("Expected zero rpc lock wait time",
|
assertEquals("Expected zero rpc lock wait time",
|
||||||
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
|
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
|
||||||
|
MetricsAsserts.assertQuantileGauges("RpcEnQueueTime" + interval + "s",
|
||||||
|
rpcMetrics);
|
||||||
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
|
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
|
||||||
rpcMetrics);
|
rpcMetrics);
|
||||||
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
|
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
|
||||||
@ -2007,6 +2011,8 @@ public void testRpcMetricsInNanos() throws Exception {
|
|||||||
getMetrics(server.getRpcMetrics().name());
|
getMetrics(server.getRpcMetrics().name());
|
||||||
assertEquals("Expected zero rpc lock wait time",
|
assertEquals("Expected zero rpc lock wait time",
|
||||||
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
|
0, getDoubleGauge("RpcLockWaitTimeAvgTime", rpcMetrics), 0.001);
|
||||||
|
MetricsAsserts.assertQuantileGauges("RpcEnQueueTime" + interval + "s",
|
||||||
|
rpcMetrics);
|
||||||
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
|
MetricsAsserts.assertQuantileGauges("RpcQueueTime" + interval + "s",
|
||||||
rpcMetrics);
|
rpcMetrics);
|
||||||
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
|
MetricsAsserts.assertQuantileGauges("RpcProcessingTime" + interval + "s",
|
||||||
@ -2017,12 +2023,15 @@ public void testRpcMetricsInNanos() throws Exception {
|
|||||||
assertGauge("RpcLockWaitTimeAvgTime",
|
assertGauge("RpcLockWaitTimeAvgTime",
|
||||||
(double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L,
|
(double)(server.getRpcMetrics().getMetricsTimeUnit().convert(10L,
|
||||||
TimeUnit.SECONDS)), rpcMetrics);
|
TimeUnit.SECONDS)), rpcMetrics);
|
||||||
LOG.info("RpcProcessingTimeAvgTime: {} , RpcQueueTimeAvgTime: {}",
|
LOG.info("RpcProcessingTimeAvgTime: {} , RpcEnQueueTimeAvgTime: {} , RpcQueueTimeAvgTime: {}",
|
||||||
getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics),
|
getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics),
|
||||||
|
getDoubleGauge("RpcEnQueueTimeAvgTime", rpcMetrics),
|
||||||
getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics));
|
getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics));
|
||||||
|
|
||||||
assertTrue(getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics)
|
assertTrue(getDoubleGauge("RpcProcessingTimeAvgTime", rpcMetrics)
|
||||||
> 4000000D);
|
> 4000000D);
|
||||||
|
assertTrue(getDoubleGauge("RpcEnQueueTimeAvgTime", rpcMetrics)
|
||||||
|
> 4000D);
|
||||||
assertTrue(getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics)
|
assertTrue(getDoubleGauge("RpcQueueTimeAvgTime", rpcMetrics)
|
||||||
> 4000D);
|
> 4000D);
|
||||||
} finally {
|
} finally {
|
||||||
|
Loading…
Reference in New Issue
Block a user