HDFS-17290: Adds disconnected client rpc backoff metrics (#6359)
This commit is contained in:
parent
7d3b6a36b8
commit
661c784662
@ -3133,6 +3133,13 @@ private void internalQueueCall(Call call, boolean blocking)
|
|||||||
// For example, IPC clients using FailoverOnNetworkExceptionRetry handle
|
// For example, IPC clients using FailoverOnNetworkExceptionRetry handle
|
||||||
// RetriableException.
|
// RetriableException.
|
||||||
rpcMetrics.incrClientBackoff();
|
rpcMetrics.incrClientBackoff();
|
||||||
|
// Clients that are put directly into the lowest priority queue are backed off and disconnected.
|
||||||
|
if (cqe.getCause() instanceof RpcServerException) {
|
||||||
|
RpcServerException ex = (RpcServerException) cqe.getCause();
|
||||||
|
if (ex.getRpcStatusProto() == RpcStatusProto.FATAL) {
|
||||||
|
rpcMetrics.incrClientBackoffDisconnected();
|
||||||
|
}
|
||||||
|
}
|
||||||
// unwrap retriable exception.
|
// unwrap retriable exception.
|
||||||
throw cqe.getCause();
|
throw cqe.getCause();
|
||||||
}
|
}
|
||||||
|
@ -141,6 +141,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
|
|||||||
MutableCounterLong rpcAuthorizationSuccesses;
|
MutableCounterLong rpcAuthorizationSuccesses;
|
||||||
@Metric("Number of client backoff requests")
|
@Metric("Number of client backoff requests")
|
||||||
MutableCounterLong rpcClientBackoff;
|
MutableCounterLong rpcClientBackoff;
|
||||||
|
@Metric("Number of disconnected client backoff requests")
|
||||||
|
MutableCounterLong rpcClientBackoffDisconnected;
|
||||||
@Metric("Number of slow RPC calls")
|
@Metric("Number of slow RPC calls")
|
||||||
MutableCounterLong rpcSlowCalls;
|
MutableCounterLong rpcSlowCalls;
|
||||||
@Metric("Number of requeue calls")
|
@Metric("Number of requeue calls")
|
||||||
@ -342,6 +344,22 @@ public void incrClientBackoff() {
|
|||||||
rpcClientBackoff.incr();
|
rpcClientBackoff.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increments the count of clients that were disconnected due to backoff.
|
||||||
|
*/
|
||||||
|
public void incrClientBackoffDisconnected() {
|
||||||
|
rpcClientBackoffDisconnected.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of disconnected backoffs.
|
||||||
|
* @return the current number of disconnected client backoff requests
|
||||||
|
*/
|
||||||
|
public long getClientBackoffDisconnected() {
|
||||||
|
return rpcClientBackoffDisconnected.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increments the Slow RPC counter.
|
* Increments the Slow RPC counter.
|
||||||
*/
|
*/
|
||||||
|
@ -87,6 +87,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc
|
|||||||
| `RpcAuthorizationFailures` | Total number of authorization failures |
|
| `RpcAuthorizationFailures` | Total number of authorization failures |
|
||||||
| `RpcAuthorizationSuccesses` | Total number of authorization successes |
|
| `RpcAuthorizationSuccesses` | Total number of authorization successes |
|
||||||
| `RpcClientBackoff` | Total number of client backoff requests |
|
| `RpcClientBackoff` | Total number of client backoff requests |
|
||||||
|
| `RpcClientBackoffDisconnected` | Total number of client backoff requests that are disconnected. This is a subset of RpcClientBackoff |
|
||||||
| `RpcSlowCalls` | Total number of slow RPC calls |
|
| `RpcSlowCalls` | Total number of slow RPC calls |
|
||||||
| `RpcRequeueCalls` | Total number of requeue RPC calls |
|
| `RpcRequeueCalls` | Total number of requeue RPC calls |
|
||||||
| `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed |
|
| `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed |
|
||||||
|
@ -1528,6 +1528,7 @@ public Void call() throws ServiceException, InterruptedException {
|
|||||||
IOException unwrapExeption = re.unwrapRemoteException();
|
IOException unwrapExeption = re.unwrapRemoteException();
|
||||||
if (unwrapExeption instanceof RetriableException) {
|
if (unwrapExeption instanceof RetriableException) {
|
||||||
succeeded = true;
|
succeeded = true;
|
||||||
|
assertEquals(1L, server.getRpcMetrics().getClientBackoffDisconnected());
|
||||||
} else {
|
} else {
|
||||||
lastException = unwrapExeption;
|
lastException = unwrapExeption;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user