HDFS-17290: Adds disconnected client rpc backoff metrics (#6359)
This commit is contained in:
parent
7d3b6a36b8
commit
661c784662
@ -3133,6 +3133,13 @@ private void internalQueueCall(Call call, boolean blocking)
|
||||
// For example, IPC clients using FailoverOnNetworkExceptionRetry handle
|
||||
// RetriableException.
|
||||
rpcMetrics.incrClientBackoff();
|
||||
// Clients that are put directly into the lowest priority queue are backed off and disconnected.
|
||||
if (cqe.getCause() instanceof RpcServerException) {
|
||||
RpcServerException ex = (RpcServerException) cqe.getCause();
|
||||
if (ex.getRpcStatusProto() == RpcStatusProto.FATAL) {
|
||||
rpcMetrics.incrClientBackoffDisconnected();
|
||||
}
|
||||
}
|
||||
// unwrap retriable exception.
|
||||
throw cqe.getCause();
|
||||
}
|
||||
|
@ -141,6 +141,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
|
||||
MutableCounterLong rpcAuthorizationSuccesses;
|
||||
@Metric("Number of client backoff requests")
|
||||
MutableCounterLong rpcClientBackoff;
|
||||
@Metric("Number of disconnected client backoff requests")
|
||||
MutableCounterLong rpcClientBackoffDisconnected;
|
||||
@Metric("Number of slow RPC calls")
|
||||
MutableCounterLong rpcSlowCalls;
|
||||
@Metric("Number of requeue calls")
|
||||
@ -342,6 +344,22 @@ public void incrClientBackoff() {
|
||||
rpcClientBackoff.incr();
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the counter of client backoff requests for which the client was disconnected.
|
||||
*/
|
||||
public void incrClientBackoffDisconnected() {
|
||||
rpcClientBackoffDisconnected.incr();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current value of the disconnected client backoff counter.
|
||||
* @return the number of client backoff requests that were disconnected
|
||||
*/
|
||||
public long getClientBackoffDisconnected() {
|
||||
return rpcClientBackoffDisconnected.value();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Increments the Slow RPC counter.
|
||||
*/
|
||||
|
@ -87,6 +87,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc
|
||||
| `RpcAuthorizationFailures` | Total number of authorization failures |
|
||||
| `RpcAuthorizationSuccesses` | Total number of authorization successes |
|
||||
| `RpcClientBackoff` | Total number of client backoff requests |
|
||||
| `RpcClientBackoffDisconnected` | Total number of client backoff requests that are disconnected. This is a subset of `RpcClientBackoff` |
|
||||
| `RpcSlowCalls` | Total number of slow RPC calls |
|
||||
| `RpcRequeueCalls` | Total number of requeue RPC calls |
|
||||
| `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed |
|
||||
|
@ -1528,6 +1528,7 @@ public Void call() throws ServiceException, InterruptedException {
|
||||
IOException unwrapExeption = re.unwrapRemoteException();
|
||||
if (unwrapExeption instanceof RetriableException) {
|
||||
succeeded = true;
|
||||
assertEquals(1L, server.getRpcMetrics().getClientBackoffDisconnected());
|
||||
} else {
|
||||
lastException = unwrapExeption;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user