HDFS-16707. RBF: Expose RouterRpcFairnessPolicyController related request record metrics for each nameservice to Prometheus (#4665). Contributed by Jiale Qi.

Reviewed-by: Inigo Goiri <inigoiri@apache.org>
Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
SevenAddSix 2023-05-01 23:52:26 +08:00 committed by GitHub
parent d75c6d9d57
commit 1079890ae3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 66 additions and 7 deletions

View File

@ -186,6 +186,25 @@ public class FederationRPCPerformanceMonitor implements RouterRpcMonitor {
} }
} }
/**
 * Record that a permit to proxy an operation was rejected.
 *
 * The router-wide metrics object is updated when it is present, and the
 * per-nameservice metrics are updated only when an entry for the given
 * nameservice is registered in the map.
 *
 * @param nsId Identifier of the nameservice the operation was destined for.
 */
@Override
public void proxyOpPermitRejected(String nsId) {
  if (metrics != null) {
    metrics.incrProxyOpPermitRejected();
  }
  // Nothing more to record when this nameservice has no metrics registered.
  if (nameserviceRPCMetricsMap == null ||
      !nameserviceRPCMetricsMap.containsKey(nsId)) {
    return;
  }
  nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitRejected();
}
/**
 * Record that a permit to proxy an operation was accepted.
 *
 * Only the per-nameservice metrics are updated here, and only when an entry
 * for the given nameservice is registered in the map.
 *
 * @param nsId Identifier of the nameservice the operation was destined for.
 */
@Override
public void proxyOpPermitAccepted(String nsId) {
  if (nameserviceRPCMetricsMap == null) {
    return;
  }
  if (nameserviceRPCMetricsMap.containsKey(nsId)) {
    nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitAccepted();
  }
}
@Override @Override
public void proxyOpFailureClientOverloaded() { public void proxyOpFailureClientOverloaded() {
if (metrics != null) { if (metrics != null) {

View File

@ -37,4 +37,7 @@ public interface NameserviceRPCMBean {
long getProxyOpNoNamenodes(); long getProxyOpNoNamenodes();
/**
 * Get the number of operations that hit permit limits for this nameservice.
 * @return Number of operations rejected because of permit limits.
 */
long getProxyOpPermitRejected();
/**
 * Get the number of operations that were granted a permit for this
 * nameservice.
 * @return Number of operations accepted to hit a namenode.
 */
long getProxyOpPermitAccepted();
} }

View File

@ -22,6 +22,7 @@ import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.lib.MutableRate;
@ -37,6 +38,7 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-"; public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-";
private final String nsId; private final String nsId;
private final MetricsRegistry registry = new MetricsRegistry("NameserviceRPCActivity");
@Metric("Time for the Router to proxy an operation to the Nameservice") @Metric("Time for the Router to proxy an operation to the Nameservice")
private MutableRate proxy; private MutableRate proxy;
@ -49,19 +51,24 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
private MutableCounterLong proxyOpFailureCommunicate; private MutableCounterLong proxyOpFailureCommunicate;
@Metric("Number of operations to hit no namenodes available") @Metric("Number of operations to hit no namenodes available")
private MutableCounterLong proxyOpNoNamenodes; private MutableCounterLong proxyOpNoNamenodes;
@Metric("Number of operations to hit permit limits")
private MutableCounterLong proxyOpPermitRejected;
@Metric("Number of operations accepted to hit a namenode")
private MutableCounterLong proxyOpPermitAccepted;
public NameserviceRPCMetrics(Configuration conf, String nsId) { public NameserviceRPCMetrics(Configuration conf, String nsId) {
this.nsId = nsId; this.nsId = NAMESERVICE_RPC_METRICS_PREFIX + nsId;
registry.tag("ns", "Nameservice", nsId);
} }
public static NameserviceRPCMetrics create(Configuration conf, public static NameserviceRPCMetrics create(Configuration conf,
String nameService) { String nameService) {
MetricsSystem ms = DefaultMetricsSystem.instance(); MetricsSystem ms = DefaultMetricsSystem.instance();
String name = NAMESERVICE_RPC_METRICS_PREFIX + (nameService.isEmpty() String nsId = (nameService.isEmpty() ?
? "UndefinedNameService"+ ThreadLocalRandom.current().nextInt() "UndefinedNameService" + ThreadLocalRandom.current().nextInt() :
: nameService); nameService);
return ms.register(name, "HDFS Federation NameService RPC Metrics", return ms.register(NAMESERVICE_RPC_METRICS_PREFIX + nsId,
new NameserviceRPCMetrics(conf, name)); "HDFS Federation NameService RPC Metrics", new NameserviceRPCMetrics(conf, nsId));
} }
public void incrProxyOpFailureStandby() { public void incrProxyOpFailureStandby() {
@ -91,6 +98,23 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
return proxyOpNoNamenodes.value(); return proxyOpNoNamenodes.value();
} }
/**
 * Increment the counter of operations that hit permit limits.
 */
public void incrProxyOpPermitRejected() {
  this.proxyOpPermitRejected.incr();
}
/**
 * Read the current value of the counter of operations that hit permit
 * limits.
 *
 * @return Current value of the rejected-permit counter.
 */
@Override
public long getProxyOpPermitRejected() {
  final long rejected = this.proxyOpPermitRejected.value();
  return rejected;
}
/**
 * Increment the counter of operations accepted to hit a namenode.
 */
public void incrProxyOpPermitAccepted() {
  this.proxyOpPermitAccepted.incr();
}
/**
 * Read the current value of the counter of operations accepted to hit a
 * namenode.
 *
 * @return Current value of the accepted-permit counter.
 */
@Override
public long getProxyOpPermitAccepted() {
  final long accepted = this.proxyOpPermitAccepted.value();
  return accepted;
}
/** /**
* Add the time to proxy an operation from the moment the Router sends it to * Add the time to proxy an operation from the moment the Router sends it to

View File

@ -1629,7 +1629,7 @@ public class RouterRpcClient {
// Throw StandByException, // Throw StandByException,
// Clients could fail over and try another router. // Clients could fail over and try another router.
if (rpcMonitor != null) { if (rpcMonitor != null) {
rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected(); rpcMonitor.proxyOpPermitRejected(nsId);
} }
incrRejectedPermitForNs(nsId); incrRejectedPermitForNs(nsId);
LOG.debug("Permit denied for ugi: {} for method: {}", LOG.debug("Permit denied for ugi: {} for method: {}",
@ -1639,6 +1639,9 @@ public class RouterRpcClient {
" is overloaded for NS: " + nsId; " is overloaded for NS: " + nsId;
throw new StandbyException(msg); throw new StandbyException(msg);
} }
if (rpcMonitor != null) {
rpcMonitor.proxyOpPermitAccepted(nsId);
}
incrAcceptedPermitForNs(nsId); incrAcceptedPermitForNs(nsId);
} }
} }

View File

@ -77,6 +77,16 @@ public interface RouterRpcMonitor {
*/ */
void proxyOpFailureCommunicate(String nsId); void proxyOpFailureCommunicate(String nsId);
/**
 * A permit to proxy an operation to a Namenode was rejected.
 * @param nsId Identifier of the nameservice the operation was destined for.
 */
void proxyOpPermitRejected(String nsId);
/**
 * A permit to proxy an operation to a Namenode was accepted.
 * @param nsId Identifier of the nameservice the operation was destined for.
 */
void proxyOpPermitAccepted(String nsId);
/** /**
* Failed to proxy an operation to a Namenode because the client was * Failed to proxy an operation to a Namenode because the client was
* overloaded. * overloaded.