HDFS-16707. RBF: Expose RouterRpcFairnessPolicyController related request record metrics for each nameservice to Prometheus (#4665). Contributed by Jiale Qi.

Reviewed-by: Inigo Goiri <inigoiri@apache.org>
Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
SevenAddSix 2023-05-01 23:52:26 +08:00 committed by GitHub
parent d75c6d9d57
commit 1079890ae3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 66 additions and 7 deletions

View File

@ -186,6 +186,25 @@ public void proxyOpFailureCommunicate(String nsId) {
}
}
/**
 * Counts an operation that was refused a permit, both in the router-wide
 * metrics (when present) and in the metrics registered for the nameservice.
 *
 * @param nsId key used to look up the per-nameservice metrics.
 */
@Override
public void proxyOpPermitRejected(String nsId) {
  // Router-wide counter.
  if (metrics != null) {
    metrics.incrProxyOpPermitRejected();
  }
  // Per-nameservice counter; nothing to do when the id is not tracked.
  if (nameserviceRPCMetricsMap == null ||
      !nameserviceRPCMetricsMap.containsKey(nsId)) {
    return;
  }
  nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitRejected();
}
/**
 * Counts an operation that was granted a permit in the metrics registered
 * for the nameservice. Does nothing when no metrics are tracked for that id.
 *
 * @param nsId key used to look up the per-nameservice metrics.
 */
@Override
public void proxyOpPermitAccepted(String nsId) {
  if (nameserviceRPCMetricsMap == null) {
    return;
  }
  if (!nameserviceRPCMetricsMap.containsKey(nsId)) {
    return;
  }
  nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitAccepted();
}
@Override
public void proxyOpFailureClientOverloaded() {
if (metrics != null) {

View File

@ -37,4 +37,7 @@ public interface NameserviceRPCMBean {
/**
 * Get the number of operations that hit no namenodes available.
 * @return Number of operations that hit no namenodes available.
 */
long getProxyOpNoNamenodes();
/**
 * Get the number of operations that hit permit limits.
 * @return Number of operations that hit permit limits.
 */
long getProxyOpPermitRejected();
/**
 * Get the number of operations accepted to hit a namenode.
 * @return Number of operations accepted to hit a namenode.
 */
long getProxyOpPermitAccepted();
}

View File

@ -22,6 +22,7 @@
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableRate;
@ -37,6 +38,7 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-";
private final String nsId;
private final MetricsRegistry registry = new MetricsRegistry("NameserviceRPCActivity");
@Metric("Time for the Router to proxy an operation to the Nameservice")
private MutableRate proxy;
@ -49,19 +51,24 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
private MutableCounterLong proxyOpFailureCommunicate;
@Metric("Number of operations to hit no namenodes available")
private MutableCounterLong proxyOpNoNamenodes;
@Metric("Number of operations to hit permit limits")
private MutableCounterLong proxyOpPermitRejected;
@Metric("Number of operations accepted to hit a namenode")
private MutableCounterLong proxyOpPermitAccepted;
/**
 * Creates the RPC metrics holder for one nameservice.
 *
 * @param conf Configuration; not read in the visible body.
 * @param nsId Nameservice identifier, used for the stored name and the "ns" tag.
 */
public NameserviceRPCMetrics(Configuration conf, String nsId) {
// NOTE(review): the next two lines both assign the final field nsId; they look
// like the pre-change and post-change forms of one statement shown together.
// Confirm which one is meant to remain.
this.nsId = nsId;
this.nsId = NAMESERVICE_RPC_METRICS_PREFIX + nsId;
// Tag the registry with the identifier as passed in (the parameter, without
// the prefix added to the field above).
registry.tag("ns", "Nameservice", nsId);
}
/**
 * Builds a NameserviceRPCMetrics instance and registers it with the default
 * metrics system.
 *
 * @param conf Configuration handed to the constructor.
 * @param nameService Nameservice identifier; when empty, "UndefinedNameService"
 *                    followed by a random int is used in its place.
 * @return Whatever MetricsSystem#register returns for the new instance.
 */
public static NameserviceRPCMetrics create(Configuration conf,
String nameService) {
MetricsSystem ms = DefaultMetricsSystem.instance();
// NOTE(review): two variants of the registration follow back to back, and the
// statements after the first return cannot be reached. They look like the
// pre-change and post-change forms shown together -- confirm which remains.
// Variant 1: the prefixed name is used for registration and is also passed to
// the constructor.
String name = NAMESERVICE_RPC_METRICS_PREFIX + (nameService.isEmpty()
? "UndefinedNameService"+ ThreadLocalRandom.current().nextInt()
: nameService);
return ms.register(name, "HDFS Federation NameService RPC Metrics",
new NameserviceRPCMetrics(conf, name));
// Variant 2: the prefix is added only to the registration name; the
// constructor receives the identifier without the prefix.
String nsId = (nameService.isEmpty() ?
"UndefinedNameService" + ThreadLocalRandom.current().nextInt() :
nameService);
return ms.register(NAMESERVICE_RPC_METRICS_PREFIX + nsId,
"HDFS Federation NameService RPC Metrics", new NameserviceRPCMetrics(conf, nsId));
}
public void incrProxyOpFailureStandby() {
@ -91,6 +98,23 @@ public long getProxyOpNoNamenodes() {
return proxyOpNoNamenodes.value();
}
/**
 * Increment the counter of operations that hit permit limits.
 */
public void incrProxyOpPermitRejected() {
  this.proxyOpPermitRejected.incr();
}
/**
 * Read the current value of the counter of operations that hit permit limits.
 *
 * @return Current counter value.
 */
@Override
public long getProxyOpPermitRejected() {
  final long rejected = this.proxyOpPermitRejected.value();
  return rejected;
}
/**
 * Increment the counter of operations accepted to hit a namenode.
 */
public void incrProxyOpPermitAccepted() {
  this.proxyOpPermitAccepted.incr();
}
/**
 * Read the current value of the counter of operations accepted to hit a
 * namenode.
 *
 * @return Current counter value.
 */
@Override
public long getProxyOpPermitAccepted() {
  final long accepted = this.proxyOpPermitAccepted.value();
  return accepted;
}
/**
* Add the time to proxy an operation from the moment the Router sends it to

View File

@ -1629,7 +1629,7 @@ private void acquirePermit(final String nsId, final UserGroupInformation ugi,
// Throw StandByException,
// Clients could fail over and try another router.
if (rpcMonitor != null) {
rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
rpcMonitor.proxyOpPermitRejected(nsId);
}
incrRejectedPermitForNs(nsId);
LOG.debug("Permit denied for ugi: {} for method: {}",
@ -1639,6 +1639,9 @@ private void acquirePermit(final String nsId, final UserGroupInformation ugi,
" is overloaded for NS: " + nsId;
throw new StandbyException(msg);
}
if (rpcMonitor != null) {
rpcMonitor.proxyOpPermitAccepted(nsId);
}
incrAcceptedPermitForNs(nsId);
}
}

View File

@ -77,6 +77,16 @@ void init(
*/
void proxyOpFailureCommunicate(String nsId);
/**
 * Record that a permit to proxy an operation to a Namenode was rejected.
 *
 * @param nsId Identifier of the nameservice the operation was headed to.
 */
void proxyOpPermitRejected(String nsId);
/**
 * Record that a permit to proxy an operation to a Namenode was accepted.
 *
 * @param nsId Identifier of the nameservice the operation was headed to.
 */
void proxyOpPermitAccepted(String nsId);
/**
* Failed to proxy an operation to a Namenode because the client was
* overloaded.