HDFS-16707. RBF: Expose RouterRpcFairnessPolicyController related request record metrics for each nameservice to Prometheus (#4665). Contributed by Jiale Qi.
Reviewed-by: Inigo Goiri <inigoiri@apache.org> Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
parent
d75c6d9d57
commit
1079890ae3
@ -186,6 +186,25 @@ public void proxyOpFailureCommunicate(String nsId) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void proxyOpPermitRejected(String nsId) {
|
||||||
|
if (metrics != null) {
|
||||||
|
metrics.incrProxyOpPermitRejected();
|
||||||
|
}
|
||||||
|
if (nameserviceRPCMetricsMap != null &&
|
||||||
|
nameserviceRPCMetricsMap.containsKey(nsId)) {
|
||||||
|
nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitRejected();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void proxyOpPermitAccepted(String nsId) {
|
||||||
|
if (nameserviceRPCMetricsMap != null &&
|
||||||
|
nameserviceRPCMetricsMap.containsKey(nsId)) {
|
||||||
|
nameserviceRPCMetricsMap.get(nsId).incrProxyOpPermitAccepted();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void proxyOpFailureClientOverloaded() {
|
public void proxyOpFailureClientOverloaded() {
|
||||||
if (metrics != null) {
|
if (metrics != null) {
|
||||||
|
@ -37,4 +37,7 @@ public interface NameserviceRPCMBean {
|
|||||||
|
|
||||||
long getProxyOpNoNamenodes();
|
long getProxyOpNoNamenodes();
|
||||||
|
|
||||||
|
long getProxyOpPermitRejected();
|
||||||
|
|
||||||
|
long getProxyOpPermitAccepted();
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableRate;
|
import org.apache.hadoop.metrics2.lib.MutableRate;
|
||||||
|
|
||||||
@ -37,6 +38,7 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
|
|||||||
public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-";
|
public final static String NAMESERVICE_RPC_METRICS_PREFIX = "NameserviceActivity-";
|
||||||
|
|
||||||
private final String nsId;
|
private final String nsId;
|
||||||
|
private final MetricsRegistry registry = new MetricsRegistry("NameserviceRPCActivity");
|
||||||
|
|
||||||
@Metric("Time for the Router to proxy an operation to the Nameservice")
|
@Metric("Time for the Router to proxy an operation to the Nameservice")
|
||||||
private MutableRate proxy;
|
private MutableRate proxy;
|
||||||
@ -49,19 +51,24 @@ public class NameserviceRPCMetrics implements NameserviceRPCMBean {
|
|||||||
private MutableCounterLong proxyOpFailureCommunicate;
|
private MutableCounterLong proxyOpFailureCommunicate;
|
||||||
@Metric("Number of operations to hit no namenodes available")
|
@Metric("Number of operations to hit no namenodes available")
|
||||||
private MutableCounterLong proxyOpNoNamenodes;
|
private MutableCounterLong proxyOpNoNamenodes;
|
||||||
|
@Metric("Number of operations to hit permit limits")
|
||||||
|
private MutableCounterLong proxyOpPermitRejected;
|
||||||
|
@Metric("Number of operations accepted to hit a namenode")
|
||||||
|
private MutableCounterLong proxyOpPermitAccepted;
|
||||||
|
|
||||||
public NameserviceRPCMetrics(Configuration conf, String nsId) {
|
public NameserviceRPCMetrics(Configuration conf, String nsId) {
|
||||||
this.nsId = nsId;
|
this.nsId = NAMESERVICE_RPC_METRICS_PREFIX + nsId;
|
||||||
|
registry.tag("ns", "Nameservice", nsId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static NameserviceRPCMetrics create(Configuration conf,
|
public static NameserviceRPCMetrics create(Configuration conf,
|
||||||
String nameService) {
|
String nameService) {
|
||||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||||
String name = NAMESERVICE_RPC_METRICS_PREFIX + (nameService.isEmpty()
|
String nsId = (nameService.isEmpty() ?
|
||||||
? "UndefinedNameService"+ ThreadLocalRandom.current().nextInt()
|
"UndefinedNameService" + ThreadLocalRandom.current().nextInt() :
|
||||||
: nameService);
|
nameService);
|
||||||
return ms.register(name, "HDFS Federation NameService RPC Metrics",
|
return ms.register(NAMESERVICE_RPC_METRICS_PREFIX + nsId,
|
||||||
new NameserviceRPCMetrics(conf, name));
|
"HDFS Federation NameService RPC Metrics", new NameserviceRPCMetrics(conf, nsId));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void incrProxyOpFailureStandby() {
|
public void incrProxyOpFailureStandby() {
|
||||||
@ -91,6 +98,23 @@ public long getProxyOpNoNamenodes() {
|
|||||||
return proxyOpNoNamenodes.value();
|
return proxyOpNoNamenodes.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void incrProxyOpPermitRejected() {
|
||||||
|
proxyOpPermitRejected.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getProxyOpPermitRejected() {
|
||||||
|
return proxyOpPermitRejected.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrProxyOpPermitAccepted() {
|
||||||
|
proxyOpPermitAccepted.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getProxyOpPermitAccepted() {
|
||||||
|
return proxyOpPermitAccepted.value();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add the time to proxy an operation from the moment the Router sends it to
|
* Add the time to proxy an operation from the moment the Router sends it to
|
||||||
|
@ -1629,7 +1629,7 @@ private void acquirePermit(final String nsId, final UserGroupInformation ugi,
|
|||||||
// Throw StandByException,
|
// Throw StandByException,
|
||||||
// Clients could fail over and try another router.
|
// Clients could fail over and try another router.
|
||||||
if (rpcMonitor != null) {
|
if (rpcMonitor != null) {
|
||||||
rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
|
rpcMonitor.proxyOpPermitRejected(nsId);
|
||||||
}
|
}
|
||||||
incrRejectedPermitForNs(nsId);
|
incrRejectedPermitForNs(nsId);
|
||||||
LOG.debug("Permit denied for ugi: {} for method: {}",
|
LOG.debug("Permit denied for ugi: {} for method: {}",
|
||||||
@ -1639,6 +1639,9 @@ private void acquirePermit(final String nsId, final UserGroupInformation ugi,
|
|||||||
" is overloaded for NS: " + nsId;
|
" is overloaded for NS: " + nsId;
|
||||||
throw new StandbyException(msg);
|
throw new StandbyException(msg);
|
||||||
}
|
}
|
||||||
|
if (rpcMonitor != null) {
|
||||||
|
rpcMonitor.proxyOpPermitAccepted(nsId);
|
||||||
|
}
|
||||||
incrAcceptedPermitForNs(nsId);
|
incrAcceptedPermitForNs(nsId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,6 +77,16 @@ void init(
|
|||||||
*/
|
*/
|
||||||
void proxyOpFailureCommunicate(String nsId);
|
void proxyOpFailureCommunicate(String nsId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rejected to proxy an operation to a Namenode.
|
||||||
|
*/
|
||||||
|
void proxyOpPermitRejected(String nsId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Accepted to proxy an operation to a Namenode.
|
||||||
|
*/
|
||||||
|
void proxyOpPermitAccepted(String nsId);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Failed to proxy an operation to a Namenode because the client was
|
* Failed to proxy an operation to a Namenode because the client was
|
||||||
* overloaded.
|
* overloaded.
|
||||||
|
Loading…
Reference in New Issue
Block a user