HDFS-16678. RBF should support disabling getNodeUsage() in RBFMetrics (#4606)
This commit is contained in:
parent
521e65acfe
commit
e0c8c6eed4
@ -50,6 +50,7 @@
|
|||||||
import javax.management.ObjectName;
|
import javax.management.ObjectName;
|
||||||
import javax.management.StandardMBean;
|
import javax.management.StandardMBean;
|
||||||
|
|
||||||
|
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||||
@ -113,6 +114,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean {
|
|||||||
/** Prevent holding the page from loading for too long. */
|
/** Prevent holding the page from loading for too long. */
|
||||||
private final long timeOut;
|
private final long timeOut;
|
||||||
|
|
||||||
|
/** Enable/Disable getNodeUsage. **/
|
||||||
|
private boolean enableGetDNUsage;
|
||||||
|
|
||||||
/** Router interface. */
|
/** Router interface. */
|
||||||
private final Router router;
|
private final Router router;
|
||||||
@ -175,6 +178,8 @@ public RBFMetrics(Router router) throws IOException {
|
|||||||
Configuration conf = router.getConfig();
|
Configuration conf = router.getConfig();
|
||||||
this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
|
this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
|
||||||
RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
|
RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
|
||||||
|
this.enableGetDNUsage = conf.getBoolean(RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY,
|
||||||
|
RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT);
|
||||||
this.topTokenRealOwners = conf.getInt(
|
this.topTokenRealOwners = conf.getInt(
|
||||||
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
|
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
|
||||||
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
|
RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
|
||||||
@ -184,6 +189,11 @@ public RBFMetrics(Router router) throws IOException {
|
|||||||
ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this);
|
ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setEnableGetDNUsage(boolean enableGetDNUsage) {
|
||||||
|
this.enableGetDNUsage = enableGetDNUsage;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unregister the JMX beans.
|
* Unregister the JMX beans.
|
||||||
*/
|
*/
|
||||||
@ -537,35 +547,34 @@ public int getNumEnteringMaintenanceDataNodes() {
|
|||||||
|
|
||||||
@Override // NameNodeMXBean
|
@Override // NameNodeMXBean
|
||||||
public String getNodeUsage() {
|
public String getNodeUsage() {
|
||||||
float median = 0;
|
double median = 0;
|
||||||
float max = 0;
|
double max = 0;
|
||||||
float min = 0;
|
double min = 0;
|
||||||
float dev = 0;
|
double dev = 0;
|
||||||
|
|
||||||
final Map<String, Map<String, Object>> info = new HashMap<>();
|
final Map<String, Map<String, Object>> info = new HashMap<>();
|
||||||
try {
|
try {
|
||||||
RouterRpcServer rpcServer = this.router.getRpcServer();
|
DatanodeInfo[] live = null;
|
||||||
DatanodeInfo[] live = rpcServer.getDatanodeReport(
|
if (this.enableGetDNUsage) {
|
||||||
DatanodeReportType.LIVE, false, timeOut);
|
RouterRpcServer rpcServer = this.router.getRpcServer();
|
||||||
|
live = rpcServer.getDatanodeReport(DatanodeReportType.LIVE, false, timeOut);
|
||||||
|
} else {
|
||||||
|
LOG.debug("Getting node usage is disabled.");
|
||||||
|
}
|
||||||
|
|
||||||
if (live.length > 0) {
|
if (live != null && live.length > 0) {
|
||||||
float totalDfsUsed = 0;
|
double[] usages = new double[live.length];
|
||||||
float[] usages = new float[live.length];
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (DatanodeInfo dn : live) {
|
for (DatanodeInfo dn : live) {
|
||||||
usages[i++] = dn.getDfsUsedPercent();
|
usages[i++] = dn.getDfsUsedPercent();
|
||||||
totalDfsUsed += dn.getDfsUsedPercent();
|
|
||||||
}
|
}
|
||||||
totalDfsUsed /= live.length;
|
|
||||||
Arrays.sort(usages);
|
Arrays.sort(usages);
|
||||||
median = usages[usages.length / 2];
|
median = usages[usages.length / 2];
|
||||||
max = usages[usages.length - 1];
|
max = usages[usages.length - 1];
|
||||||
min = usages[0];
|
min = usages[0];
|
||||||
|
|
||||||
for (i = 0; i < usages.length; i++) {
|
StandardDeviation deviation = new StandardDeviation();
|
||||||
dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
|
dev = deviation.evaluate(usages);
|
||||||
}
|
|
||||||
dev = (float) Math.sqrt(dev / usages.length);
|
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Cannot get the live nodes: {}", e.getMessage());
|
LOG.error("Cannot get the live nodes: {}", e.getMessage());
|
||||||
|
@ -321,6 +321,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
|
|||||||
FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire";
|
FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire";
|
||||||
public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT =
|
public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT =
|
||||||
TimeUnit.SECONDS.toMillis(10);
|
TimeUnit.SECONDS.toMillis(10);
|
||||||
|
public static final String DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY =
|
||||||
|
FEDERATION_ROUTER_PREFIX + "enable.get.dn.usage";
|
||||||
|
public static final boolean DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT = true;
|
||||||
|
|
||||||
// HDFS Router-based federation quota
|
// HDFS Router-based federation quota
|
||||||
public static final String DFS_ROUTER_QUOTA_ENABLE =
|
public static final String DFS_ROUTER_QUOTA_ENABLE =
|
||||||
|
@ -195,6 +195,16 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.federation.router.enable.get.dn.usage</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>
|
||||||
|
If true, the getNodeUsage method in RBFMetrics will return an up-to-date
|
||||||
|
result collected from the downstream nameservices, but this can take a long
|
||||||
|
time and tie up thread resources. If false, it will return a placeholder result with all values set to 0.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.federation.router.metrics.class</name>
|
<name>dfs.federation.router.metrics.class</name>
|
||||||
<value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
|
<value>org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor</value>
|
||||||
|
@ -131,6 +131,7 @@
|
|||||||
import org.apache.hadoop.service.Service.STATE;
|
import org.apache.hadoop.service.Service.STATE;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.LambdaTestUtils;
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
|
import org.codehaus.jettison.json.JSONException;
|
||||||
import org.codehaus.jettison.json.JSONObject;
|
import org.codehaus.jettison.json.JSONObject;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
@ -2181,4 +2182,34 @@ public void testContentSummaryWithSnapshot() throws Exception {
|
|||||||
routerDFS.delete(dirPath, true);
|
routerDFS.delete(dirPath, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDisableNodeUsageInRBFMetrics() throws JSONException {
|
||||||
|
RBFMetrics rbfMetrics = router.getRouter().getMetrics();
|
||||||
|
FederationRPCMetrics federationRPCMetrics = router.getRouter().getRpcServer().getRPCMetrics();
|
||||||
|
|
||||||
|
long proxyOpBefore = federationRPCMetrics.getProxyOps();
|
||||||
|
String nodeUsageEnable = router.getRouter().getMetrics().getNodeUsage();
|
||||||
|
assertNotNull(nodeUsageEnable);
|
||||||
|
long proxyOpAfterWithEnable = federationRPCMetrics.getProxyOps();
|
||||||
|
assertEquals(proxyOpBefore + 2, proxyOpAfterWithEnable);
|
||||||
|
|
||||||
|
rbfMetrics.setEnableGetDNUsage(false);
|
||||||
|
String nodeUsageDisable = rbfMetrics.getNodeUsage();
|
||||||
|
assertNotNull(nodeUsageDisable);
|
||||||
|
long proxyOpAfterWithDisable = federationRPCMetrics.getProxyOps();
|
||||||
|
assertEquals(proxyOpAfterWithEnable, proxyOpAfterWithDisable);
|
||||||
|
JSONObject jsonObject = new JSONObject(nodeUsageDisable);
|
||||||
|
JSONObject json = jsonObject.getJSONObject("nodeUsage");
|
||||||
|
assertEquals("0.00%", json.get("min"));
|
||||||
|
assertEquals("0.00%", json.get("median"));
|
||||||
|
assertEquals("0.00%", json.get("max"));
|
||||||
|
assertEquals("0.00%", json.get("stdDev"));
|
||||||
|
|
||||||
|
rbfMetrics.setEnableGetDNUsage(true);
|
||||||
|
String nodeUsageWithReEnable = rbfMetrics.getNodeUsage();
|
||||||
|
assertNotNull(nodeUsageWithReEnable);
|
||||||
|
long proxyOpAfterWithReEnable = federationRPCMetrics.getProxyOps();
|
||||||
|
assertEquals(proxyOpAfterWithDisable + 2, proxyOpAfterWithReEnable);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user