diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java index be88069b49..a9d761f45d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java @@ -50,6 +50,7 @@ import javax.management.ObjectName; import javax.management.StandardMBean; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; @@ -113,6 +114,8 @@ public class RBFMetrics implements RouterMBean, FederationMBean { /** Prevent holding the page from load too long. */ private final long timeOut; + /** Whether getNodeUsage() fetches the live datanode report through the Router RPC server; when false the report is skipped and the usage statistics keep their initial value of 0. */ + private boolean enableGetDNUsage; /** Router interface. 
*/ private final Router router; @@ -175,6 +178,8 @@ public RBFMetrics(Router router) throws IOException { Configuration conf = router.getConfig(); this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT, RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS); + this.enableGetDNUsage = conf.getBoolean(RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY, + RBFConfigKeys.DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT); this.topTokenRealOwners = conf.getInt( RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY, RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT); @@ -184,6 +189,11 @@ public RBFMetrics(Router router) throws IOException { ms.register(RBFMetrics.class.getName(), "RBFActivity Metrics", this); } + /** Test hook that overrides the enableGetDNUsage value loaded from configuration in the constructor. */ @VisibleForTesting + public void setEnableGetDNUsage(boolean enableGetDNUsage) { + this.enableGetDNUsage = enableGetDNUsage; + } + /** * Unregister the JMX beans. */ @@ -537,35 +547,34 @@ public int getNumEnteringMaintenanceDataNodes() { @Override // NameNodeMXBean public String getNodeUsage() { - float median = 0; - float max = 0; - float min = 0; - float dev = 0; + double median = 0; + double max = 0; + double min = 0; + double dev = 0; final Map> info = new HashMap<>(); try { - RouterRpcServer rpcServer = this.router.getRpcServer(); - DatanodeInfo[] live = rpcServer.getDatanodeReport( - DatanodeReportType.LIVE, false, timeOut); + DatanodeInfo[] live = null; + if (this.enableGetDNUsage) { + RouterRpcServer rpcServer = this.router.getRpcServer(); + live = rpcServer.getDatanodeReport(DatanodeReportType.LIVE, false, timeOut); + } else { + LOG.debug("Getting node usage is disabled."); + } - if (live.length > 0) { - float totalDfsUsed = 0; - float[] usages = new float[live.length]; + if (live != null && live.length > 0) { /* live is still null when node-usage collection is disabled, so the statistics keep their 0 defaults */ + double[] usages = new double[live.length]; int i = 0; for (DatanodeInfo dn : live) { usages[i++] = dn.getDfsUsedPercent(); - totalDfsUsed += dn.getDfsUsedPercent(); } - totalDfsUsed /= live.length; 
Arrays.sort(usages); median = usages[usages.length / 2]; max = usages[usages.length - 1]; min = usages[0]; - for (i = 0; i < usages.length; i++) { - dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed); - } - dev = (float) Math.sqrt(dev / usages.length); + StandardDeviation deviation = new StandardDeviation(); /* NOTE(review): the no-arg commons-math3 StandardDeviation is bias-corrected (divides by n - 1), while the removed loop divided by usages.length; confirm the changed stdDev value is intended. */ + dev = deviation.evaluate(usages); } } catch (IOException e) { LOG.error("Cannot get the live nodes: {}", e.getMessage()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index 266e3c144f..3a317717ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -321,6 +321,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic { FEDERATION_ROUTER_PREFIX + "dn-report.cache-expire"; public static final long DN_REPORT_CACHE_EXPIRE_MS_DEFAULT = TimeUnit.SECONDS.toMillis(10); + public static final String DFS_ROUTER_ENABLE_GET_DN_USAGE_KEY = + FEDERATION_ROUTER_PREFIX + "enable.get.dn.usage"; + public static final boolean DFS_ROUTER_ENABLE_GET_DN_USAGE_DEFAULT = true; // HDFS Router-based federation quota public static final String DFS_ROUTER_QUOTA_ENABLE = diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index a261ddc583..58e4b27ac8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -195,6 +195,16 @@ + + dfs.federation.router.enable.get.dn.usage + true + + If true, the getNodeUsage method in RBFMetrics will return 
an up-to-date + result collecting from downstream nameservices. But it will take a long + time and take up thread resources. If false, it will return a mock result with all 0. + + + dfs.federation.router.metrics.class org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCPerformanceMonitor diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java index 31cc18fc88..11417ef144 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java @@ -131,6 +131,7 @@ import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; +import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; import org.junit.AfterClass; import org.junit.Before; @@ -2181,4 +2182,34 @@ public void testContentSummaryWithSnapshot() throws Exception { routerDFS.delete(dirPath, true); } } + + @Test + public void testDisableNodeUsageInRBFMetrics() throws JSONException { + RBFMetrics rbfMetrics = router.getRouter().getMetrics(); + FederationRPCMetrics federationRPCMetrics = router.getRouter().getRpcServer().getRPCMetrics(); + + long proxyOpBefore = federationRPCMetrics.getProxyOps(); + String nodeUsageEnable = router.getRouter().getMetrics().getNodeUsage(); + assertNotNull(nodeUsageEnable); + long proxyOpAfterWithEnable = federationRPCMetrics.getProxyOps(); + assertEquals(proxyOpBefore + 2, proxyOpAfterWithEnable); /* NOTE(review): the expected delta of 2 presumably equals the number of nameservices in this test cluster; TODO confirm */ + + rbfMetrics.setEnableGetDNUsage(false); + String nodeUsageDisable = rbfMetrics.getNodeUsage(); + assertNotNull(nodeUsageDisable); + long proxyOpAfterWithDisable = 
federationRPCMetrics.getProxyOps(); + assertEquals(proxyOpAfterWithEnable, proxyOpAfterWithDisable); + JSONObject jsonObject = new JSONObject(nodeUsageDisable); + JSONObject json = jsonObject.getJSONObject("nodeUsage"); + assertEquals("0.00%", json.get("min")); + assertEquals("0.00%", json.get("median")); + assertEquals("0.00%", json.get("max")); + assertEquals("0.00%", json.get("stdDev")); + + rbfMetrics.setEnableGetDNUsage(true); /* re-enable collection and check that proxy ops are issued again */ + String nodeUsageWithReEnable = rbfMetrics.getNodeUsage(); + assertNotNull(nodeUsageWithReEnable); + long proxyOpAfterWithReEnable = federationRPCMetrics.getProxyOps(); + assertEquals(proxyOpAfterWithDisable + 2, proxyOpAfterWithReEnable); + } }