From 73835c73e2d34b3854a71dd29d88c8303d698ac8 Mon Sep 17 00:00:00 2001
From: Hanisha Koneru
Date: Fri, 31 Mar 2017 13:50:29 -0700
Subject: [PATCH] HDFS-11560. Expose slow disks via NameNode JMX. Contributed
 by Hanisha Koneru.

---
 .../blockmanagement/DatanodeManager.java      |  9 +++
 .../blockmanagement/SlowDiskTracker.java      |  3 +
 .../hadoop/hdfs/server/namenode/NameNode.java |  6 ++
 .../server/namenode/NameNodeStatusMXBean.java |  8 +++
 .../blockmanagement/TestSlowDiskTracker.java  | 13 +---
 .../namenode/TestNameNodeStatusMXBean.java    | 59 ++++++++++++++++++-
 6 files changed, 85 insertions(+), 13 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 18135a8a54..c7bdca9c15 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -1907,5 +1907,14 @@ public String getSlowPeersReport() {
   public SlowDiskTracker getSlowDiskTracker() {
     return slowDiskTracker;
   }
 
+  /**
+   * Retrieve information about slow disks as JSON.
+   * Returns null if we are not tracking slow disks.
+   * @return a JSON string of slow disk reports, or null if not tracking
+   */
+  public String getSlowDisksReport() {
+    return slowDiskTracker != null ?
+        slowDiskTracker.getSlowDiskReportAsJsonString() : null;
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java
index 25920a2e07..52fce5d35c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java
@@ -256,6 +256,9 @@ public int compare(DiskLatency o1, DiskLatency o2) {
   public String getSlowDiskReportAsJsonString() {
     ObjectMapper objectMapper = new ObjectMapper();
     try {
+      if (slowDisksReport.isEmpty()) {
+        return null;
+      }
       return objectMapper.writeValueAsString(slowDisksReport);
     } catch (JsonProcessingException e) {
       // Failed to serialize. Don't log the exception call stack.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index e7841f02a9..32d268a9db 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -1826,6 +1826,12 @@ public String getSlowPeersReport() {
         .getSlowPeersReport();
   }
 
+  @Override //NameNodeStatusMXBean
+  public String getSlowDisksReport() {
+    return namesystem.getBlockManager().getDatanodeManager()
+        .getSlowDisksReport();
+  }
+
   /**
    * Shutdown the NN immediately in an ungraceful way. Used when it would be
    * unsafe for the NN to continue operating, e.g. during a failed HA state
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java
index f46b9ae927..ed1b96bdf1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java
@@ -75,4 +75,12 @@ public interface NameNodeStatusMXBean {
    * enabled. The report is in a JSON format.
    */
   String getSlowPeersReport();
+
+
+  /**
+   * Gets the top N slow disks in the cluster, if the feature is enabled.
+   *
+   * @return a JSON string listing disk IDs and their latencies
+   */
+  String getSlowDisksReport();
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSlowDiskTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSlowDiskTracker.java
index e96b96a25a..16dfab208a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSlowDiskTracker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSlowDiskTracker.java
@@ -393,18 +393,9 @@ public void testEmptyReport() throws Exception {
     timer.advance(reportValidityMs);
     tracker.updateSlowDiskReportAsync(timer.monotonicNow());
+    Thread.sleep(OUTLIERS_REPORT_INTERVAL * 2);
 
-    GenericTestUtils.waitFor(new Supplier<Boolean>() {
-      @Override
-      public Boolean get() {
-        return tracker.getSlowDiskReportAsJsonString() != null;
-      }
-    }, 500, 5000);
-
-    ArrayList<DiskLatency> jsonReport = getAndDeserializeJson(
-        tracker.getSlowDiskReportAsJsonString());
-
-    assertTrue(jsonReport.isEmpty());
+    assertTrue(tracker.getSlowDiskReportAsJsonString() == null);
   }
 
   private boolean isDiskInReports(ArrayList<DiskLatency> reports,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java
index c03dc209f9..8fe734e44b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeStatusMXBean.java
@@ -17,17 +17,23 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.server.datanode.TestDataNodeMXBean;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Test;
 
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
 import java.lang.management.ManagementFactory;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 /**
  * Class for testing {@link NameNodeStatusMXBean} implementation.
@@ -38,7 +44,7 @@ public class TestNameNodeStatusMXBean {
       TestNameNodeStatusMXBean.class);
 
   @Test(timeout = 120000L)
-  public void testDataNodeMXBean() throws Exception {
+  public void testNameNodeStatusMXBean() throws Exception {
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = null;
 
@@ -84,6 +90,55 @@ public void testDataNodeMXBean() throws Exception {
       String slowPeersReport = (String)mbs.getAttribute(mxbeanName,
           "SlowPeersReport");
       Assert.assertEquals(nn.getSlowPeersReport(), slowPeersReport);
+
+      // Get attribute "SlowDisksReport"
+      String slowDisksReport = (String)mbs.getAttribute(mxbeanName,
+          "SlowDisksReport");
+      Assert.assertEquals(nn.getSlowDisksReport(), slowDisksReport);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  public void testNameNodeMXBeanSlowDisksEnabled() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setDouble(
+        DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
+    conf.setTimeDuration(
+        DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
+        1000, TimeUnit.MILLISECONDS);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+
+    try {
+      List<DataNode> datanodes = cluster.getDataNodes();
+      Assert.assertEquals(1, datanodes.size());
+      DataNode datanode = datanodes.get(0);
+      String slowDiskPath = "test/data1/slowVolume";
+      datanode.getDiskMetrics().addSlowDiskForTesting(slowDiskPath, null);
+
+      NameNode nn = cluster.getNameNode();
+      DatanodeManager datanodeManager = nn.getNamesystem().getBlockManager()
+          .getDatanodeManager();
+
+      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      ObjectName mxbeanName = new ObjectName(
+          "Hadoop:service=NameNode,name=NameNodeStatus");
+
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          return (datanodeManager.getSlowDisksReport() != null);
+        }
+      }, 1000, 100000);
+
+      String slowDisksReport = (String)mbs.getAttribute(
+          mxbeanName, "SlowDisksReport");
+      Assert.assertEquals(datanodeManager.getSlowDisksReport(),
+          slowDisksReport);
+      Assert.assertTrue(slowDisksReport.contains(slowDiskPath));
     } finally {
       if (cluster != null) {
         cluster.shutdown();
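
For reference, a minimal JMX client sketch showing how the new attribute can be read once this patch is applied. This is not part of the patch: the ObjectName "Hadoop:service=NameNode,name=NameNodeStatus" and the "SlowDisksReport" attribute come from the code above, while the class name, host, and remote JMX service URL are illustrative and assume the NameNode JVM was started with remote JMX enabled.

import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

public class SlowDisksReportReader {
  public static void main(String[] args) throws Exception {
    // Illustrative endpoint: assumes remote JMX is enabled on the NameNode at port 8004.
    JMXServiceURL url = new JMXServiceURL(
        "service:jmx:rmi:///jndi/rmi://namenode-host:8004/jmxrmi");
    JMXConnector connector = JMXConnectorFactory.connect(url);
    try {
      MBeanServerConnection mbs = connector.getMBeanServerConnection();
      // MXBean name and attribute registered by the NameNode in this patch.
      ObjectName mxbeanName = new ObjectName(
          "Hadoop:service=NameNode,name=NameNodeStatus");
      // Null when slow disk tracking is disabled or no slow disks are currently reported.
      String slowDisksReport =
          (String) mbs.getAttribute(mxbeanName, "SlowDisksReport");
      System.out.println(slowDisksReport);
    } finally {
      connector.close();
    }
  }
}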