HDFS-11560. Expose slow disks via NameNode JMX. Contributed by Hanisha Koneru.
This commit is contained in:
parent
5485d93bda
commit
73835c73e2
@ -1907,5 +1907,14 @@ public String getSlowPeersReport() {
|
||||
public SlowDiskTracker getSlowDiskTracker() {
|
||||
return slowDiskTracker;
|
||||
}
|
||||
/**
|
||||
* Retrieve information about slow disks as a JSON.
|
||||
* Returns null if we are not tracking slow disks.
|
||||
* @return
|
||||
*/
|
||||
public String getSlowDisksReport() {
|
||||
return slowDiskTracker != null ?
|
||||
slowDiskTracker.getSlowDiskReportAsJsonString() : null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -256,6 +256,9 @@ public int compare(DiskLatency o1, DiskLatency o2) {
|
||||
public String getSlowDiskReportAsJsonString() {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
try {
|
||||
if (slowDisksReport.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return objectMapper.writeValueAsString(slowDisksReport);
|
||||
} catch (JsonProcessingException e) {
|
||||
// Failed to serialize. Don't log the exception call stack.
|
||||
|
@ -1826,6 +1826,12 @@ public String getSlowPeersReport() {
|
||||
.getSlowPeersReport();
|
||||
}
|
||||
|
||||
@Override //NameNodeStatusMXBean
|
||||
public String getSlowDisksReport() {
|
||||
return namesystem.getBlockManager().getDatanodeManager()
|
||||
.getSlowDisksReport();
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown the NN immediately in an ungraceful way. Used when it would be
|
||||
* unsafe for the NN to continue operating, e.g. during a failed HA state
|
||||
|
@ -75,4 +75,12 @@ public interface NameNodeStatusMXBean {
|
||||
* enabled. The report is in a JSON format.
|
||||
*/
|
||||
String getSlowPeersReport();
|
||||
|
||||
|
||||
/**
|
||||
* Gets the topN slow disks in the cluster, if the feature is enabled.
|
||||
*
|
||||
* @return JSON string of list of diskIDs and latencies
|
||||
*/
|
||||
String getSlowDisksReport();
|
||||
}
|
||||
|
@ -393,18 +393,9 @@ public void testEmptyReport() throws Exception {
|
||||
timer.advance(reportValidityMs);
|
||||
|
||||
tracker.updateSlowDiskReportAsync(timer.monotonicNow());
|
||||
Thread.sleep(OUTLIERS_REPORT_INTERVAL*2);
|
||||
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
return tracker.getSlowDiskReportAsJsonString() != null;
|
||||
}
|
||||
}, 500, 5000);
|
||||
|
||||
ArrayList<DiskLatency> jsonReport = getAndDeserializeJson(
|
||||
tracker.getSlowDiskReportAsJsonString());
|
||||
|
||||
assertTrue(jsonReport.isEmpty());
|
||||
assertTrue(tracker.getSlowDiskReportAsJsonString() == null);
|
||||
}
|
||||
|
||||
private boolean isDiskInReports(ArrayList<DiskLatency> reports,
|
||||
|
@ -17,17 +17,23 @@
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.server.datanode.TestDataNodeMXBean;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import javax.management.MBeanServer;
|
||||
import javax.management.ObjectName;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Class for testing {@link NameNodeStatusMXBean} implementation.
|
||||
@ -38,7 +44,7 @@ public class TestNameNodeStatusMXBean {
|
||||
TestNameNodeStatusMXBean.class);
|
||||
|
||||
@Test(timeout = 120000L)
|
||||
public void testDataNodeMXBean() throws Exception {
|
||||
public void testNameNodeStatusMXBean() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
MiniDFSCluster cluster = null;
|
||||
|
||||
@ -84,6 +90,55 @@ public void testDataNodeMXBean() throws Exception {
|
||||
String slowPeersReport = (String)mbs.getAttribute(mxbeanName,
|
||||
"SlowPeersReport");
|
||||
Assert.assertEquals(nn.getSlowPeersReport(), slowPeersReport);
|
||||
|
||||
// Get attribute "SlowDisksReport"
|
||||
String slowDisksReport = (String)mbs.getAttribute(mxbeanName,
|
||||
"SlowDisksReport");
|
||||
Assert.assertEquals(nn.getSlowDisksReport(), slowDisksReport);
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNameNodeMXBeanSlowDisksEnabled() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setDouble(
|
||||
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
|
||||
conf.setTimeDuration(
|
||||
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
|
||||
1000, TimeUnit.MILLISECONDS);
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
|
||||
|
||||
try {
|
||||
List<DataNode> datanodes = cluster.getDataNodes();
|
||||
Assert.assertEquals(datanodes.size(), 1);
|
||||
DataNode datanode = datanodes.get(0);
|
||||
String slowDiskPath = "test/data1/slowVolume";
|
||||
datanode.getDiskMetrics().addSlowDiskForTesting(slowDiskPath, null);
|
||||
|
||||
NameNode nn = cluster.getNameNode();
|
||||
DatanodeManager datanodeManager = nn.getNamesystem().getBlockManager()
|
||||
.getDatanodeManager();
|
||||
|
||||
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||
ObjectName mxbeanName = new ObjectName(
|
||||
"Hadoop:service=NameNode,name=NameNodeStatus");
|
||||
|
||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
return (datanodeManager.getSlowDisksReport() != null);
|
||||
}
|
||||
}, 1000, 100000);
|
||||
|
||||
String slowDisksReport = (String)mbs.getAttribute(
|
||||
mxbeanName, "SlowDisksReport");
|
||||
Assert.assertEquals(datanodeManager.getSlowDisksReport(),
|
||||
slowDisksReport);
|
||||
Assert.assertTrue(slowDisksReport.contains(slowDiskPath));
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
|
Loading…
Reference in New Issue
Block a user