diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index ebf3e28a76..d390c1e542 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -2215,7 +2215,7 @@ public class DataNode extends ReconfigurableBase
     });
   }
 
-  private void handleDiskError(String failedVolumes) {
+  private void handleDiskError(String failedVolumes, int failedNumber) {
     final boolean hasEnoughResources = data.hasEnoughResource();
     LOG.warn("DataNode.handleDiskError on: " +
         "[{}] Keep Running: {}", failedVolumes, hasEnoughResources);
@@ -2224,7 +2224,7 @@ public class DataNode extends ReconfigurableBase
     // shutdown the DN completely.
     int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR :
                                        DatanodeProtocol.FATAL_DISK_ERROR;
-    metrics.incrVolumeFailures();
+    metrics.incrVolumeFailures(failedNumber);
 
     //inform NameNodes
     for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
@@ -3452,8 +3452,8 @@ public class DataNode extends ReconfigurableBase
     }
 
     data.handleVolumeFailures(unhealthyVolumes);
-    Set<StorageLocation> unhealthyLocations = new HashSet<>(
-        unhealthyVolumes.size());
+    int failedNumber = unhealthyVolumes.size();
+    Set<StorageLocation> unhealthyLocations = new HashSet<>(failedNumber);
 
     StringBuilder sb = new StringBuilder("DataNode failed volumes:");
     for (FsVolumeSpi vol : unhealthyVolumes) {
@@ -3468,8 +3468,8 @@ public class DataNode extends ReconfigurableBase
       LOG.warn("Error occurred when removing unhealthy storage dirs", e);
     }
     LOG.debug("{}", sb);
-    // send blockreport regarding volume failure
-    handleDiskError(sb.toString());
+    // send blockreport regarding volume failure
+    handleDiskError(sb.toString(), failedNumber);
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index ffd0b7b718..68eaf72228 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -399,9 +399,9 @@ public class DataNodeMetrics {
       remoteBytesRead.incr(size);
     }
   }
-
-  public void incrVolumeFailures() {
-    volumeFailures.incr();
+
+  public void incrVolumeFailures(int size) {
+    volumeFailures.incr(size);
   }
 
   public void incrDatanodeNetworkErrors() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index c4527514e7..c6f4f01d89 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -947,4 +950,27 @@ public class TestDataNodeVolumeFailure {
       }
     }, 10, 30 * 1000);
   }
+
+  /*
+   * Fail two volumes, and check the metrics of VolumeFailures.
+   */
+  @Test
+  public void testVolumeFailureTwo() throws Exception {
+    // fail two volumes
+    data_fail = cluster.getInstanceStorageDir(1, 0);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+    data_fail = cluster.getInstanceStorageDir(1, 1);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+
+    final DataNode dn = cluster.getDataNodes().get(1);
+    dn.checkDiskError();
+
+    MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
+    long volumeFailures = getLongCounter("VolumeFailures", rb);
+    assertEquals(2, volumeFailures);
+  }
 }
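
For review context: the patch changes the accounting semantics of the VolumeFailures counter, not just a method signature. Previously one call to handleDiskError incremented the counter once even when a single disk check had detected several failed volumes; now checkDiskError passes the failed-volume count through, so the counter advances by that number. The sketch below is a minimal stand-in for that change, not DataNode code: the class and method names are hypothetical, and a plain AtomicLong replaces the metrics2 MutableCounterLong.

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

// Minimal model of the accounting change in this patch (hypothetical
// names; AtomicLong stands in for the "VolumeFailures" counter).
public class VolumeFailureAccountingSketch {

  private final AtomicLong volumeFailures = new AtomicLong();

  // Old behavior: one increment per disk-error event, regardless of
  // how many volumes that event covered.
  void handleDiskErrorOld(List<String> failedVolumes) {
    volumeFailures.incrementAndGet();
  }

  // Patched behavior: advance the counter by the number of volumes
  // that actually failed in this event.
  void handleDiskErrorNew(List<String> failedVolumes) {
    volumeFailures.addAndGet(failedVolumes.size());
  }

  public static void main(String[] args) {
    VolumeFailureAccountingSketch before = new VolumeFailureAccountingSketch();
    VolumeFailureAccountingSketch after = new VolumeFailureAccountingSketch();

    // One disk check finds two bad volumes, as in testVolumeFailureTwo.
    List<String> failed = Arrays.asList("/data1/dfs", "/data2/dfs");
    before.handleDiskErrorOld(failed);
    after.handleDiskErrorNew(failed);

    System.out.println("old counter:     " + before.volumeFailures.get()); // 1
    System.out.println("patched counter: " + after.volumeFailures.get());  // 2
  }
}

Note that no metrics2 change is needed to support this: MutableCounterLong already provides a delta overload incr(long), which the new incrVolumeFailures(int size) simply delegates to.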