diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 4b89bc2a58..852a1e9ec1 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -213,7 +213,15 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `PendingDataNodeMessageCount` | (HA-only) Current number of pending block-related messages for later processing in the standby NameNode | | `MillisSinceLastLoadedEdits` | (HA-only) Time in milliseconds since the last time standby NameNode load edit log. In active NameNode, set to 0 | | `BlockCapacity` | Current number of block capacity | +| `NumLiveDataNodes` | Number of datanodes which are currently live | +| `NumDeadDataNodes` | Number of datanodes which are currently dead | +| `NumDecomLiveDataNodes` | Number of datanodes which have been decommissioned and are now live | +| `NumDecomDeadDataNodes` | Number of datanodes which have been decommissioned and are now dead | +| `NumDecommissioningDataNodes` | Number of datanodes in decommissioning state | +| `VolumeFailuresTotal` | Total number of volume failures across all Datanodes | +| `EstimatedCapacityLostTotal` | An estimate of the total capacity lost due to volume failures | | `StaleDataNodes` | Current number of DataNodes marked stale due to delayed heartbeat | +| `NumStaleStorages` | Number of storages marked as content stale (after NameNode restart/failover before first block report is received) | | `MissingReplOneBlocks` | Current number of missing blocks with replication factor 1 | | `NumFilesUnderConstruction` | Current number of files under construction | | `NumActiveClients` | Current number of active clients holding lease | @@ -224,6 +232,9 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `TotalSyncTimes` | Total number of milliseconds spent by 
various edit logs in sync operation| | `NameDirSize` | NameNode name directories size in bytes | | `NumTimedOutPendingReconstructions` | The number of timed out reconstructions. Not the number of unique blocks that timed out. | +| `NumInMaintenanceLiveDataNodes` | Number of live Datanodes which are in maintenance state | +| `NumInMaintenanceDeadDataNodes` | Number of dead Datanodes which are in maintenance state | +| `NumEnteringMaintenanceDataNodes` | Number of Datanodes that are entering the maintenance state | | `FSN(Read|Write)Lock`*OperationName*`NumOps` | Total number of acquiring lock by operations | | `FSN(Read|Write)Lock`*OperationName*`AvgTime` | Average time of holding the lock by operations in milliseconds | diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5849712203..229de0555b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4843,16 +4843,20 @@ void shutdown() { } @Override // FSNamesystemMBean + @Metric({"NumLiveDataNodes", "Number of datanodes which are currently live"}) public int getNumLiveDataNodes() { return getBlockManager().getDatanodeManager().getNumLiveDataNodes(); } @Override // FSNamesystemMBean + @Metric({"NumDeadDataNodes", "Number of datanodes which are currently dead"}) public int getNumDeadDataNodes() { return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); } @Override // FSNamesystemMBean + @Metric({"NumDecomLiveDataNodes", + "Number of datanodes which have been decommissioned and are now live"}) public int getNumDecomLiveDataNodes() { final List live = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); @@ -4864,6 +4868,8 @@ 
public int getNumDecomLiveDataNodes() { } @Override // FSNamesystemMBean + @Metric({"NumDecomDeadDataNodes", + "Number of datanodes which have been decommissioned and are now dead"}) public int getNumDecomDeadDataNodes() { final List dead = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, false); @@ -4875,6 +4881,8 @@ public int getNumDecomDeadDataNodes() { } @Override // FSNamesystemMBean + @Metric({"VolumeFailuresTotal", + "Total number of volume failures across all Datanodes"}) public int getVolumeFailuresTotal() { List live = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); @@ -4886,6 +4894,8 @@ public int getVolumeFailuresTotal() { } @Override // FSNamesystemMBean + @Metric({"EstimatedCapacityLostTotal", + "An estimate of the total capacity lost due to volume failures"}) public long getEstimatedCapacityLostTotal() { List live = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); @@ -4901,6 +4911,8 @@ public long getEstimatedCapacityLostTotal() { } @Override // FSNamesystemMBean + @Metric({"NumDecommissioningDataNodes", + "Number of datanodes in decommissioning state"}) public int getNumDecommissioningDataNodes() { return getBlockManager().getDatanodeManager().getDecommissioningNodes() .size(); @@ -4918,6 +4930,8 @@ public int getNumStaleDataNodes() { * before NN receives the first Heartbeat followed by the first Blockreport. 
*/ @Override // FSNamesystemMBean + @Metric({"NumStaleStorages", + "Number of storages marked as content stale"}) public int getNumStaleStorages() { return getBlockManager().getDatanodeManager().getNumStaleStorages(); } @@ -7542,6 +7556,8 @@ public long getBytesInFuture() { @Override // FSNamesystemMBean + @Metric({"NumInMaintenanceLiveDataNodes", + "Number of live Datanodes which are in maintenance state"}) public int getNumInMaintenanceLiveDataNodes() { final List live = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); @@ -7553,6 +7569,8 @@ public int getNumInMaintenanceLiveDataNodes() { } @Override // FSNamesystemMBean + @Metric({"NumInMaintenanceDeadDataNodes", + "Number of dead Datanodes which are in maintenance state"}) public int getNumInMaintenanceDeadDataNodes() { final List dead = new ArrayList(); getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true); @@ -7564,6 +7582,8 @@ public int getNumInMaintenanceDeadDataNodes() { } @Override // FSNamesystemMBean + @Metric({"NumEnteringMaintenanceDataNodes", + "Number of Datanodes that are entering the maintenance state"}) public int getNumEnteringMaintenanceDataNodes() { return getBlockManager().getDatanodeManager().getEnteringMaintenanceNodes() .size(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index f9908fe272..0345cf5ef5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -61,6 +61,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import com.google.common.base.Supplier; @@ -148,6 +149,8 @@ public class MiniDFSCluster implements AutoCloseable { public 
static final String HDFS_MINIDFS_BASEDIR = "hdfs.minidfs.basedir"; public static final String DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY = DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + ".testing"; + public static final String DFS_NAMENODE_DECOMMISSION_INTERVAL_TESTING_KEY + = DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY + ".testing"; // Changing this default may break some tests that assume it is 2. private static final int DEFAULT_STORAGES_PER_DATANODE = 2; @@ -818,7 +821,10 @@ private void initMiniDFSCluster( int safemodeExtension = conf.getInt( DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 0); conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, safemodeExtension); - conf.setInt(DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 3); // 3 second + long decommissionInterval = conf.getTimeDuration( + DFS_NAMENODE_DECOMMISSION_INTERVAL_TESTING_KEY, 3, TimeUnit.SECONDS); + conf.setTimeDuration(DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, + decommissionInterval, TimeUnit.SECONDS); if (!useConfiguredTopologyMappingClass) { conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index c3bb255fc0..b983fd1626 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.metrics; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -41,7 +42,9 @@ import java.io.File; import java.io.IOException; import 
java.security.NoSuchAlgorithmException; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.List; import java.util.Random; import com.google.common.collect.ImmutableList; @@ -69,12 +72,15 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.MockNameNodeResourceChecker; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.hdfs.tools.NNHAServiceTarget; +import org.apache.hadoop.hdfs.util.HostsFileWriter; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -115,6 +121,15 @@ public class TestNameNodeMetrics { CONF.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1); CONF.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, DFS_REDUNDANCY_INTERVAL); + // Set it long enough to essentially disable unless we manually call it + // Used for decommissioning DataNode metrics + CONF.setTimeDuration( + MiniDFSCluster.DFS_NAMENODE_DECOMMISSION_INTERVAL_TESTING_KEY, 999, + TimeUnit.DAYS); + // Next two configs used for checking failed volume metrics + CONF.setTimeDuration(DFSConfigKeys.DFS_DATANODE_DISK_CHECK_MIN_GAP_KEY, + 10, TimeUnit.MILLISECONDS); + CONF.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1); CONF.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, DFS_REDUNDANCY_INTERVAL); CONF.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, @@ -133,6 +148,7 @@ public 
class TestNameNodeMetrics { private DistributedFileSystem fs; private final Random rand = new Random(); private FSNamesystem namesystem; + private HostsFileWriter hostsFileWriter; private BlockManager bm; private Path ecDir; @@ -142,6 +158,8 @@ private static Path getTestPath(String fileName) { @Before public void setUp() throws Exception { + hostsFileWriter = new HostsFileWriter(); + hostsFileWriter.initialize(CONF, "temp/decommission"); cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(DATANODE_COUNT) .build(); cluster.waitActive(); @@ -161,6 +179,10 @@ public void tearDown() throws Exception { MetricsRecordBuilder rb = getMetrics(source); assertQuantileGauges("GetGroups1s", rb); } + if (hostsFileWriter != null) { + hostsFileWriter.cleanup(); + hostsFileWriter = null; + } if (cluster != null) { cluster.shutdown(); cluster = null; @@ -235,6 +257,96 @@ public void testStaleNodes() throws Exception { .getBlockManager()); assertGauge("StaleDataNodes", 0, getMetrics(NS_METRICS)); } + + /** + * Test metrics associated with volume failures. 
+   */
+  @Test
+  public void testVolumeFailures() throws Exception {
+    assertGauge("VolumeFailuresTotal", 0, getMetrics(NS_METRICS));
+    assertGauge("EstimatedCapacityLostTotal", 0L, getMetrics(NS_METRICS));
+    DataNode dn = cluster.getDataNodes().get(0);
+    FsDatasetSpi.FsVolumeReferences volumeReferences =
+        DataNodeTestUtils.getFSDataset(dn).getFsVolumeReferences();
+    FsVolumeImpl fsVolume = (FsVolumeImpl) volumeReferences.get(0);
+    File dataDir = new File(fsVolume.getBaseURI());
+    long capacity = fsVolume.getCapacity();
+    volumeReferences.close();
+    DataNodeTestUtils.injectDataDirFailure(dataDir);
+    DataNodeTestUtils.waitForDiskError(dn, fsVolume);
+    DataNodeTestUtils.triggerHeartbeat(dn);
+    BlockManagerTestUtil.checkHeartbeat(bm);
+    assertGauge("VolumeFailuresTotal", 1, getMetrics(NS_METRICS));
+    assertGauge("EstimatedCapacityLostTotal", capacity, getMetrics(NS_METRICS));
+  }
+
+  /**
+   * Test metrics associated with liveness and decommission status of DataNodes.
+   */
+  @Test
+  public void testDataNodeLivenessAndDecom() throws Exception {
+    List<DataNode> dataNodes = cluster.getDataNodes();
+    DatanodeDescriptor[] dnDescriptors = new DatanodeDescriptor[DATANODE_COUNT];
+    String[] dnAddresses = new String[DATANODE_COUNT];
+    for (int i = 0; i < DATANODE_COUNT; i++) {
+      dnDescriptors[i] = bm.getDatanodeManager()
+          .getDatanode(dataNodes.get(i).getDatanodeId());
+      dnAddresses[i] = dnDescriptors[i].getXferAddr();
+    }
+    // First put all DNs into include
+    hostsFileWriter.initIncludeHosts(dnAddresses);
+    bm.getDatanodeManager().refreshNodes(CONF);
+    assertGauge("NumDecomLiveDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumLiveDataNodes", DATANODE_COUNT, getMetrics(NS_METRICS));
+
+    // Now decommission one DN
+    hostsFileWriter.initExcludeHost(dnAddresses[0]);
+    bm.getDatanodeManager().refreshNodes(CONF);
+    assertGauge("NumDecommissioningDataNodes", 1, getMetrics(NS_METRICS));
+    BlockManagerTestUtil.recheckDecommissionState(bm.getDatanodeManager());
+    assertGauge("NumDecommissioningDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumDecomLiveDataNodes", 1, getMetrics(NS_METRICS));
+    assertGauge("NumLiveDataNodes", DATANODE_COUNT, getMetrics(NS_METRICS));
+
+    // Now kill all DNs by expiring their heartbeats
+    long expireInterval = CONF.getLong(
+        DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
+        DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT) * 2L
+        + CONF.getLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
+        DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT) * 10 * 1000L;
+    for (int i = 0; i < DATANODE_COUNT; i++) {
+      DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNodes.get(i), true);
+      DFSTestUtil.resetLastUpdatesWithOffset(dnDescriptors[i],
+          -(expireInterval + 1));
+    }
+    BlockManagerTestUtil.checkHeartbeat(bm);
+    assertGauge("NumDecomLiveDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumDecomDeadDataNodes", 1, getMetrics(NS_METRICS));
+    assertGauge("NumLiveDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumDeadDataNodes", DATANODE_COUNT, getMetrics(NS_METRICS));
+
+    // Now remove the decommissioned DN altogether
+    String[] includeHosts = new String[dnAddresses.length - 1];
+    for (int i = 0; i < includeHosts.length; i++) {
+      includeHosts[i] = dnAddresses[i + 1];
+    }
+    hostsFileWriter.initIncludeHosts(includeHosts);
+    hostsFileWriter.initExcludeHosts(new ArrayList<>());
+    bm.getDatanodeManager().refreshNodes(CONF);
+    assertGauge("NumDecomLiveDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumDecomDeadDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumLiveDataNodes", 0, getMetrics(NS_METRICS));
+    assertGauge("NumDeadDataNodes", DATANODE_COUNT - 1, getMetrics(NS_METRICS));
+
+    // Finally mark the remaining DNs as live again
+    for (int i = 1; i < dataNodes.size(); i++) {
+      DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNodes.get(i), false);
+      DFSTestUtil.resetLastUpdatesWithOffset(dnDescriptors[i], 0);
+    }
+    BlockManagerTestUtil.checkHeartbeat(bm);
+    assertGauge("NumLiveDataNodes", DATANODE_COUNT - 1, getMetrics(NS_METRICS));
+    assertGauge("NumDeadDataNodes", 0, getMetrics(NS_METRICS));
+  }
 
   /** Test metrics associated with addition of a file */
   @Test