diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index 320c680cc4..8ab4bbaaeb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -730,7 +730,7 @@ public int hashCode() {
     // Super implementation is sufficient
     return super.hashCode();
   }
-  
+
   @Override
   public boolean equals(Object obj) {
     // Sufficient to use super equality as datanodes are uniquely identified
@@ -745,14 +745,14 @@ public class LeavingServiceStatus {
     private int underReplicatedInOpenFiles;
     private long startTime;

-    synchronized void set(int underRep,
-        int onlyRep, int underConstruction) {
+    synchronized void set(int underRepInOpenFiles, int underRepBlocks,
+        int outOfServiceOnlyRep) {
       if (!isDecommissionInProgress() && !isEnteringMaintenance()) {
         return;
       }
-      underReplicatedBlocks = underRep;
-      outOfServiceOnlyReplicas = onlyRep;
-      underReplicatedInOpenFiles = underConstruction;
+      underReplicatedInOpenFiles = underRepInOpenFiles;
+      underReplicatedBlocks = underRepBlocks;
+      outOfServiceOnlyReplicas = outOfServiceOnlyRep;
     }

     /** @return the number of under-replicated blocks */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
index b1cfd78090..ae7982628f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
@@ -634,9 +634,12 @@ private void processBlocksInternal(
       final List<BlockInfo> insufficientList,
       boolean pruneReliableBlocks) {
     boolean firstReplicationLog = true;
-    int lowRedundancyBlocks = 0;
-    int outOfServiceOnlyReplicas = 0;
+    // Low redundancy in UC Blocks only
     int lowRedundancyInOpenFiles = 0;
+    // All low redundancy blocks. Includes lowRedundancyInOpenFiles.
+    int lowRedundancyBlocks = 0;
+    // All maintenance and decommission replicas.
+    int outOfServiceOnlyReplicas = 0;
     while (it.hasNext()) {
       if (insufficientList == null
           && numBlocksCheckedPerLock >= numBlocksPerCheck) {
@@ -726,8 +729,8 @@ private void processBlocksInternal(
       }
     }

-    datanode.getLeavingServiceStatus().set(lowRedundancyBlocks,
-        outOfServiceOnlyReplicas, lowRedundancyInOpenFiles);
+    datanode.getLeavingServiceStatus().set(lowRedundancyInOpenFiles,
+        lowRedundancyBlocks, outOfServiceOnlyReplicas);
   }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
index be984f9826..e61df8716e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java
@@ -77,7 +77,7 @@ public int readOnlyReplicas() {
   /**
    * @return decommissioned and decommissioning replicas
    */
   public int decommissionedAndDecommissioning() {
-    return (int) (get(DECOMMISSIONED) + get(DECOMMISSIONING));
+    return decommissioned() + decommissioning();
   }

   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 249324bed1..2a1f4d1c76 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -5500,6 +5500,7 @@ public String getDeadNodes() {
       Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
           .put("lastContact", getLastContact(node))
           .put("decommissioned", node.isDecommissioned())
+          .put("adminState", node.getAdminState().toString())
           .put("xferaddr", node.getXferAddr())
           .build();
       info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo);
@@ -5524,7 +5525,6 @@ public String getDecomNodes() {
           .put("xferaddr", node.getXferAddr())
           .put("underReplicatedBlocks",
               node.getLeavingServiceStatus().getUnderReplicatedBlocks())
-          // TODO use another property name for outOfServiceOnlyReplicas.
           .put("decommissionOnlyReplicas",
               node.getLeavingServiceStatus().getOutOfServiceOnlyReplicas())
          .put("underReplicateInOpenFiles",
@@ -5535,6 +5535,33 @@ public String getDecomNodes() {
     return JSON.toString(info);
   }

+  /**
+   * Returns a JSON representation of a map whose keys are the host names of
+   * nodes entering maintenance and whose values are maps of various node
+   * attributes to their values.
+   */
+  @Override // NameNodeMXBean
+  public String getEnteringMaintenanceNodes() {
+    final Map<String, Map<String, Object>> nodesMap =
+        new HashMap<String, Map<String, Object>>();
+    final List<DatanodeDescriptor> enteringMaintenanceNodeList =
+        blockManager.getDatanodeManager().getEnteringMaintenanceNodes();
+    for (DatanodeDescriptor node : enteringMaintenanceNodeList) {
+      Map<String, Object> attrMap = ImmutableMap
+          .<String, Object> builder()
+          .put("xferaddr", node.getXferAddr())
+          .put("underReplicatedBlocks",
+              node.getLeavingServiceStatus().getUnderReplicatedBlocks())
+          .put("maintenanceOnlyReplicas",
+              node.getLeavingServiceStatus().getOutOfServiceOnlyReplicas())
+          .put("underReplicateInOpenFiles",
+              node.getLeavingServiceStatus().getUnderReplicatedInOpenFiles())
+          .build();
+      nodesMap.put(node.getHostName() + ":" + node.getXferPort(), attrMap);
+    }
+    return JSON.toString(nodesMap);
+  }
+
   private long getLastContact(DatanodeDescriptor alivenode) {
     return (monotonicNow() - alivenode.getLastUpdateMonotonic())/1000;
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
index e796122c05..82cec33376 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
@@ -190,7 +190,14 @@ public interface NameNodeMXBean {
    * @return the decommissioning node information
    */
   public String getDecomNodes();
-  
+
+  /**
+   * Gets the information on nodes entering maintenance.
+   *
+   * @return the information on nodes entering maintenance
+   */
+  String getEnteringMaintenanceNodes();
+
   /**
    * Gets the cluster id.
    *
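The EnteringMaintenanceNodes attribute added above is served by the standard NameNodeInfo MXBean, so any JMX client can read it. The sketch below is illustrative only: the JMX service URL, host, and port are assumptions (remote JMX must be enabled on the NameNode, and the same data is also available over HTTP from the /jmx servlet). It reuses the same org.eclipse.jetty.util.ajax.JSON parser this patch itself uses.

import java.util.Map;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;
import org.eclipse.jetty.util.ajax.JSON;

public class EnteringMaintenanceProbe {
  public static void main(String[] args) throws Exception {
    // Hypothetical endpoint; a real NameNode only listens here if remote
    // JMX has been enabled on port 8004.
    JMXServiceURL url = new JMXServiceURL(
        "service:jmx:rmi:///jndi/rmi://nn.example.com:8004/jmxrmi");
    try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
      MBeanServerConnection mbsc = connector.getMBeanServerConnection();
      // Same bean and attribute names the test at the end of this patch
      // asserts against.
      ObjectName name =
          new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
      String json =
          (String) mbsc.getAttribute(name, "EnteringMaintenanceNodes");
      // Keys are "<hostname>:<xferPort>"; each value carries xferaddr,
      // underReplicatedBlocks, maintenanceOnlyReplicas and
      // underReplicateInOpenFiles, as built in getEnteringMaintenanceNodes().
      @SuppressWarnings("unchecked")
      Map<String, Object> nodes = (Map<String, Object>) JSON.parse(json);
      System.out.println("Nodes entering maintenance: " + nodes.keySet());
    }
  }
}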
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
index d71e79871a..94516a472c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
@@ -171,9 +171,10 @@
 {/nn}

 {#fs}
-  <tr><th><a href="#tab-datanode">Live Nodes</a></th><td>{NumLiveDataNodes} (Decommissioned: {NumDecomLiveDataNodes})</td></tr>
-  <tr><th><a href="#tab-datanode">Dead Nodes</a></th><td>{NumDeadDataNodes} (Decommissioned: {NumDecomDeadDataNodes})</td></tr>
+  <tr><th><a href="#tab-datanode">Live Nodes</a></th><td>{NumLiveDataNodes} (Decommissioned: {NumDecomLiveDataNodes}, In Maintenance: {NumInMaintenanceLiveDataNodes})</td></tr>
+  <tr><th><a href="#tab-datanode">Dead Nodes</a></th><td>{NumDeadDataNodes} (Decommissioned: {NumDecomDeadDataNodes}, In Maintenance: {NumInMaintenanceDeadDataNodes})</td></tr>
   <tr><th><a href="#tab-datanode">Decommissioning Nodes</a></th><td>{NumDecommissioningDataNodes}</td></tr>
+  <tr><th><a href="#tab-datanode">Entering Maintenance Nodes</a></th><td>{NumEnteringMaintenanceDataNodes}</td></tr>
   <tr><th>Total Datanode Volume Failures</th><td>{VolumeFailuresTotal} ({EstimatedCapacityLostTotal|fmt_bytes})</td></tr>
 {@eq key=nnstat.State value="active"}
   <tr><th>Number of Under-Replicated Blocks</th><td>{UnderReplicatedBlocks}</td></tr>
@@ -295,6 +296,7 @@
       <li class="dfshealth-node-icon dfshealth-node-down">Down</li>
       <li class="dfshealth-node-icon dfshealth-node-decommissioned">Decommissioned</li>
       <li class="dfshealth-node-icon dfshealth-node-down-decommissioned">Decommissioned &amp; dead</li>
+      <li class="dfshealth-node-icon dfshealth-node-down-maintenance">In Maintenance &amp; dead</li>
     </ul>
   </div>
 </div>
@@ -344,6 +346,32 @@
   </table>
 </small>

+<div class="page-header"><h1><small>Entering Maintenance</small></h1></div>
+<small>
+  {?EnteringMaintenanceNodes}
+  <table class="table">
+    <thead>
+      <tr>
+        <th>Node</th>
+        <th>Under replicated blocks</th>
+        <th>Blocks with no live replicas</th>
+        <th>Under Replicated Blocks <br/>In files under construction</th>
+      </tr>
+    </thead>
+    {#EnteringMaintenanceNodes}
+    <tr>
+      <td>{name} ({xferaddr})</td>
+      <td>{underReplicatedBlocks}</td>
+      <td>{maintenanceOnlyReplicas}</td>
+      <td>{underReplicateInOpenFiles}</td>
+    </tr>
+    {/EnteringMaintenanceNodes}
+  </table>
+  {:else}
+  No nodes are entering maintenance.
+  {/EnteringMaintenanceNodes}
+</small>
+
 <div class="page-header"><h1><small>Decommissioning</small></h1></div>
 <small>
   {?DecomNodes}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
index 02aa8955cb..e4603818e8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js
@@ -223,17 +223,23 @@
         if (n.adminState === "In Service") {
           n.state = "alive";
         } else if (nodes[i].adminState === "Decommission In Progress") {
-          n.state = "decommisioning";
+          n.state = "decommissioning";
         } else if (nodes[i].adminState === "Decommissioned") {
           n.state = "decommissioned";
+        } else if (nodes[i].adminState === "Entering Maintenance") {
+          n.state = "entering-maintenance";
+        } else if (nodes[i].adminState === "In Maintenance") {
+          n.state = "in-maintenance";
         }
       }
     }

    function augment_dead_nodes(nodes) {
      for (var i = 0, e = nodes.length; i < e; ++i) {
-        if (nodes[i].decommissioned) {
+        if (nodes[i].adminState === "Decommissioned") {
          nodes[i].state = "down-decommissioned";
+        } else if (nodes[i].adminState === "In Maintenance") {
+          nodes[i].state = "down-maintenance";
        } else {
          nodes[i].state = "down";
        }
@@ -245,6 +251,7 @@
       r.DeadNodes = node_map_to_array(JSON.parse(r.DeadNodes));
       augment_dead_nodes(r.DeadNodes);
       r.DecomNodes = node_map_to_array(JSON.parse(r.DecomNodes));
+      r.EnteringMaintenanceNodes = node_map_to_array(JSON.parse(r.EnteringMaintenanceNodes));
       return r;
     }
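For reference, dfshealth.js parses the EnteringMaintenanceNodes JSON string and, as it already does for the other node lists, runs it through node_map_to_array(), which folds each outer map key into the record as its "name" field; that is why the dust template can render {name} ({xferaddr}). A minimal Java sketch of the same flattening, run on a hand-written sample payload (host name, address, and counts are invented):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.eclipse.jetty.util.ajax.JSON;

public class NodeMapFlattenSketch {
  public static void main(String[] args) {
    // Hypothetical payload for one DataNode entering maintenance.
    String payload = "{\"dn1.example.com:9866\":{"
        + "\"xferaddr\":\"192.168.1.11:9866\","
        + "\"underReplicatedBlocks\":12,"
        + "\"maintenanceOnlyReplicas\":0,"
        + "\"underReplicateInOpenFiles\":1}}";
    @SuppressWarnings("unchecked")
    Map<String, Map<String, Object>> nodes =
        (Map<String, Map<String, Object>>) JSON.parse(payload);
    // Equivalent of node_map_to_array(): copy each record and expose its
    // map key under "name" so a template row can reference it.
    List<Map<String, Object>> rows = new ArrayList<>();
    for (Map.Entry<String, Map<String, Object>> e : nodes.entrySet()) {
      Map<String, Object> row = new HashMap<>(e.getValue());
      row.put("name", e.getKey());
      rows.add(row);
    }
    System.out.println(rows);
  }
}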
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
index 4287a92687..7322cba72e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
@@ -37,8 +37,10 @@
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.HostConfigManager;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
@@ -50,10 +52,13 @@
 import org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator;
 import org.apache.hadoop.net.ServerSocketUtil;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.VersionInfo;
 import org.junit.Assert;
 import org.junit.Test;
 import org.eclipse.jetty.util.ajax.JSON;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import javax.management.MBeanServer;
 import javax.management.ObjectName;
@@ -64,6 +69,7 @@
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -80,6 +86,9 @@
  */
 public class TestNameNodeMXBean {

+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestNameNodeMXBean.class);
+
   /**
    * Used to assert equality between doubles
    */
@@ -180,10 +189,10 @@ public void testNameNodeMXBeanInfo() throws Exception {
         assertFalse(xferAddr.equals(dnXferAddrInMaintenance) ^ inMaintenance);
       }
       assertEquals(fsn.getLiveNodes(), alivenodeinfo);
-      // get attribute deadnodeinfo
-      String deadnodeinfo = (String) (mbs.getAttribute(mxbeanName,
+      // get attribute DeadNodes
+      String deadNodeInfo = (String) (mbs.getAttribute(mxbeanName,
           "DeadNodes"));
-      assertEquals(fsn.getDeadNodes(), deadnodeinfo);
+      assertEquals(fsn.getDeadNodes(), deadNodeInfo);
       // get attribute NodeUsage
       String nodeUsage = (String) (mbs.getAttribute(mxbeanName,
           "NodeUsage"));
@@ -295,16 +304,16 @@ public void testLastContactTime() throws Exception {
         Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
       }

-      // get attribute deadnodeinfo
-      String deadnodeinfo = (String) (mbs.getAttribute(mxbeanName,
+      // get attribute DeadNodes
+      String deadNodeInfo = (String) (mbs.getAttribute(mxbeanName,
           "DeadNodes"));
-      assertEquals(fsn.getDeadNodes(), deadnodeinfo);
+      assertEquals(fsn.getDeadNodes(), deadNodeInfo);
       Map<String, Map<String, Object>> deadNodes =
-          (Map<String, Map<String, Object>>) JSON.parse(deadnodeinfo);
+          (Map<String, Map<String, Object>>) JSON.parse(deadNodeInfo);
       assertTrue(deadNodes.size() > 0);
       for (Map<String, Object> deadNode : deadNodes.values()) {
         assertTrue(deadNode.containsKey("lastContact"));
-        assertTrue(deadNode.containsKey("decommissioned"));
+        assertTrue(deadNode.containsKey("adminState"));
         assertTrue(deadNode.containsKey("xferaddr"));
       }
     } finally {
@@ -415,6 +424,106 @@ public Boolean get() {
       }
     }

+  @Test (timeout = 120000)
+  public void testMaintenanceNodes() throws Exception {
+    LOG.info("Starting testMaintenanceNodes");
+    int expirationInMs = 30 * 1000;
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
+        expirationInMs);
+    conf.setClass(DFSConfigKeys.DFS_NAMENODE_HOSTS_PROVIDER_CLASSNAME_KEY,
+        CombinedHostFileManager.class, HostConfigManager.class);
+    MiniDFSCluster cluster = null;
+    HostsFileWriter hostsFileWriter = new HostsFileWriter();
+    hostsFileWriter.initialize(conf, "temp/TestNameNodeMXBean");
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+
+      FSNamesystem fsn = cluster.getNameNode().namesystem;
+      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      ObjectName mxbeanName = new ObjectName(
+          "Hadoop:service=NameNode,name=NameNodeInfo");
+
+      List<String> hosts = new ArrayList<>();
+      for (DataNode dn : cluster.getDataNodes()) {
+        hosts.add(dn.getDisplayName());
+      }
+      hostsFileWriter.initIncludeHosts(hosts.toArray(
+          new String[hosts.size()]));
+      fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+      // 1. Verify nodes for DatanodeReportType.LIVE state
+      String liveNodesInfo = (String) (mbs.getAttribute(mxbeanName,
+          "LiveNodes"));
+      LOG.info("Live Nodes: " + liveNodesInfo);
+      Map<String, Map<String, Object>> liveNodes =
+          (Map<String, Map<String, Object>>) JSON.parse(liveNodesInfo);
+      assertEquals(fsn.getLiveNodes(), liveNodesInfo);
+      assertEquals(fsn.getNumLiveDataNodes(), liveNodes.size());
+
+      for (Map<String, Object> liveNode : liveNodes.values()) {
+        assertTrue(liveNode.containsKey("lastContact"));
+        assertTrue(liveNode.containsKey("xferaddr"));
+      }
+
+      // Add the 1st DataNode to Maintenance list
+      Map<String, Long> maintenanceNodes = new HashMap<>();
+      maintenanceNodes.put(cluster.getDataNodes().get(0).getDisplayName(),
+          Time.monotonicNow() + expirationInMs);
+      hostsFileWriter.initOutOfServiceHosts(null, maintenanceNodes);
+      fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
+
+      boolean recheck = true;
+      while (recheck) {
+        // 2. Verify nodes for DatanodeReportType.ENTERING_MAINTENANCE state
+        String enteringMaintenanceNodesInfo =
+            (String) (mbs.getAttribute(mxbeanName, "EnteringMaintenanceNodes"));
+        Map<String, Map<String, Object>> enteringMaintenanceNodes =
+            (Map<String, Map<String, Object>>) JSON.parse(
+                enteringMaintenanceNodesInfo);
+        if (enteringMaintenanceNodes.size() <= 0) {
+          LOG.info("Waiting for a node to Enter Maintenance state!");
+          Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
+          continue;
+        }
+        LOG.info("Nodes entering Maintenance: " + enteringMaintenanceNodesInfo);
+        recheck = false;
+        assertEquals(fsn.getEnteringMaintenanceNodes(),
+            enteringMaintenanceNodesInfo);
+        assertEquals(fsn.getNumEnteringMaintenanceDataNodes(),
+            enteringMaintenanceNodes.size());
+        assertEquals(0, fsn.getNumInMaintenanceLiveDataNodes());
+        assertEquals(0, fsn.getNumInMaintenanceDeadDataNodes());
+      }
+
+      // Wait for the DecommissionManager to complete check
+      // and perform state transition
+      while (fsn.getNumInMaintenanceLiveDataNodes() != 1) {
+        Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
+      }
+
+      // 3. Verify nodes for AdminStates.IN_MAINTENANCE state
+      String enteringMaintenanceNodesInfo =
+          (String) (mbs.getAttribute(mxbeanName, "EnteringMaintenanceNodes"));
+      Map<String, Map<String, Object>> enteringMaintenanceNodes =
+          (Map<String, Map<String, Object>>) JSON.parse(
+              enteringMaintenanceNodesInfo);
+      assertEquals(0, enteringMaintenanceNodes.size());
+      assertEquals(fsn.getEnteringMaintenanceNodes(),
+          enteringMaintenanceNodesInfo);
+      assertEquals(1, fsn.getNumInMaintenanceLiveDataNodes());
+      assertEquals(0, fsn.getNumInMaintenanceDeadDataNodes());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      hostsFileWriter.cleanup();
+    }
+  }
+
   @Test(timeout=120000)
   @SuppressWarnings("unchecked")
   public void testTopUsers() throws Exception {
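The test's wait loops call FSNamesystem getters in-process. Outside the NameNode, the same transition can be watched over JMX: the dfshealth page's {NumEnteringMaintenanceDataNodes} and {NumInMaintenanceLiveDataNodes} bindings are fetched from the FSNamesystemState bean, so a watcher along the following lines should work. This is a sketch under that assumption, not an API this patch adds; the attribute and bean names should be verified against the target Hadoop version.

import java.lang.management.ManagementFactory;
import java.util.concurrent.TimeUnit;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public class MaintenanceTransitionWatch {
  // Polls until the expected number of live DataNodes has fully
  // transitioned to IN_MAINTENANCE, mirroring the wait loops in
  // testMaintenanceNodes above. Assumes in-process access to the
  // platform MBeanServer, as in the test.
  public static void awaitInMaintenance(long expectedLive) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName fsState =
        new ObjectName("Hadoop:service=NameNode,name=FSNamesystemState");
    while (true) {
      Number live =
          (Number) mbs.getAttribute(fsState, "NumInMaintenanceLiveDataNodes");
      if (live.longValue() == expectedLive) {
        return;
      }
      TimeUnit.SECONDS.sleep(1);
    }
  }
}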