diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java index 5fb9b75286..2cfa243895 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java @@ -883,15 +883,16 @@ protected int getWeight(Node reader, Node node) { * @param seed Used to seed the pseudo-random generator that randomizes the * set of nodes at each network distance. */ - public void sortByDistance(Node reader, Node[] nodes, long seed) { + public void sortByDistance(Node reader, Node[] nodes, + int activeLen, long seed) { /** Sort weights for the nodes array */ - int[] weights = new int[nodes.length]; - for (int i=0; i> tree = new TreeMap>(); - for (int i=0; i list = tree.get(weight); @@ -917,7 +918,7 @@ public void sortByDistance(Node reader, Node[] nodes, long seed) { } } } - Preconditions.checkState(idx == nodes.length, + Preconditions.checkState(idx == activeLen, "Sorted the wrong number of nodes!"); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java index 975fe4d1e3..7243f72876 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java @@ -279,7 +279,8 @@ protected int getWeight(Node reader, Node node) { * set of nodes at each network distance. */ @Override - public void sortByDistance( Node reader, Node[] nodes, long seed) { + public void sortByDistance( Node reader, Node[] nodes, + int activeLen, long seed) { // If reader is not a datanode (not in NetworkTopology tree), we need to // replace this reader with a sibling leaf node in tree. if (reader != null && !this.contains(reader)) { @@ -292,7 +293,7 @@ public void sortByDistance( Node reader, Node[] nodes, long seed) { return; } } - super.sortByDistance(reader, nodes, seed); + super.sortByDistance(reader, nodes, nodes.length, seed); } /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java index c082cce14a..ca61c1e86b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java @@ -104,7 +104,8 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[2]; testNodes[2] = dataNodes[3]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[2]); @@ -115,7 +116,8 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[4]; testNodes[2] = dataNodes[1]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); @@ -124,7 +126,8 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[2]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[2]); @@ -133,7 +136,8 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[7]; testNodes[2] = dataNodes[2]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF); + cluster.sortByDistance(computeNode, testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[2]); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cb73ac55aa..4d58be45e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -489,6 +489,9 @@ Release 2.5.0 - UNRELEASED HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn) + HDFS-6460. Ignore stale and decommissioned nodes in + NetworkTopology#sortByDistance. (Yongjun Zhang via wang) + BUG FIXES HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java index 925db17b8b..16bcc0b93d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.security.token.Token; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 8806fb9b44..aea34ecbc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -331,6 +331,18 @@ public DatanodeStatistics getDatanodeStatistics() { return heartbeatManager; } + private boolean isInactive(DatanodeInfo datanode) { + if (datanode.isDecommissioned()) { + return true; + } + + if (avoidStaleDataNodesForRead) { + return datanode.isStale(staleInterval); + } + + return false; + } + /** Sort the located blocks by the distance to the target host. */ public void sortLocatedBlocks(final String targethost, final List locatedblocks) { @@ -351,10 +363,17 @@ public void sortLocatedBlocks(final String targethost, DFSUtil.DECOM_COMPARATOR; for (LocatedBlock b : locatedblocks) { - networktopology.sortByDistance(client, b.getLocations(), b - .getBlock().getBlockId()); + DatanodeInfo[] di = b.getLocations(); // Move decommissioned/stale datanodes to the bottom - Arrays.sort(b.getLocations(), comparator); + Arrays.sort(di, comparator); + + int lastActiveIndex = di.length - 1; + while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) { + --lastActiveIndex; + } + int activeLen = lastActiveIndex + 1; + networktopology.sortByDistance(client, b.getLocations(), activeLen, + b.getBlock().getBlockId()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java index 4f98ae4a22..2e6383cc26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java @@ -55,11 +55,18 @@ public void setupDatanodes() { DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"), DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"), DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"), - DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3") + DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3"), + DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d3/r1"), + DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"), + DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"), + DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"), + DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2") }; for (int i = 0; i < dataNodes.length; i++) { cluster.add(dataNodes[i]); } + dataNodes[9].setDecommissioned(); + dataNodes[10].setDecommissioned(); } @Test @@ -100,7 +107,7 @@ public void testCreateInvalidTopology() throws Exception { @Test public void testRacks() throws Exception { - assertEquals(cluster.getNumOfRacks(), 3); + assertEquals(cluster.getNumOfRacks(), 5); assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1])); assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2])); assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3])); @@ -125,16 +132,33 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[2]; testNodes[2] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[2]); + // array contains both local node & local rack node & decommissioned node + DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5]; + dtestNodes[0] = dataNodes[8]; + dtestNodes[1] = dataNodes[12]; + dtestNodes[2] = dataNodes[11]; + dtestNodes[3] = dataNodes[9]; + dtestNodes[4] = dataNodes[10]; + cluster.sortByDistance(dataNodes[8], dtestNodes, + dtestNodes.length - 2, 0xDEADBEEF); + assertTrue(dtestNodes[0] == dataNodes[8]); + assertTrue(dtestNodes[1] == dataNodes[11]); + assertTrue(dtestNodes[2] == dataNodes[12]); + assertTrue(dtestNodes[3] == dataNodes[9]); + assertTrue(dtestNodes[4] == dataNodes[10]); + // array contains local node testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[3]); @@ -143,7 +167,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[5]; testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[1]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[2] == dataNodes[5]); @@ -152,7 +177,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[5]; testNodes[2] = dataNodes[3]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEADBEEF); assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[2] == dataNodes[5]); @@ -161,7 +187,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[5]; testNodes[2] = dataNodes[3]; - cluster.sortByDistance(dataNodes[0], testNodes, 0xDEAD); + cluster.sortByDistance(dataNodes[0], testNodes, + testNodes.length, 0xDEAD); // sortByDistance does not take the "data center" layer into consideration // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here assertTrue(testNodes[0] == dataNodes[1]); @@ -176,7 +203,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[5]; testNodes[1] = dataNodes[6]; testNodes[2] = dataNodes[7]; - cluster.sortByDistance(dataNodes[i], testNodes, 0xBEADED+i); + cluster.sortByDistance(dataNodes[i], testNodes, + testNodes.length, 0xBEADED+i); if (first == null) { first = testNodes[0]; } else {