HDFS-6460. Ignore stale and decommissioned nodes in NetworkTopology#sortByDistance. Contributed by Yongjun Zhang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1601535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2014-06-10 00:39:20 +00:00
parent c94f2cec3a
commit 123c563fe6
7 changed files with 78 additions and 23 deletions

View File

@ -883,15 +883,16 @@ protected int getWeight(Node reader, Node node) {
* @param seed Used to seed the pseudo-random generator that randomizes the * @param seed Used to seed the pseudo-random generator that randomizes the
* set of nodes at each network distance. * set of nodes at each network distance.
*/ */
public void sortByDistance(Node reader, Node[] nodes, long seed) { public void sortByDistance(Node reader, Node[] nodes,
int activeLen, long seed) {
/** Sort weights for the nodes array */ /** Sort weights for the nodes array */
int[] weights = new int[nodes.length]; int[] weights = new int[activeLen];
for (int i=0; i<nodes.length; i++) { for (int i=0; i<activeLen; i++) {
weights[i] = getWeight(reader, nodes[i]); weights[i] = getWeight(reader, nodes[i]);
} }
// Add weight/node pairs to a TreeMap to sort // Add weight/node pairs to a TreeMap to sort
TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>(); TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
for (int i=0; i<nodes.length; i++) { for (int i=0; i<activeLen; i++) {
int weight = weights[i]; int weight = weights[i];
Node node = nodes[i]; Node node = nodes[i];
List<Node> list = tree.get(weight); List<Node> list = tree.get(weight);
@ -917,7 +918,7 @@ public void sortByDistance(Node reader, Node[] nodes, long seed) {
} }
} }
} }
Preconditions.checkState(idx == nodes.length, Preconditions.checkState(idx == activeLen,
"Sorted the wrong number of nodes!"); "Sorted the wrong number of nodes!");
} }
} }

View File

@ -279,7 +279,8 @@ protected int getWeight(Node reader, Node node) {
* set of nodes at each network distance. * set of nodes at each network distance.
*/ */
@Override @Override
public void sortByDistance( Node reader, Node[] nodes, long seed) { public void sortByDistance( Node reader, Node[] nodes,
int activeLen, long seed) {
// If reader is not a datanode (not in NetworkTopology tree), we need to // If reader is not a datanode (not in NetworkTopology tree), we need to
// replace this reader with a sibling leaf node in tree. // replace this reader with a sibling leaf node in tree.
if (reader != null && !this.contains(reader)) { if (reader != null && !this.contains(reader)) {
@ -292,7 +293,7 @@ public void sortByDistance( Node reader, Node[] nodes, long seed) {
return; return;
} }
} }
super.sortByDistance(reader, nodes, seed); super.sortByDistance(reader, nodes, nodes.length, seed);
} }
/** InnerNodeWithNodeGroup represents a switch/router of a data center, rack /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack

View File

@ -104,7 +104,8 @@ public void testSortByDistance() throws Exception {
testNodes[1] = dataNodes[2]; testNodes[1] = dataNodes[2];
testNodes[2] = dataNodes[3]; testNodes[2] = dataNodes[3];
testNodes[3] = dataNodes[0]; testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]); assertTrue(testNodes[2] == dataNodes[2]);
@ -115,7 +116,8 @@ public void testSortByDistance() throws Exception {
testNodes[1] = dataNodes[4]; testNodes[1] = dataNodes[4];
testNodes[2] = dataNodes[1]; testNodes[2] = dataNodes[1];
testNodes[3] = dataNodes[0]; testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[1]);
@ -124,7 +126,8 @@ public void testSortByDistance() throws Exception {
testNodes[1] = dataNodes[3]; testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[2]; testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0]; testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]); assertTrue(testNodes[1] == dataNodes[2]);
@ -133,7 +136,8 @@ public void testSortByDistance() throws Exception {
testNodes[1] = dataNodes[7]; testNodes[1] = dataNodes[7];
testNodes[2] = dataNodes[2]; testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0]; testNodes[3] = dataNodes[0];
cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF); cluster.sortByDistance(computeNode, testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]); assertTrue(testNodes[1] == dataNodes[2]);
} }

View File

@ -489,6 +489,9 @@ Release 2.5.0 - UNRELEASED
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn) HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
HDFS-6460. Ignore stale and decommissioned nodes in
NetworkTopology#sortByDistance. (Yongjun Zhang via wang)
BUG FIXES BUG FIXES
HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration. HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.

View File

@ -26,7 +26,6 @@
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.Token;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
/** /**

View File

@ -331,6 +331,18 @@ public DatanodeStatistics getDatanodeStatistics() {
return heartbeatManager; return heartbeatManager;
} }
private boolean isInactive(DatanodeInfo datanode) {
if (datanode.isDecommissioned()) {
return true;
}
if (avoidStaleDataNodesForRead) {
return datanode.isStale(staleInterval);
}
return false;
}
/** Sort the located blocks by the distance to the target host. */ /** Sort the located blocks by the distance to the target host. */
public void sortLocatedBlocks(final String targethost, public void sortLocatedBlocks(final String targethost,
final List<LocatedBlock> locatedblocks) { final List<LocatedBlock> locatedblocks) {
@ -351,10 +363,17 @@ public void sortLocatedBlocks(final String targethost,
DFSUtil.DECOM_COMPARATOR; DFSUtil.DECOM_COMPARATOR;
for (LocatedBlock b : locatedblocks) { for (LocatedBlock b : locatedblocks) {
networktopology.sortByDistance(client, b.getLocations(), b DatanodeInfo[] di = b.getLocations();
.getBlock().getBlockId());
// Move decommissioned/stale datanodes to the bottom // Move decommissioned/stale datanodes to the bottom
Arrays.sort(b.getLocations(), comparator); Arrays.sort(di, comparator);
int lastActiveIndex = di.length - 1;
while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) {
--lastActiveIndex;
}
int activeLen = lastActiveIndex + 1;
networktopology.sortByDistance(client, b.getLocations(), activeLen,
b.getBlock().getBlockId());
} }
} }

View File

@ -55,11 +55,18 @@ public void setupDatanodes() {
DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"), DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"),
DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"), DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"), DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3") DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")
}; };
for (int i = 0; i < dataNodes.length; i++) { for (int i = 0; i < dataNodes.length; i++) {
cluster.add(dataNodes[i]); cluster.add(dataNodes[i]);
} }
dataNodes[9].setDecommissioned();
dataNodes[10].setDecommissioned();
} }
@Test @Test
@ -100,7 +107,7 @@ public void testCreateInvalidTopology() throws Exception {
@Test @Test
public void testRacks() throws Exception { public void testRacks() throws Exception {
assertEquals(cluster.getNumOfRacks(), 3); assertEquals(cluster.getNumOfRacks(), 5);
assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1])); assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2])); assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3])); assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
@ -125,16 +132,33 @@ public void testSortByDistance() throws Exception {
testNodes[0] = dataNodes[1]; testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[2]; testNodes[1] = dataNodes[2];
testNodes[2] = dataNodes[0]; testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]); assertTrue(testNodes[2] == dataNodes[2]);
// array contains both local node & local rack node & decommissioned node
DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
dtestNodes[0] = dataNodes[8];
dtestNodes[1] = dataNodes[12];
dtestNodes[2] = dataNodes[11];
dtestNodes[3] = dataNodes[9];
dtestNodes[4] = dataNodes[10];
cluster.sortByDistance(dataNodes[8], dtestNodes,
dtestNodes.length - 2, 0xDEADBEEF);
assertTrue(dtestNodes[0] == dataNodes[8]);
assertTrue(dtestNodes[1] == dataNodes[11]);
assertTrue(dtestNodes[2] == dataNodes[12]);
assertTrue(dtestNodes[3] == dataNodes[9]);
assertTrue(dtestNodes[4] == dataNodes[10]);
// array contains local node // array contains local node
testNodes[0] = dataNodes[1]; testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[3]; testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[0]; testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[3]); assertTrue(testNodes[2] == dataNodes[3]);
@ -143,7 +167,8 @@ public void testSortByDistance() throws Exception {
testNodes[0] = dataNodes[5]; testNodes[0] = dataNodes[5];
testNodes[1] = dataNodes[3]; testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[1]; testNodes[2] = dataNodes[1];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]); assertTrue(testNodes[2] == dataNodes[5]);
@ -152,7 +177,8 @@ public void testSortByDistance() throws Exception {
testNodes[0] = dataNodes[1]; testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[5]; testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3]; testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]); assertTrue(testNodes[2] == dataNodes[5]);
@ -161,7 +187,8 @@ public void testSortByDistance() throws Exception {
testNodes[0] = dataNodes[1]; testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[5]; testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3]; testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEAD); cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEAD);
// sortByDistance does not take the "data center" layer into consideration // sortByDistance does not take the "data center" layer into consideration
// and it doesn't sort by getDistance, so 1, 5, 3 is also valid here // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[0] == dataNodes[1]);
@ -176,7 +203,8 @@ public void testSortByDistance() throws Exception {
testNodes[0] = dataNodes[5]; testNodes[0] = dataNodes[5];
testNodes[1] = dataNodes[6]; testNodes[1] = dataNodes[6];
testNodes[2] = dataNodes[7]; testNodes[2] = dataNodes[7];
cluster.sortByDistance(dataNodes[i], testNodes, 0xBEADED+i); cluster.sortByDistance(dataNodes[i], testNodes,
testNodes.length, 0xBEADED+i);
if (first == null) { if (first == null) {
first = testNodes[0]; first = testNodes[0];
} else { } else {