HDFS-6680. BlockPlacementPolicyDefault does not choose favored nodes correctly.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1612427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1ad2d7b405
commit
44d9bb26d6
@ -589,6 +589,9 @@ Release 2.5.0 - UNRELEASED
|
|||||||
HDFS-6493. Change dfs.namenode.startup.delay.block.deletion to second
|
HDFS-6493. Change dfs.namenode.startup.delay.block.deletion to second
|
||||||
instead of millisecond. (Juan Yu via wang)
|
instead of millisecond. (Juan Yu via wang)
|
||||||
|
|
||||||
|
HDFS-6680. BlockPlacementPolicyDefault does not choose favored nodes
|
||||||
|
correctly. (szetszwo)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
|
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
|
||||||
|
@ -145,14 +145,14 @@ DatanodeStorageInfo[] chooseTarget(String src,
|
|||||||
List<DatanodeStorageInfo> results = new ArrayList<DatanodeStorageInfo>();
|
List<DatanodeStorageInfo> results = new ArrayList<DatanodeStorageInfo>();
|
||||||
boolean avoidStaleNodes = stats != null
|
boolean avoidStaleNodes = stats != null
|
||||||
&& stats.isAvoidingStaleDataNodesForWrite();
|
&& stats.isAvoidingStaleDataNodesForWrite();
|
||||||
for (int i = 0; i < Math.min(favoredNodes.size(), numOfReplicas); i++) {
|
for (int i = 0; i < favoredNodes.size() && results.size() < numOfReplicas; i++) {
|
||||||
DatanodeDescriptor favoredNode = favoredNodes.get(i);
|
DatanodeDescriptor favoredNode = favoredNodes.get(i);
|
||||||
// Choose a single node which is local to favoredNode.
|
// Choose a single node which is local to favoredNode.
|
||||||
// 'results' is updated within chooseLocalNode
|
// 'results' is updated within chooseLocalNode
|
||||||
final DatanodeStorageInfo target = chooseLocalStorage(favoredNode,
|
final DatanodeStorageInfo target = chooseLocalStorage(favoredNode,
|
||||||
favoriteAndExcludedNodes, blocksize,
|
favoriteAndExcludedNodes, blocksize,
|
||||||
getMaxNodesPerRack(results.size(), numOfReplicas)[1],
|
getMaxNodesPerRack(results.size(), numOfReplicas)[1],
|
||||||
results, avoidStaleNodes, storageType);
|
results, avoidStaleNodes, storageType, false);
|
||||||
if (target == null) {
|
if (target == null) {
|
||||||
LOG.warn("Could not find a target for file " + src
|
LOG.warn("Could not find a target for file " + src
|
||||||
+ " with favored node " + favoredNode);
|
+ " with favored node " + favoredNode);
|
||||||
@ -271,7 +271,7 @@ private Node chooseTarget(int numOfReplicas,
|
|||||||
try {
|
try {
|
||||||
if (numOfResults == 0) {
|
if (numOfResults == 0) {
|
||||||
writer = chooseLocalStorage(writer, excludedNodes, blocksize,
|
writer = chooseLocalStorage(writer, excludedNodes, blocksize,
|
||||||
maxNodesPerRack, results, avoidStaleNodes, storageType)
|
maxNodesPerRack, results, avoidStaleNodes, storageType, true)
|
||||||
.getDatanodeDescriptor();
|
.getDatanodeDescriptor();
|
||||||
if (--numOfReplicas == 0) {
|
if (--numOfReplicas == 0) {
|
||||||
return writer;
|
return writer;
|
||||||
@ -345,12 +345,14 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine,
|
|||||||
int maxNodesPerRack,
|
int maxNodesPerRack,
|
||||||
List<DatanodeStorageInfo> results,
|
List<DatanodeStorageInfo> results,
|
||||||
boolean avoidStaleNodes,
|
boolean avoidStaleNodes,
|
||||||
StorageType storageType)
|
StorageType storageType,
|
||||||
|
boolean fallbackToLocalRack)
|
||||||
throws NotEnoughReplicasException {
|
throws NotEnoughReplicasException {
|
||||||
// if no local machine, randomly choose one node
|
// if no local machine, randomly choose one node
|
||||||
if (localMachine == null)
|
if (localMachine == null) {
|
||||||
return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize,
|
return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize,
|
||||||
maxNodesPerRack, results, avoidStaleNodes, storageType);
|
maxNodesPerRack, results, avoidStaleNodes, storageType);
|
||||||
|
}
|
||||||
if (preferLocalNode && localMachine instanceof DatanodeDescriptor) {
|
if (preferLocalNode && localMachine instanceof DatanodeDescriptor) {
|
||||||
DatanodeDescriptor localDatanode = (DatanodeDescriptor) localMachine;
|
DatanodeDescriptor localDatanode = (DatanodeDescriptor) localMachine;
|
||||||
// otherwise try local machine first
|
// otherwise try local machine first
|
||||||
@ -364,6 +366,10 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!fallbackToLocalRack) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
// try a node on local rack
|
// try a node on local rack
|
||||||
return chooseLocalRack(localMachine, excludedNodes, blocksize,
|
return chooseLocalRack(localMachine, excludedNodes, blocksize,
|
||||||
maxNodesPerRack, results, avoidStaleNodes, storageType);
|
maxNodesPerRack, results, avoidStaleNodes, storageType);
|
||||||
|
@ -70,7 +70,8 @@ public void initialize(Configuration conf, FSClusterStats stats,
|
|||||||
protected DatanodeStorageInfo chooseLocalStorage(Node localMachine,
|
protected DatanodeStorageInfo chooseLocalStorage(Node localMachine,
|
||||||
Set<Node> excludedNodes, long blocksize, int maxNodesPerRack,
|
Set<Node> excludedNodes, long blocksize, int maxNodesPerRack,
|
||||||
List<DatanodeStorageInfo> results, boolean avoidStaleNodes,
|
List<DatanodeStorageInfo> results, boolean avoidStaleNodes,
|
||||||
StorageType storageType) throws NotEnoughReplicasException {
|
StorageType storageType, boolean fallbackToLocalRack
|
||||||
|
) throws NotEnoughReplicasException {
|
||||||
// if no local machine, randomly choose one node
|
// if no local machine, randomly choose one node
|
||||||
if (localMachine == null)
|
if (localMachine == null)
|
||||||
return chooseRandom(NodeBase.ROOT, excludedNodes,
|
return chooseRandom(NodeBase.ROOT, excludedNodes,
|
||||||
@ -97,6 +98,10 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine,
|
|||||||
if (chosenStorage != null) {
|
if (chosenStorage != null) {
|
||||||
return chosenStorage;
|
return chosenStorage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!fallbackToLocalRack) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
// try a node on local rack
|
// try a node on local rack
|
||||||
return chooseLocalRack(localMachine, excludedNodes,
|
return chooseLocalRack(localMachine, excludedNodes,
|
||||||
blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType);
|
blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType);
|
||||||
|
@ -18,32 +18,41 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.commons.logging.impl.Log4JLogger;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.BlockLocation;
|
import org.apache.hadoop.fs.BlockLocation;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.junit.Test;
|
import org.apache.log4j.Level;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
public class TestFavoredNodesEndToEnd {
|
public class TestFavoredNodesEndToEnd {
|
||||||
|
{
|
||||||
|
((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class)).getLogger().setLevel(Level.ALL);
|
||||||
|
}
|
||||||
|
|
||||||
private static MiniDFSCluster cluster;
|
private static MiniDFSCluster cluster;
|
||||||
private static Configuration conf;
|
private static Configuration conf;
|
||||||
private final static int NUM_DATA_NODES = 10;
|
private final static int NUM_DATA_NODES = 10;
|
||||||
@ -79,7 +88,7 @@ public void testFavoredNodesEndToEnd() throws Exception {
|
|||||||
InetSocketAddress datanode[] = getDatanodes(rand);
|
InetSocketAddress datanode[] = getDatanodes(rand);
|
||||||
Path p = new Path("/filename"+i);
|
Path p = new Path("/filename"+i);
|
||||||
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
||||||
4096, (short)3, (long)4096, null, datanode);
|
4096, (short)3, 4096L, null, datanode);
|
||||||
out.write(SOME_BYTES);
|
out.write(SOME_BYTES);
|
||||||
out.close();
|
out.close();
|
||||||
BlockLocation[] locations = getBlockLocations(p);
|
BlockLocation[] locations = getBlockLocations(p);
|
||||||
@ -98,14 +107,13 @@ public void testWhenFavoredNodesNotPresent() throws Exception {
|
|||||||
//get some other nodes. In other words, the write to hdfs should not fail
|
//get some other nodes. In other words, the write to hdfs should not fail
|
||||||
//and if we do getBlockLocations on the file, we should see one blklocation
|
//and if we do getBlockLocations on the file, we should see one blklocation
|
||||||
//and three hosts for that
|
//and three hosts for that
|
||||||
Random rand = new Random(System.currentTimeMillis());
|
|
||||||
InetSocketAddress arbitraryAddrs[] = new InetSocketAddress[3];
|
InetSocketAddress arbitraryAddrs[] = new InetSocketAddress[3];
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
arbitraryAddrs[i] = getArbitraryLocalHostAddr();
|
arbitraryAddrs[i] = getArbitraryLocalHostAddr();
|
||||||
}
|
}
|
||||||
Path p = new Path("/filename-foo-bar");
|
Path p = new Path("/filename-foo-bar");
|
||||||
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
||||||
4096, (short)3, (long)4096, null, arbitraryAddrs);
|
4096, (short)3, 4096L, null, arbitraryAddrs);
|
||||||
out.write(SOME_BYTES);
|
out.write(SOME_BYTES);
|
||||||
out.close();
|
out.close();
|
||||||
getBlockLocations(p);
|
getBlockLocations(p);
|
||||||
@ -113,35 +121,41 @@ public void testWhenFavoredNodesNotPresent() throws Exception {
|
|||||||
|
|
||||||
@Test(timeout=180000)
|
@Test(timeout=180000)
|
||||||
public void testWhenSomeNodesAreNotGood() throws Exception {
|
public void testWhenSomeNodesAreNotGood() throws Exception {
|
||||||
|
// 4 favored nodes
|
||||||
|
final InetSocketAddress addrs[] = new InetSocketAddress[4];
|
||||||
|
final String[] hosts = new String[addrs.length];
|
||||||
|
for (int i = 0; i < addrs.length; i++) {
|
||||||
|
addrs[i] = datanodes.get(i).getXferAddress();
|
||||||
|
hosts[i] = addrs[i].getAddress().getHostAddress() + ":" + addrs[i].getPort();
|
||||||
|
}
|
||||||
|
|
||||||
//make some datanode not "good" so that even if the client prefers it,
|
//make some datanode not "good" so that even if the client prefers it,
|
||||||
//the namenode would not give it as a replica to write to
|
//the namenode would not give it as a replica to write to
|
||||||
DatanodeInfo d = cluster.getNameNode().getNamesystem().getBlockManager()
|
DatanodeInfo d = cluster.getNameNode().getNamesystem().getBlockManager()
|
||||||
.getDatanodeManager().getDatanodeByXferAddr(
|
.getDatanodeManager().getDatanodeByXferAddr(
|
||||||
datanodes.get(0).getXferAddress().getAddress().getHostAddress(),
|
addrs[0].getAddress().getHostAddress(), addrs[0].getPort());
|
||||||
datanodes.get(0).getXferAddress().getPort());
|
|
||||||
//set the decommission status to true so that
|
//set the decommission status to true so that
|
||||||
//BlockPlacementPolicyDefault.isGoodTarget returns false for this dn
|
//BlockPlacementPolicyDefault.isGoodTarget returns false for this dn
|
||||||
d.setDecommissioned();
|
d.setDecommissioned();
|
||||||
InetSocketAddress addrs[] = new InetSocketAddress[3];
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
addrs[i] = datanodes.get(i).getXferAddress();
|
|
||||||
}
|
|
||||||
Path p = new Path("/filename-foo-bar-baz");
|
Path p = new Path("/filename-foo-bar-baz");
|
||||||
|
final short replication = (short)3;
|
||||||
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true,
|
||||||
4096, (short)3, (long)4096, null, addrs);
|
4096, replication, 4096L, null, addrs);
|
||||||
out.write(SOME_BYTES);
|
out.write(SOME_BYTES);
|
||||||
out.close();
|
out.close();
|
||||||
//reset the state
|
//reset the state
|
||||||
d.stopDecommission();
|
d.stopDecommission();
|
||||||
|
|
||||||
BlockLocation[] locations = getBlockLocations(p);
|
BlockLocation[] locations = getBlockLocations(p);
|
||||||
|
Assert.assertEquals(replication, locations[0].getNames().length);;
|
||||||
//also make sure that the datanode[0] is not in the list of hosts
|
//also make sure that the datanode[0] is not in the list of hosts
|
||||||
String datanode0 =
|
for (int i = 0; i < replication; i++) {
|
||||||
datanodes.get(0).getXferAddress().getAddress().getHostAddress()
|
final String loc = locations[0].getNames()[i];
|
||||||
+ ":" + datanodes.get(0).getXferAddress().getPort();
|
int j = 0;
|
||||||
for (int i = 0; i < 3; i++) {
|
for(; j < hosts.length && !loc.equals(hosts[j]); j++);
|
||||||
if (locations[0].getNames()[i].equals(datanode0)) {
|
Assert.assertTrue("j=" + j, j > 0);
|
||||||
fail(datanode0 + " not supposed to be a replica for the block");
|
Assert.assertTrue("loc=" + loc + " not in host list "
|
||||||
}
|
+ Arrays.asList(hosts) + ", j=" + j, j < hosts.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user