HDFS-7742. Favoring decommissioning node for replication can cause a block to stay
underreplicated for long periods. Contributed by Nathan Roberts.
This commit is contained in:
parent
ae3e8c61ff
commit
04ee18ed48
hadoop-hdfs-project/hadoop-hdfs
CHANGES.txt
src
main/java/org/apache/hadoop/hdfs/server/blockmanagement
test/java/org/apache/hadoop/hdfs/server/blockmanagement
@ -829,6 +829,9 @@ Release 2.7.0 - UNRELEASED
|
||||
HDFS-7410. Support CreateFlags with append() to support hsync() for
|
||||
appending streams (Vinayakumar B via Colin P. McCabe)
|
||||
|
||||
HDFS-7742. Favoring decommissioning node for replication can cause a block
|
||||
to stay underreplicated for long periods (Nathan Roberts via kihwal)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
|
||||
|
@ -1638,6 +1638,7 @@ public class BlockManager {
|
||||
if ((nodesCorrupt != null) && nodesCorrupt.contains(node))
|
||||
continue;
|
||||
if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY
|
||||
&& !node.isDecommissionInProgress()
|
||||
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams)
|
||||
{
|
||||
continue; // already reached replication limit
|
||||
@ -1652,13 +1653,12 @@ public class BlockManager {
|
||||
// never use already decommissioned nodes
|
||||
if(node.isDecommissioned())
|
||||
continue;
|
||||
// we prefer nodes that are in DECOMMISSION_INPROGRESS state
|
||||
if(node.isDecommissionInProgress() || srcNode == null) {
|
||||
|
||||
// We got this far, current node is a reasonable choice
|
||||
if (srcNode == null) {
|
||||
srcNode = node;
|
||||
continue;
|
||||
}
|
||||
if(srcNode.isDecommissionInProgress())
|
||||
continue;
|
||||
// switch to a different node randomly
|
||||
// this is to avoid deterministically selecting the same node even
|
||||
// if the node failed to replicate the block on previous iterations
|
||||
|
@ -534,6 +534,48 @@ public class TestBlockManager {
|
||||
UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFavorDecomUntilHardLimit() throws Exception {
|
||||
bm.maxReplicationStreams = 0;
|
||||
bm.replicationStreamsHardLimit = 1;
|
||||
|
||||
long blockId = 42; // arbitrary
|
||||
Block aBlock = new Block(blockId, 0, 0);
|
||||
List<DatanodeDescriptor> origNodes = getNodes(0, 1);
|
||||
// Add the block to the first node.
|
||||
addBlockOnNodes(blockId,origNodes.subList(0,1));
|
||||
origNodes.get(0).startDecommission();
|
||||
|
||||
List<DatanodeDescriptor> cntNodes = new LinkedList<DatanodeDescriptor>();
|
||||
List<DatanodeStorageInfo> liveNodes = new LinkedList<DatanodeStorageInfo>();
|
||||
|
||||
assertNotNull("Chooses decommissioning source node for a normal replication"
|
||||
+ " if all available source nodes have reached their replication"
|
||||
+ " limits below the hard limit.",
|
||||
bm.chooseSourceDatanode(
|
||||
aBlock,
|
||||
cntNodes,
|
||||
liveNodes,
|
||||
new NumberReplicas(),
|
||||
UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));
|
||||
|
||||
|
||||
// Increase the replication count to test replication count > hard limit
|
||||
DatanodeStorageInfo targets[] = { origNodes.get(1).getStorageInfos()[0] };
|
||||
origNodes.get(0).addBlockToBeReplicated(aBlock, targets);
|
||||
|
||||
assertNull("Does not choose a source decommissioning node for a normal"
|
||||
+ " replication when all available nodes exceed the hard limit.",
|
||||
bm.chooseSourceDatanode(
|
||||
aBlock,
|
||||
cntNodes,
|
||||
liveNodes,
|
||||
new NumberReplicas(),
|
||||
UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testSafeModeIBR() throws Exception {
|
||||
DatanodeDescriptor node = spy(nodes.get(0));
|
||||
|
Loading…
x
Reference in New Issue
Block a user