HDFS-16839 It should consider EC reconstruction work when we determine if a node is busy (#5128)
Co-authored-by: Takanobu Asanuma <tasanuma@apache.org>
Reviewed-by: Tao Li <tomscut@apache.org>
(cherry picked from commit 72749a4ff8
)
This commit is contained in:
parent
6b23e70539
commit
8c7f2ddc10
@ -2497,7 +2497,8 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
|
||||
|
||||
if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY
|
||||
&& (!node.isDecommissionInProgress() && !node.isEnteringMaintenance())
|
||||
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) {
|
||||
&& node.getNumberOfBlocksToBeReplicated() +
|
||||
node.getNumberOfBlocksToBeErasureCoded() >= maxReplicationStreams) {
|
||||
if (isStriped && (state == StoredReplicaState.LIVE
|
||||
|| state == StoredReplicaState.DECOMMISSIONING)) {
|
||||
liveBusyBlockIndices.add(blockIndex);
|
||||
@ -2507,7 +2508,8 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
|
||||
continue; // already reached replication limit
|
||||
}
|
||||
|
||||
if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) {
|
||||
if (node.getNumberOfBlocksToBeReplicated() +
|
||||
node.getNumberOfBlocksToBeErasureCoded() >= replicationStreamsHardLimit) {
|
||||
if (isStriped && (state == StoredReplicaState.LIVE
|
||||
|| state == StoredReplicaState.DECOMMISSIONING)) {
|
||||
liveBusyBlockIndices.add(blockIndex);
|
||||
|
@ -957,6 +957,58 @@ public void testSkipReconstructionWithManyBusyNodes2() {
|
||||
assertNull(work);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipReconstructionWithManyBusyNodes3() {
|
||||
NameNode.initMetrics(new Configuration(), HdfsServerConstants.NamenodeRole.NAMENODE);
|
||||
long blockId = -9223372036854775776L; // Real ec block id
|
||||
// RS-3-2 EC policy
|
||||
ErasureCodingPolicy ecPolicy =
|
||||
SystemErasureCodingPolicies.getPolicies().get(1);
|
||||
|
||||
// Create an EC block group: 3 data blocks + 2 parity blocks.
|
||||
Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
|
||||
BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
|
||||
|
||||
// Create 4 storageInfo, which means 1 block is missing.
|
||||
DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
|
||||
"storage1", "1.1.1.1", "rack1", "host1");
|
||||
DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
|
||||
"storage2", "2.2.2.2", "rack2", "host2");
|
||||
DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
|
||||
"storage3", "3.3.3.3", "rack3", "host3");
|
||||
DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
|
||||
"storage4", "4.4.4.4", "rack4", "host4");
|
||||
|
||||
// Link block with storage.
|
||||
aBlockInfoStriped.addStorage(ds1, aBlockGroup);
|
||||
aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
|
||||
aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
|
||||
aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
|
||||
|
||||
addEcBlockToBM(blockId, ecPolicy);
|
||||
aBlockInfoStriped.setBlockCollectionId(mockINodeId);
|
||||
|
||||
// Reconstruction should be scheduled.
|
||||
BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||
assertNotNull(work);
|
||||
|
||||
ExtendedBlock dummyBlock = new ExtendedBlock("bpid", 1, 1, 1);
|
||||
DatanodeDescriptor dummyDD = ds1.getDatanodeDescriptor();
|
||||
DatanodeDescriptor[] dummyDDArray = new DatanodeDescriptor[]{dummyDD};
|
||||
DatanodeStorageInfo[] dummyDSArray = new DatanodeStorageInfo[]{ds1};
|
||||
// Simulate the 2 nodes reach maxReplicationStreams.
|
||||
for(int i = 0; i < bm.maxReplicationStreams; i++){ //Add some dummy EC reconstruction task.
|
||||
ds3.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
|
||||
dummyDSArray, new byte[0], new byte[0], ecPolicy);
|
||||
ds4.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
|
||||
dummyDSArray, new byte[0], new byte[0], ecPolicy);
|
||||
}
|
||||
|
||||
// Reconstruction should be skipped since the number of non-busy nodes are not enough.
|
||||
work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||
assertNull(work);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFavorDecomUntilHardLimit() throws Exception {
|
||||
bm.maxReplicationStreams = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user