HDFS-10453. ReplicationMonitor thread could get stuck for a long time due to the race between replication and delete of the same file in a large cluster. Contributed by He Xiaoqiao.

This commit is contained in:
Arpit Agarwal 2018-02-12 07:00:50 -08:00
parent 8cf88fcd1f
commit 96bb6a51ec
3 changed files with 9 additions and 6 deletions

View File

@@ -33,6 +33,7 @@ abstract class BlockReconstructionWork {
private final BlockInfo block;
private final String srcPath;
private final long blockSize;
private final byte storagePolicyID;
/**
@@ -59,6 +60,7 @@ public BlockReconstructionWork(BlockInfo block,
int priority) {
this.block = block;
this.srcPath = bc.getName();
this.blockSize = block.getNumBytes();
this.storagePolicyID = bc.getStoragePolicyID();
this.srcNodes = srcNodes;
this.containingNodes = containingNodes;
@@ -100,6 +102,10 @@ public String getSrcPath() {
return srcPath;
}
public long getBlockSize() {
return blockSize;
}
public byte getStoragePolicyID() {
return storagePolicyID;
}

View File

@@ -59,8 +59,7 @@ void chooseTargets(BlockPlacementPolicy blockplacement,
// TODO: new placement policy for EC considering multiple writers
DatanodeStorageInfo[] chosenTargets = blockplacement.chooseTarget(
getSrcPath(), getAdditionalReplRequired(), getSrcNodes()[0],
getLiveReplicaStorages(), false, excludedNodes,
getBlock().getNumBytes(),
getLiveReplicaStorages(), false, excludedNodes, getBlockSize(),
storagePolicySuite.getPolicy(getStoragePolicyID()), null);
setTargets(chosenTargets);
}

View File

@@ -45,10 +45,8 @@ assert getSrcNodes().length > 0
try {
DatanodeStorageInfo[] chosenTargets = blockplacement.chooseTarget(
getSrcPath(), getAdditionalReplRequired(), getSrcNodes()[0],
getLiveReplicaStorages(), false, excludedNodes,
getBlock().getNumBytes(),
storagePolicySuite.getPolicy(getStoragePolicyID()),
null);
getLiveReplicaStorages(), false, excludedNodes, getBlockSize(),
storagePolicySuite.getPolicy(getStoragePolicyID()), null);
setTargets(chosenTargets);
} finally {
getSrcNodes()[0].decrementPendingReplicationWithoutTargets();