HDFS-10453. ReplicationMonitor thread could get stuck for a long time due to the race between replication and delete of the same file in a large cluster. Contributed by He Xiaoqiao.

This commit is contained in:
Arpit Agarwal 2018-02-12 07:00:50 -08:00
parent 8cf88fcd1f
commit 96bb6a51ec
3 changed files with 9 additions and 6 deletions

View File

@@ -33,6 +33,7 @@ abstract class BlockReconstructionWork {
private final BlockInfo block;
private final String srcPath;
private final long blockSize;
private final byte storagePolicyID;
/**
@@ -59,6 +60,7 @@ public BlockReconstructionWork(BlockInfo block,
int priority) {
this.block = block;
this.srcPath = bc.getName();
this.blockSize = block.getNumBytes();
this.storagePolicyID = bc.getStoragePolicyID();
this.srcNodes = srcNodes;
this.containingNodes = containingNodes;
@@ -100,6 +102,10 @@ public String getSrcPath() {
return srcPath;
}
public long getBlockSize() {
return blockSize;
}
public byte getStoragePolicyID() {
return storagePolicyID;
}

View File

@@ -59,8 +59,7 @@ void chooseTargets(BlockPlacementPolicy blockplacement,
// TODO: new placement policy for EC considering multiple writers
DatanodeStorageInfo[] chosenTargets = blockplacement.chooseTarget(
getSrcPath(), getAdditionalReplRequired(), getSrcNodes()[0],
getLiveReplicaStorages(), false, excludedNodes,
getBlock().getNumBytes(),
getLiveReplicaStorages(), false, excludedNodes, getBlockSize(),
storagePolicySuite.getPolicy(getStoragePolicyID()), null);
setTargets(chosenTargets);
}

View File

@@ -45,10 +45,8 @@ assert getSrcNodes().length > 0
try {
DatanodeStorageInfo[] chosenTargets = blockplacement.chooseTarget(
getSrcPath(), getAdditionalReplRequired(), getSrcNodes()[0],
getLiveReplicaStorages(), false, excludedNodes,
getBlock().getNumBytes(),
storagePolicySuite.getPolicy(getStoragePolicyID()),
null);
getLiveReplicaStorages(), false, excludedNodes, getBlockSize(),
storagePolicySuite.getPolicy(getStoragePolicyID()), null);
setTargets(chosenTargets);
} finally {
getSrcNodes()[0].decrementPendingReplicationWithoutTargets();