From 8cd4704e0a1ab2ba941e296b8799e288f901bc4f Mon Sep 17 00:00:00 2001
From: huhaiyang
Date: Fri, 22 Mar 2024 21:43:12 +0800
Subject: [PATCH] HDFS-17430. RecoveringBlock will skip no live replicas when
 get block recovery command. (#6635)

---
 .../blockmanagement/DatanodeManager.java      | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index ebd2fa992e..29327a1611 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -1729,12 +1729,13 @@ private BlockRecoveryCommand getBlockRecoveryCommand(String blockPoolId,
             " where it is not under construction.");
       }
       final DatanodeStorageInfo[] storages = uc.getExpectedStorageLocations();
-      // Skip stale nodes during recovery
-      final List<DatanodeStorageInfo> recoveryLocations =
+      // Skip stale and dead nodes during recovery.
+      List<DatanodeStorageInfo> recoveryLocations =
           new ArrayList<>(storages.length);
-      final List<Integer> storageIdx = new ArrayList<>(storages.length);
+      List<Integer> storageIdx = new ArrayList<>(storages.length);
       for (int i = 0; i < storages.length; ++i) {
-        if (!storages[i].getDatanodeDescriptor().isStale(staleInterval)) {
+        if (!storages[i].getDatanodeDescriptor().isStale(staleInterval) &&
+            storages[i].getDatanodeDescriptor().isAlive()) {
           recoveryLocations.add(storages[i]);
           storageIdx.add(i);
         }
@@ -1747,20 +1748,31 @@ private BlockRecoveryCommand getBlockRecoveryCommand(String blockPoolId,
       ExtendedBlock primaryBlock = (copyOnTruncateRecovery) ?
           new ExtendedBlock(blockPoolId, uc.getTruncateBlock()) :
           new ExtendedBlock(blockPoolId, b);
-      // If we only get 1 replica after eliminating stale nodes, choose all
+      // If we only get 1 replica after eliminating stale and dead nodes, choose all live
       // replicas for recovery and let the primary data node handle failures.
       DatanodeInfo[] recoveryInfos;
       if (recoveryLocations.size() > 1) {
         if (recoveryLocations.size() != storages.length) {
-          LOG.info("Skipped stale nodes for recovery : "
+          LOG.info("Skipped stale and dead nodes for recovery : "
               + (storages.length - recoveryLocations.size()));
         }
-        recoveryInfos = DatanodeStorageInfo.toDatanodeInfos(recoveryLocations);
       } else {
-        // If too many replicas are stale, then choose all replicas to
+        // If too many replicas are stale, then choose all live replicas to
         // participate in block recovery.
-        recoveryInfos = DatanodeStorageInfo.toDatanodeInfos(storages);
+        recoveryLocations.clear();
+        storageIdx.clear();
+        for (int i = 0; i < storages.length; ++i) {
+          if (storages[i].getDatanodeDescriptor().isAlive()) {
+            recoveryLocations.add(storages[i]);
+            storageIdx.add(i);
+          }
+        }
+        if (recoveryLocations.size() != storages.length) {
+          LOG.info("Skipped dead nodes for recovery : {}",
+              storages.length - recoveryLocations.size());
+        }
       }
+      recoveryInfos = DatanodeStorageInfo.toDatanodeInfos(recoveryLocations);
       RecoveringBlock rBlock;
       if (truncateRecovery) {
         Block recoveryBlock = (copyOnTruncateRecovery) ? b : uc.getTruncateBlock();
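
For illustration only, here is a minimal, self-contained sketch of the replica-selection behavior this patch introduces: prefer replicas that are neither stale nor dead, and if that leaves one or zero candidates, fall back to every live replica; dead replicas are never handed to block recovery. The Node class and pickRecoveryLocations method below are hypothetical stand-ins, not HDFS code; the real logic lives in DatanodeManager#getBlockRecoveryCommand and works on DatanodeStorageInfo/DatanodeDescriptor with isStale(staleInterval) and isAlive().

import java.util.ArrayList;
import java.util.List;

public class RecoveryLocationSketch {

  /** Hypothetical stand-in for a replica location (not an HDFS class). */
  static class Node {
    final String name;
    final boolean stale;
    final boolean alive;

    Node(String name, boolean stale, boolean alive) {
      this.name = name;
      this.stale = stale;
      this.alive = alive;
    }
  }

  /**
   * Mirrors the patched selection: first keep replicas that are neither
   * stale nor dead; if one or zero remain, widen to all live replicas so
   * the primary datanode can still drive recovery. Dead replicas are
   * never returned.
   */
  static List<Node> pickRecoveryLocations(Node[] storages) {
    List<Node> recoveryLocations = new ArrayList<>(storages.length);
    for (Node n : storages) {
      if (!n.stale && n.alive) {
        recoveryLocations.add(n);
      }
    }
    if (recoveryLocations.size() <= 1) {
      // Too many replicas are stale: fall back to every live replica.
      recoveryLocations.clear();
      for (Node n : storages) {
        if (n.alive) {
          recoveryLocations.add(n);
        }
      }
    }
    return recoveryLocations;
  }

  public static void main(String[] args) {
    Node[] storages = {
        new Node("dn1", true, true),   // stale but alive
        new Node("dn2", false, false), // dead
        new Node("dn3", true, true)    // stale but alive
    };
    // Prints dn1 and dn3: the non-stale set collapsed to zero, so the
    // fallback picks all live replicas and still skips the dead dn2.
    pickRecoveryLocations(storages).forEach(n -> System.out.println(n.name));
  }
}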