From 429da635ec70f9abe5ab71e24c9f2eec0aa36e18 Mon Sep 17 00:00:00 2001
From: Ayush Saxena
Date: Fri, 28 Feb 2020 00:22:37 +0530
Subject: [PATCH] HDFS-15186. Erasure Coding: Decommission may generate the
 parity block's content with all 0 in some case. Contributed by Yao
 Guangdong.

---
 .../server/blockmanagement/BlockManager.java     |  6 +-
 .../blockmanagement/DatanodeDescriptor.java      |  3 +-
 .../hdfs/TestDecommissionWithStriped.java        | 63 +++++++++++++++++++
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 605f502c02..262e0c2968 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -2445,14 +2445,16 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
       if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY
           && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance())
           && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) {
-        if (isStriped && state == StoredReplicaState.LIVE) {
+        if (isStriped && (state == StoredReplicaState.LIVE
+            || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
         }
         continue; // already reached replication limit
       }
 
       if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) {
-        if (isStriped && state == StoredReplicaState.LIVE) {
+        if (isStriped && (state == StoredReplicaState.LIVE
+            || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
         }
         continue;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index 9035fd36f4..3fa9b3ad51 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -639,7 +639,8 @@ public void incrementPendingReplicationWithoutTargets() {
     pendingReplicationWithoutTargets++;
   }
 
-  void decrementPendingReplicationWithoutTargets() {
+  @VisibleForTesting
+  public void decrementPendingReplicationWithoutTargets() {
     pendingReplicationWithoutTargets--;
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java
index be3ababfec..f7dbe50629 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java
@@ -350,6 +350,69 @@ public void testDecommissionWithBusyNode() throws Exception {
         fileChecksum1.equals(fileChecksum2));
   }
 
+  /**
+   * Decommission may generate the parity block's content with all 0
+   * in some case.
+   * @throws Exception
+   */
+  @Test(timeout = 120000)
+  public void testDecommission2NodeWithBusyNode() throws Exception {
+    byte busyDNIndex = 6;
+    byte decommissionDNIndex = 6;
+    byte decommissionDNIndex2 = 8;
+    //1. create EC file
+    final Path ecFile = new Path(ecDir, "testDecommission2NodeWithBusyNode");
+    int writeBytes = cellSize * dataBlocks;
+    writeStripedFile(dfs, ecFile, writeBytes);
+
+    Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());
+    FileChecksum fileChecksum1 = dfs.getFileChecksum(ecFile, writeBytes);
+
+    //2. make one DN busy
+    final INodeFile fileNode = cluster.getNamesystem().getFSDirectory()
+        .getINode4Write(ecFile.toString()).asFile();
+    BlockInfo firstBlock = fileNode.getBlocks()[0];
+    DatanodeStorageInfo[] dnStorageInfos = bm.getStorages(firstBlock);
+    DatanodeDescriptor busyNode = dnStorageInfos[busyDNIndex]
+        .getDatanodeDescriptor();
+    for (int j = 0; j < replicationStreamsHardLimit; j++) {
+      busyNode.incrementPendingReplicationWithoutTargets();
+    }
+
+    //3. start decommissioning two nodes, including the busy one
+    List<DatanodeInfo> decommissionNodes = new ArrayList<>();
+    decommissionNodes.add(dnStorageInfos[decommissionDNIndex]
+        .getDatanodeDescriptor());
+    decommissionNodes.add(dnStorageInfos[decommissionDNIndex2]
+        .getDatanodeDescriptor());
+    decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSION_INPROGRESS);
+
+    //4. wait for the not-busy decommissioning block to be replicated (9-2+1=8)
+    GenericTestUtils.waitFor(
+        () -> bm.countNodes(firstBlock).liveReplicas() >= 8,
+        100, 60000);
+
+    //5. release the busy DN so the busy decommissioning block can be replicated
+    busyNode.decrementPendingReplicationWithoutTargets();
+
+    //6. wait for the decommission of both nodes to finish
+    decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSIONED);
+
+    //7. Busy DN shouldn't be reconstructed
+    DatanodeStorageInfo[] newDnStorageInfos = bm.getStorages(firstBlock);
+    Assert.assertEquals("Busy DN shouldn't be reconstructed",
+        dnStorageInfos[busyDNIndex].getStorageID(),
+        newDnStorageInfos[busyDNIndex].getStorageID());
+
+    //8. check the file checksum
+    FileChecksum fileChecksum2 = dfs.getFileChecksum(ecFile, writeBytes);
+    Assert.assertEquals("Checksum mismatches!", fileChecksum1, fileChecksum2);
+
+    //9. check the data is correct
+    StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommissionNodes,
+        null, blockGroupSize);
+  }
+
   /**
    * Tests to verify that the file checksum should be able to compute after the
    * decommission operation.