HDFS-17052. Improve BlockPlacementPolicyRackFaultTolerant to avoid node selection failures when there are not enough racks. (#5759). Contributed by Hualong Zhang and Shuyan Zhang.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
Author: zhtttylz (committed via GitHub)
Date: 2023-07-02 15:37:19 +08:00
Parent: 59a7836d13
Commit: 750c0fc631
2 changed files with 75 additions and 3 deletions
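
In brief: chooseEvenlyFromRemainingRacks() previously gave up as soon as a retry pass placed no new replica (numResultsOflastChoose != results.size()), even when relaxing the per-rack cap could still make progress. The patch instead retries until every expected replica is placed or bestEffortMaxNodesPerRack reaches totalReplicaExpected, and it recomputes the cap from the most-loaded rack only on passes that placed nothing, which keeps the common path cheap.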


@@ -170,7 +170,7 @@ private void chooseEvenlyFromRemainingRacks(Node writer,
     NotEnoughReplicasException lastException = e;
     int bestEffortMaxNodesPerRack = maxNodesPerRack;
     while (results.size() != totalReplicaExpected &&
-        numResultsOflastChoose != results.size()) {
+        bestEffortMaxNodesPerRack < totalReplicaExpected) {
       // Exclude the chosen nodes
       final Set<Node> newExcludeNodes = new HashSet<>();
       for (DatanodeStorageInfo resultStorage : results) {
@@ -192,11 +192,22 @@ private void chooseEvenlyFromRemainingRacks(Node writer,
       } finally {
         excludedNodes.addAll(newExcludeNodes);
       }
+      // To improve performance, the maximum value of 'bestEffortMaxNodesPerRack'
+      // is calculated only when it is not possible to select a node.
+      if (numResultsOflastChoose == results.size()) {
+        Map<String, Integer> nodesPerRack = new HashMap<>();
+        for (DatanodeStorageInfo dsInfo : results) {
+          String rackName = dsInfo.getDatanodeDescriptor().getNetworkLocation();
+          nodesPerRack.merge(rackName, 1, Integer::sum);
+        }
+        bestEffortMaxNodesPerRack =
+            Math.max(bestEffortMaxNodesPerRack, Collections.max(nodesPerRack.values()));
+      }
     }
-    if (numResultsOflastChoose != totalReplicaExpected) {
+    if (results.size() != totalReplicaExpected) {
       LOG.debug("Best effort placement failed: expecting {} replicas, only "
-          + "chose {}.", totalReplicaExpected, numResultsOflastChoose);
+          + "chose {}.", totalReplicaExpected, results.size());
       throw lastException;
     }
   }
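
For readers skimming the hunk above, the fallback recomputation is the heart of the fix: when a pass places nothing new, the per-rack cap is raised to the largest replica count already sitting on any one rack. A minimal standalone Java sketch of that calculation (class and method names here are illustrative, not part of the patch):

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BestEffortRackCap {
  // Raise the per-rack cap to the largest number of replicas already
  // placed on a single rack; the rack strings stand in for the
  // DatanodeStorageInfo network locations used by the real code.
  static int recomputeCap(int currentCap, List<String> chosenRacks) {
    Map<String, Integer> nodesPerRack = new HashMap<>();
    for (String rack : chosenRacks) {
      nodesPerRack.merge(rack, 1, Integer::sum);
    }
    return nodesPerRack.isEmpty()
        ? currentCap
        : Math.max(currentCap, Collections.max(nodesPerRack.values()));
  }

  public static void main(String[] args) {
    // Three replicas already on /rack0 and one on /rack1: the cap rises
    // to 3, so the next pass may stack more replicas onto /rack0.
    System.out.println(
        recomputeCap(2, List.of("/rack0", "/rack0", "/rack0", "/rack1"))); // prints 3
  }
}

Together with the new loop guard (bestEffortMaxNodesPerRack < totalReplicaExpected), the cap only ever grows across passes, so the retry loop terminates once it reaches totalReplicaExpected even when the cluster simply has too few racks.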


@@ -19,6 +19,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -52,6 +53,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Iterator;
 import java.util.List;
@@ -515,4 +517,63 @@ public void testReconstrutionWithBusyBlock1() throws Exception {
     assertEquals(9, bm.countNodes(blockInfo).liveReplicas());
   }
+
+  @Test
+  public void testReconstructionWithStorageTypeNotEnough() throws Exception {
+    final HdfsConfiguration conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+    // Nine disk nodes and eleven archive nodes.
+    int numDn = groupSize * 2 + 2;
+    StorageType[][] storageTypes = new StorageType[numDn][];
+    Arrays.fill(storageTypes, 0, groupSize,
+        new StorageType[]{StorageType.DISK, StorageType.DISK});
+    Arrays.fill(storageTypes, groupSize, numDn,
+        new StorageType[]{StorageType.ARCHIVE, StorageType.ARCHIVE});
+    // Nine disk racks and one archive rack.
+    String[] racks = {
+        "/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6", "/rack7", "/rack8",
+        "/rack9", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0",
+        "/rack0", "/rack0", "/rack0", "/rack0"};
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn)
+        .storageTypes(storageTypes)
+        .racks(racks)
+        .build();
+    cluster.waitActive();
+    DistributedFileSystem fs = cluster.getFileSystem();
+    fs.enableErasureCodingPolicy(
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+    try {
+      fs.mkdirs(dirPath);
+      fs.setStoragePolicy(dirPath, "COLD");
+      fs.setErasureCodingPolicy(dirPath,
+          StripedFileTestUtil.getDefaultECPolicy().getName());
+      DFSTestUtil.createFile(fs, filePath,
+          cellSize * dataBlocks * 2, (short) 1, 0L);
+      // Stop one dn.
+      LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      DatanodeInfo dnToStop = block.getLocations()[0];
+      cluster.stopDataNode(dnToStop.getXferAddr());
+      cluster.setDataNodeDead(dnToStop);
+      // Wait for reconstruction to happen.
+      StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
+      blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
+      block = (LocatedStripedBlock) blks.getLastLocatedBlock();
+      BitSet bitSet = new BitSet(groupSize);
+      for (byte index : block.getBlockIndices()) {
+        bitSet.set(index);
+      }
+      for (int i = 0; i < groupSize; i++) {
+        Assert.assertTrue(bitSet.get(i));
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
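
As a side note, the closing assertions in the new test use a compact pattern worth reusing: collect the reported internal block indices into a BitSet and confirm every bit 0..groupSize-1 is set. A self-contained sketch of the same check (the class and helper names are ours, not from the test):

import java.util.BitSet;

public class StripeCompleteness {
  // True when every internal block index 0..groupSize-1 was reported,
  // i.e. the striped group is complete after reconstruction.
  static boolean allIndicesPresent(byte[] blockIndices, int groupSize) {
    BitSet seen = new BitSet(groupSize);
    for (byte index : blockIndices) {
      seen.set(index);
    }
    // nextClearBit(0) returns groupSize or more only when bits
    // 0..groupSize-1 are all set.
    return seen.nextClearBit(0) >= groupSize;
  }

  public static void main(String[] args) {
    // The default RS-6-3 policy yields a group of 9 internal blocks;
    // index 4 is missing here, so the check fails.
    byte[] reported = {0, 1, 2, 3, 5, 6, 7, 8};
    System.out.println(allIndicesPresent(reported, 9)); // prints false
  }
}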