HDFS-16277. improve decision in AvailableSpaceBlockPlacementPolicy (#3559). Contributed by guo.

Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
GuoPhilipse 2021-10-22 10:29:27 +08:00 committed by GitHub
parent a73ff6915a
commit 1385cdfeb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 193 additions and 5 deletions

View File

@ -1195,6 +1195,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.namenode.available-space-block-placement-policy.balanced-space-preference-fraction";
public static final float DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT =
0.6f;
/**
 * Configuration key for the balanced-space tolerance read by
 * AvailableSpaceBlockPlacementPolicy when it is initialized.
 */
public static final String
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY =
"dfs.namenode.available-space-block-placement-policy.balanced-space-tolerance";
/**
 * Default balanced-space tolerance. The policy also falls back to this
 * value when the configured one is below 0 or above 20.
 */
public static final int
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT =
5;
public static final String
DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY =
"dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy"
@ -1202,6 +1208,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final float
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT =
0.6f;
/**
 * Configuration key for the balanced-space tolerance read by
 * AvailableSpaceRackFaultTolerantBlockPlacementPolicy when it is initialized.
 */
public static final String
DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY =
"dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy"
+ ".balanced-space-tolerance";
/**
 * Default balanced-space tolerance for the rack fault tolerant policy. The
 * policy also falls back to this value when the configured one is below 0
 * or above 20.
 *
 * Note: this constant's name orders BLOCK / RACK_FAULT_TOLERANT differently
 * from the matching key constant above.
 */
public static final int
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT =
5;
public static final String
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_KEY =
"dfs.namenode.available-space-block-placement-policy.balance-local-node";

View File

@ -20,6 +20,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY;
import java.util.Collection;
import java.util.EnumMap;
@ -47,6 +49,8 @@ public class AvailableSpaceBlockPlacementPolicy extends
// Random source shared by every instance of this policy (static field).
private static final Random RAND = new Random();
// Balanced-space preference expressed as an integer percentage
// (fraction * 100); starts at the default fraction.
private int balancedPreference =
(int) (100 * DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
// Threshold that compareDataNode compares against the absolute difference
// between two datanodes' DFS used percent; starts at the default.
private int balancedSpaceTolerance =
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
// Assigned in initialize() from the balance-local-node configuration key.
private boolean optimizeLocal;
@Override
@ -59,9 +63,14 @@ public void initialize(Configuration conf, FSClusterStats stats,
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
LOG.info("Available space block placement policy initialized: "
+ DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY
+ DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY
+ " = " + balancedPreferencePercent);
balancedSpaceTolerance =
conf.getInt(
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY,
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT);
optimizeLocal = conf.getBoolean(
DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_KEY,
DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCE_LOCAL_NODE_DEFAULT);
@ -77,6 +86,16 @@ public void initialize(Configuration conf, FSClusterStats stats,
+ " is less than 0.5 so datanodes with more used percent will"
+ " receive more block allocations.");
}
if (balancedSpaceTolerance > 20 || balancedSpaceTolerance < 0) {
LOG.warn("The value of "
+ DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY
+ " is invalid, Current value is " + balancedSpaceTolerance + ", Default value " +
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT
+ " will be used instead.");
balancedSpaceTolerance =
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
}
balancedPreference = (int) (100 * balancedPreferencePercent);
}
@ -183,7 +202,7 @@ private DatanodeDescriptor select(DatanodeDescriptor a, DatanodeDescriptor b,
protected int compareDataNode(final DatanodeDescriptor a,
final DatanodeDescriptor b, boolean isBalanceLocal) {
if (a.equals(b)
|| Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < 5 || ((
|| Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < balancedSpaceTolerance || ((
isBalanceLocal && a.getDfsUsedPercent() < 50))) {
return 0;
}

View File

@ -32,7 +32,9 @@
import java.util.Random;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY;
/**
* Space balanced rack fault tolerant block placement policy.
@ -45,7 +47,8 @@ public class AvailableSpaceRackFaultTolerantBlockPlacementPolicy
// Random source shared by every instance of this policy (static field).
private static final Random RAND = new Random();
// Balanced-space preference expressed as an integer percentage
// (fraction * 100); starts at the default fraction.
private int balancedPreference = (int) (100
* DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
// Threshold that compareDataNode compares against the absolute difference
// between two datanodes' DFS used percent; starts at the default.
private int balancedSpaceTolerance =
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
@Override
public void initialize(Configuration conf, FSClusterStats stats,
NetworkTopology clusterMap, Host2NodesMap host2datanodeMap) {
@ -54,6 +57,10 @@ public void initialize(Configuration conf, FSClusterStats stats,
DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY,
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT);
balancedSpaceTolerance = conf.getInt(
DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY,
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT);
LOG.info("Available space rack fault tolerant block placement policy "
+ "initialized: "
+ DFSConfigKeys.DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY
@ -70,6 +77,18 @@ public void initialize(Configuration conf, FSClusterStats stats,
+ " is less than 0.5 so datanodes with more used percent will"
+ " receive more block allocations.");
}
if (balancedSpaceTolerance > 20 || balancedSpaceTolerance < 0) {
LOG.warn("The value of "
+ DFS_NAMENODE_AVAILABLE_SPACE_RACK_FAULT_TOLERANT_BLOCK_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_KEY
+ " is invalid, Current value is " + balancedSpaceTolerance + ", Default value " +
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT
+ " will be used instead.");
balancedSpaceTolerance =
DFS_NAMENODE_AVAILABLE_SPACE_BLOCK_RACK_FAULT_TOLERANT_PLACEMENT_POLICY_BALANCED_SPACE_TOLERANCE_DEFAULT;
}
balancedPreference = (int) (100 * balancedPreferencePercent);
}
@ -118,7 +137,7 @@ private DatanodeDescriptor select(DatanodeDescriptor a,
protected int compareDataNode(final DatanodeDescriptor a,
final DatanodeDescriptor b) {
if (a.equals(b)
|| Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < 5) {
|| Math.abs(a.getDfsUsedPercent() - b.getDfsUsedPercent()) < balancedSpaceTolerance) {
return 0;
}
return a.getDfsUsedPercent() < b.getDfsUsedPercent() ? -1 : 1;

View File

@ -5067,6 +5067,18 @@
</description>
</property>
<property>
<name>dfs.namenode.available-space-block-placement-policy.balanced-space-tolerance</name>
<value>5</value>
<description>
Only used when the dfs.block.replicator.classname is set to
org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceBlockPlacementPolicy.
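Valid values are integers from 0 to 20, inclusive; if the configured value is outside
this range, the default of 5 is used instead. Datanodes whose DFS used percentages
differ by less than this value are treated as equally loaded, so a larger value
increases the tolerance for placing blocks on Datanodes with similar disk space used.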
</description>
</property>
<property>
<name>
dfs.namenode.available-space-block-placement-policy.balance-local-node
@ -5092,7 +5104,17 @@
high load increases as the value reaches near 0.
</description>
</property>
<property>
<name>dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy.balanced-space-tolerance</name>
<value>5</value>
<description>
Only used when the dfs.block.replicator.classname is set to
org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy.
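Valid values are integers from 0 to 20, inclusive; if the configured value is outside
this range, the default of 5 is used instead. Datanodes whose DFS used percentages
differ by less than this value are treated as equally loaded, so a larger value
increases the tolerance for placing blocks on Datanodes with similar disk space used.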
</description>
</property>
<property>
<name>dfs.namenode.backup.dnrpc-address</name>
<value></value>

View File

@ -119,6 +119,16 @@ The AvailableSpaceBlockPlacementPolicy is a space balanced block placement polic
</description>
</property>
<property>
<name>dfs.namenode.available-space-block-placement-policy.balanced-space-tolerance</name>
<value>5</value>
<description>
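Valid values are integers from 0 to 20, inclusive; if the configured value is outside
this range, the default of 5 is used instead. Datanodes whose DFS used percentages
differ by less than this value are treated as equally loaded, so a larger value
increases the tolerance for placing blocks on Datanodes with similar disk space used.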
</description>
</property>
<property>
<name>
dfs.namenode.available-space-block-placement-policy.balance-local-node
@ -160,6 +170,18 @@ amongst maximum number of racks possible and at the same time will try to choose
high load increases as the value reaches near 0.
</description>
</property>
<property>
<name>dfs.namenode.available-space-rack-fault-tolerant-block-placement-policy.balanced-space-tolerance</name>
<value>5</value>
<description>
Only used when the dfs.block.replicator.classname is set to
org.apache.hadoop.hdfs.server.blockmanagement.AvailableSpaceRackFaultTolerantBlockPlacementPolicy.
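Valid values are integers from 0 to 20, inclusive; if the configured value is outside
this range, the default of 5 is used instead. Datanodes whose DFS used percentages
differ by less than this value are treated as equally loaded, so a larger value
increases the tolerance for placing blocks on Datanodes with similar disk space used.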
</description>
</property>
```
For more details check [HDFS-15288](https://issues.apache.org/jira/browse/HDFS-15288)

View File

@ -173,6 +173,52 @@ public void testChooseDataNode() {
}
}
/**
 * Checks the balanced-space tolerance applied by {@code compareDataNode}:
 * two datanodes whose DFS used percentages are close enough must compare as
 * equal (0), while a larger gap must yield -1 or 1 depending on argument
 * order.
 */
@Test
public void testChooseSimilarDataNode() {
  final int capacity = 3;
  final String[] ownerRackOfTolerateNodes = new String[capacity];
  for (int i = 0; i < capacity; i++) {
    ownerRackOfTolerateNodes[i] = "rack" + i;
  }
  final DatanodeStorageInfo[] tolerateStorages =
      DFSTestUtil.createDatanodeStorageInfos(ownerRackOfTolerateNodes);
  final DatanodeDescriptor[] tolerateDataNodes =
      DFSTestUtil.toDatanodeDescriptor(tolerateStorages);

  final BlockManager bm = namenode.getNamesystem().getBlockManager();
  final AvailableSpaceBlockPlacementPolicy toleratePlacementPolicy =
      (AvailableSpaceBlockPlacementPolicy) bm.getBlockPlacementPolicy();

  // NOTE(review): the 2nd and 3rd arguments look like (capacity, dfsUsed),
  // which would give used percentages of roughly 5%, 9.1% and 10% for nodes
  // 0, 1 and 2 -- confirm against updateHeartbeatWithUsage. The expectations
  // below also presume the policy runs with the default tolerance of 5.
  updateHeartbeatWithUsage(tolerateDataNodes[0],
      20 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * blockSize, 0L, 0L, 0L, 0, 0);
  updateHeartbeatWithUsage(tolerateDataNodes[1],
      11 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * blockSize, 0L, 0L, 0L, 0, 0);
  updateHeartbeatWithUsage(tolerateDataNodes[2],
      10 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * blockSize, 0L, 0L, 0L, 0, 0);

  // Nodes 0 and 1 are within the tolerance: equal in both argument orders.
  final int node0VsNode1 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[0], tolerateDataNodes[1], false);
  assertTrue("compare(node0, node1) should be 0 but was " + node0VsNode1,
      node0VsNode1 == 0);
  final int node1VsNode0 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[1], tolerateDataNodes[0], false);
  assertTrue("compare(node1, node0) should be 0 but was " + node1VsNode0,
      node1VsNode0 == 0);

  // Nodes 0 and 2 are outside the tolerance: ordered by used percentage.
  final int node0VsNode2 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[0], tolerateDataNodes[2], false);
  assertTrue("compare(node0, node2) should be -1 but was " + node0VsNode2,
      node0VsNode2 == -1);
  final int node2VsNode0 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[2], tolerateDataNodes[0], false);
  assertTrue("compare(node2, node0) should be 1 but was " + node2VsNode0,
      node2VsNode0 == 1);
}
@AfterClass
public static void teardownCluster() {
if (namenode != null) {

View File

@ -41,6 +41,7 @@
import java.util.HashSet;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Tests AvailableSpaceRackFaultTolerant block placement policy.
@ -206,6 +207,52 @@ public void testMaxRackAllocation() {
assertEquals(REPLICA, racks.size());
}
/**
 * Checks the balanced-space tolerance applied by {@code compareDataNode}:
 * two datanodes whose DFS used percentages are close enough must compare as
 * equal (0), while a larger gap must yield -1 or 1 depending on argument
 * order.
 */
@Test
public void testChooseSimilarDataNode() {
  final int capacity = 3;
  final String[] ownerRackOfTolerateNodes = new String[capacity];
  for (int i = 0; i < capacity; i++) {
    ownerRackOfTolerateNodes[i] = "rack" + i;
  }
  final DatanodeStorageInfo[] tolerateStorages =
      DFSTestUtil.createDatanodeStorageInfos(ownerRackOfTolerateNodes);
  final DatanodeDescriptor[] tolerateDataNodes =
      DFSTestUtil.toDatanodeDescriptor(tolerateStorages);

  final BlockManager bm = namenode.getNamesystem().getBlockManager();
  final AvailableSpaceRackFaultTolerantBlockPlacementPolicy toleratePlacementPolicy =
      (AvailableSpaceRackFaultTolerantBlockPlacementPolicy) bm.getBlockPlacementPolicy();

  // NOTE(review): the 2nd and 3rd arguments look like (capacity, dfsUsed),
  // which would give used percentages of roughly 5%, 9.1% and 10% for nodes
  // 0, 1 and 2 -- confirm against updateHeartbeatWithUsage. The expectations
  // below also presume the policy runs with the default tolerance of 5.
  updateHeartbeatWithUsage(tolerateDataNodes[0],
      20 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * BLOCK_SIZE, 0L, 0L, 0L, 0, 0);
  updateHeartbeatWithUsage(tolerateDataNodes[1],
      11 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * BLOCK_SIZE, 0L, 0L, 0L, 0, 0);
  updateHeartbeatWithUsage(tolerateDataNodes[2],
      10 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      1 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE,
      HdfsServerConstants.MIN_BLOCKS_FOR_WRITE
          * BLOCK_SIZE, 0L, 0L, 0L, 0, 0);

  // Nodes 0 and 1 are within the tolerance: equal in both argument orders.
  final int node0VsNode1 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[0], tolerateDataNodes[1]);
  assertTrue("compare(node0, node1) should be 0 but was " + node0VsNode1,
      node0VsNode1 == 0);
  final int node1VsNode0 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[1], tolerateDataNodes[0]);
  assertTrue("compare(node1, node0) should be 0 but was " + node1VsNode0,
      node1VsNode0 == 0);

  // Nodes 0 and 2 are outside the tolerance: ordered by used percentage.
  final int node0VsNode2 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[0], tolerateDataNodes[2]);
  assertTrue("compare(node0, node2) should be -1 but was " + node0VsNode2,
      node0VsNode2 == -1);
  final int node2VsNode0 = toleratePlacementPolicy.compareDataNode(
      tolerateDataNodes[2], tolerateDataNodes[0]);
  assertTrue("compare(node2, node0) should be 1 but was " + node2VsNode0,
      node2VsNode0 == 1);
}
@AfterClass
public static void teardownCluster() {
if (namenode != null) {