HDFS-16735. Reduce the number of HeartbeatManager loops. (#4780). Contributed by Shuyan Zhang.

Signed-off-by: Inigo Goiri <inigoiri@apache.org>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
zhangshuyan0 2022-08-29 11:30:21 +08:00 committed by GitHub
parent c60a900583
commit 71778a6cc5
3 changed files with 35 additions and 15 deletions

DFSConfigKeys.java

@@ -557,7 +557,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   // This value uses the times of heartbeat interval to define the minimum value for stale interval.
   public static final String DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_KEY = "dfs.namenode.stale.datanode.minimum.interval";
   public static final int DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_DEFAULT = 3; // i.e. min_interval is 3 * heartbeat_interval = 9s
+  public static final String DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY
+      = "dfs.namenode.remove.dead.datanode.batchnum";
+  public static final int DFS_NAMENODE_REMOVE_BAD_BATCH_NUM_DEFAULT = 10;
   // When the percentage of stale datanodes reaches this ratio,
   // allow writing to stale nodes to prevent hotspots.
   public static final String DFS_NAMENODE_USE_STALE_DATANODE_FOR_WRITE_RATIO_KEY = "dfs.namenode.write.stale.datanode.ratio";

HeartbeatManager.java

@@ -71,6 +71,7 @@ class HeartbeatManager implements DatanodeStatistics {
   /** Heartbeat monitor thread. */
   private final Daemon heartbeatThread = new Daemon(new Monitor());
   private final StopWatch heartbeatStopWatch = new StopWatch();
+  private final int numOfDeadDatanodesRemove;
 
   final Namesystem namesystem;
   final BlockManager blockManager;
@@ -96,6 +97,9 @@ class HeartbeatManager implements DatanodeStatistics {
     enableLogStaleNodes = conf.getBoolean(
         DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_KEY,
         DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_DEFAULT);
+    this.numOfDeadDatanodesRemove = conf.getInt(
+        DFSConfigKeys.DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY,
+        DFSConfigKeys.DFS_NAMENODE_REMOVE_BAD_BATCH_NUM_DEFAULT);
 
     if (avoidStaleDataNodesForWrite && staleInterval < recheckInterval) {
       this.heartbeatRecheckInterval = staleInterval;
@@ -404,7 +408,7 @@ private void dumpStaleNodes(List<DatanodeDescriptor> staleNodes) {
   /**
    * Check if there are any expired heartbeats, and if so,
    * whether any blocks have to be re-replicated.
-   * While removing dead datanodes, make sure that only one datanode is marked
+   * While removing dead datanodes, make sure that limited datanodes is marked
    * dead at a time within the synchronized section. Otherwise, a cascading
    * effect causes more datanodes to be declared dead.
    * Check if there are any failed storage and if so,
@@ -436,12 +440,17 @@ void heartbeatCheck() {
       return;
     }
     boolean allAlive = false;
-    while (!allAlive) {
-      // locate the first dead node.
-      DatanodeDescriptor dead = null;
-
-      // locate the first failed storage that isn't on a dead node.
-      DatanodeStorageInfo failedStorage = null;
+    // Locate limited dead nodes.
+    List<DatanodeDescriptor> deadDatanodes = new ArrayList<>(
+        numOfDeadDatanodesRemove);
+    // Locate limited failed storages that isn't on a dead node.
+    List<DatanodeStorageInfo> failedStorages = new ArrayList<>(
+        numOfDeadDatanodesRemove);
+    while (!allAlive) {
+      deadDatanodes.clear();
+      failedStorages.clear();
 
       // check the number of stale storages
       int numOfStaleStorages = 0;
@@ -452,9 +461,10 @@ void heartbeatCheck() {
           if (shouldAbortHeartbeatCheck(0)) {
             return;
           }
-          if (dead == null && dm.isDatanodeDead(d)) {
+          if (deadDatanodes.size() < numOfDeadDatanodesRemove &&
+              dm.isDatanodeDead(d)) {
             stats.incrExpiredHeartbeats();
-            dead = d;
+            deadDatanodes.add(d);
             // remove the node from stale list to adjust the stale list size
             // before setting the stale count of the DatanodeManager
             removeNodeFromStaleList(d);
@ -476,10 +486,10 @@ void heartbeatCheck() {
numOfStaleStorages++; numOfStaleStorages++;
} }
if (failedStorage == null && if (failedStorages.size() < numOfDeadDatanodesRemove &&
storageInfo.areBlocksOnFailedStorage() && storageInfo.areBlocksOnFailedStorage() &&
d != dead) { !deadDatanodes.contains(d)) {
failedStorage = storageInfo; failedStorages.add(storageInfo);
} }
} }
} }
@@ -492,12 +502,12 @@ void heartbeatCheck() {
       // log nodes detected as stale since last heartBeat
       dumpStaleNodes(staleNodes);
 
-      allAlive = dead == null && failedStorage == null;
+      allAlive = deadDatanodes.isEmpty() && failedStorages.isEmpty();
       if (!allAlive && namesystem.isInStartupSafeMode()) {
         return;
       }
 
-      if (dead != null) {
+      for (DatanodeDescriptor dead : deadDatanodes) {
         // acquire the fsnamesystem lock, and then remove the dead node.
         namesystem.writeLock();
         try {
@@ -506,7 +516,7 @@ void heartbeatCheck() {
           namesystem.writeUnlock("removeDeadDatanode");
         }
       }
-      if (failedStorage != null) {
+      for (DatanodeStorageInfo failedStorage : failedStorages) {
         // acquire the fsnamesystem lock, and remove blocks on the storage.
         namesystem.writeLock();
         try {

hdfs-default.xml

@@ -5365,6 +5365,14 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.remove.dead.datanode.batchnum</name>
+  <value>10</value>
+  <description>
+    Maximum number of datanodes removed by HeartbeatManager per scan.
+  </description>
+</property>
+
 <property>
   <name>dfs.namenode.snapshot.capture.openfiles</name>
   <value>false</value>
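
For reference, a small standalone check (hypothetical, not part of the patch) that prints the value a given classpath configuration resolves for the new property; HdfsConfiguration loads hdfs-default.xml plus any hdfs-site.xml overrides.

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class PrintRemoveBatchNum {
  public static void main(String[] args) {
    // Picks up hdfs-default.xml and any hdfs-site.xml override on the classpath.
    HdfsConfiguration conf = new HdfsConfiguration();
    int batch = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY,
        DFSConfigKeys.DFS_NAMENODE_REMOVE_BAD_BATCH_NUM_DEFAULT);
    System.out.println(DFSConfigKeys.DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY
        + " = " + batch);
  }
}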