HDFS-16735. Reduce the number of HeartbeatManager loops. (#4780). Contributed by Shuyan Zhang.
Signed-off-by: Inigo Goiri <inigoiri@apache.org> Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
parent
c60a900583
commit
71778a6cc5
@ -557,7 +557,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
|||||||
// This value uses the times of heartbeat interval to define the minimum value for stale interval.
|
// This value uses the times of heartbeat interval to define the minimum value for stale interval.
|
||||||
public static final String DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_KEY = "dfs.namenode.stale.datanode.minimum.interval";
|
public static final String DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_KEY = "dfs.namenode.stale.datanode.minimum.interval";
|
||||||
public static final int DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_DEFAULT = 3; // i.e. min_interval is 3 * heartbeat_interval = 9s
|
public static final int DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_DEFAULT = 3; // i.e. min_interval is 3 * heartbeat_interval = 9s
|
||||||
|
public static final String DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY
|
||||||
|
= "dfs.namenode.remove.dead.datanode.batchnum";
|
||||||
|
public static final int DFS_NAMENODE_REMOVE_BAD_BATCH_NUM_DEFAULT = 10;
|
||||||
// When the percentage of stale datanodes reaches this ratio,
|
// When the percentage of stale datanodes reaches this ratio,
|
||||||
// allow writing to stale nodes to prevent hotspots.
|
// allow writing to stale nodes to prevent hotspots.
|
||||||
public static final String DFS_NAMENODE_USE_STALE_DATANODE_FOR_WRITE_RATIO_KEY = "dfs.namenode.write.stale.datanode.ratio";
|
public static final String DFS_NAMENODE_USE_STALE_DATANODE_FOR_WRITE_RATIO_KEY = "dfs.namenode.write.stale.datanode.ratio";
|
||||||
|
@ -71,6 +71,7 @@ class HeartbeatManager implements DatanodeStatistics {
|
|||||||
/** Heartbeat monitor thread. */
|
/** Heartbeat monitor thread. */
|
||||||
private final Daemon heartbeatThread = new Daemon(new Monitor());
|
private final Daemon heartbeatThread = new Daemon(new Monitor());
|
||||||
private final StopWatch heartbeatStopWatch = new StopWatch();
|
private final StopWatch heartbeatStopWatch = new StopWatch();
|
||||||
|
private final int numOfDeadDatanodesRemove;
|
||||||
|
|
||||||
final Namesystem namesystem;
|
final Namesystem namesystem;
|
||||||
final BlockManager blockManager;
|
final BlockManager blockManager;
|
||||||
@ -96,6 +97,9 @@ class HeartbeatManager implements DatanodeStatistics {
|
|||||||
enableLogStaleNodes = conf.getBoolean(
|
enableLogStaleNodes = conf.getBoolean(
|
||||||
DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_KEY,
|
DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_KEY,
|
||||||
DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_DEFAULT);
|
DFSConfigKeys.DFS_NAMENODE_ENABLE_LOG_STALE_DATANODE_DEFAULT);
|
||||||
|
this.numOfDeadDatanodesRemove = conf.getInt(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_REMOVE_DEAD_DATANODE_BATCHNUM_KEY,
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_REMOVE_BAD_BATCH_NUM_DEFAULT);
|
||||||
|
|
||||||
if (avoidStaleDataNodesForWrite && staleInterval < recheckInterval) {
|
if (avoidStaleDataNodesForWrite && staleInterval < recheckInterval) {
|
||||||
this.heartbeatRecheckInterval = staleInterval;
|
this.heartbeatRecheckInterval = staleInterval;
|
||||||
@ -404,7 +408,7 @@ private void dumpStaleNodes(List<DatanodeDescriptor> staleNodes) {
|
|||||||
/**
|
/**
|
||||||
* Check if there are any expired heartbeats, and if so,
|
* Check if there are any expired heartbeats, and if so,
|
||||||
* whether any blocks have to be re-replicated.
|
* whether any blocks have to be re-replicated.
|
||||||
* While removing dead datanodes, make sure that only one datanode is marked
|
* While removing dead datanodes, make sure that only a limited number of datanodes are marked
|
||||||
* dead at a time within the synchronized section. Otherwise, a cascading
|
* dead at a time within the synchronized section. Otherwise, a cascading
|
||||||
* effect causes more datanodes to be declared dead.
|
* effect causes more datanodes to be declared dead.
|
||||||
* Check if there are any failed storage and if so,
|
* Check if there are any failed storage and if so,
|
||||||
@ -436,12 +440,17 @@ void heartbeatCheck() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
boolean allAlive = false;
|
boolean allAlive = false;
|
||||||
while (!allAlive) {
|
// Locate a limited number of dead nodes.
|
||||||
// locate the first dead node.
|
List<DatanodeDescriptor> deadDatanodes = new ArrayList<>(
|
||||||
DatanodeDescriptor dead = null;
|
numOfDeadDatanodesRemove);
|
||||||
|
// Locate a limited number of failed storages that aren't on a dead node.
|
||||||
|
List<DatanodeStorageInfo> failedStorages = new ArrayList<>(
|
||||||
|
numOfDeadDatanodesRemove);
|
||||||
|
|
||||||
// locate the first failed storage that isn't on a dead node.
|
while (!allAlive) {
|
||||||
DatanodeStorageInfo failedStorage = null;
|
|
||||||
|
deadDatanodes.clear();
|
||||||
|
failedStorages.clear();
|
||||||
|
|
||||||
// check the number of stale storages
|
// check the number of stale storages
|
||||||
int numOfStaleStorages = 0;
|
int numOfStaleStorages = 0;
|
||||||
@ -452,9 +461,10 @@ void heartbeatCheck() {
|
|||||||
if (shouldAbortHeartbeatCheck(0)) {
|
if (shouldAbortHeartbeatCheck(0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (dead == null && dm.isDatanodeDead(d)) {
|
if (deadDatanodes.size() < numOfDeadDatanodesRemove &&
|
||||||
|
dm.isDatanodeDead(d)) {
|
||||||
stats.incrExpiredHeartbeats();
|
stats.incrExpiredHeartbeats();
|
||||||
dead = d;
|
deadDatanodes.add(d);
|
||||||
// remove the node from stale list to adjust the stale list size
|
// remove the node from stale list to adjust the stale list size
|
||||||
// before setting the stale count of the DatanodeManager
|
// before setting the stale count of the DatanodeManager
|
||||||
removeNodeFromStaleList(d);
|
removeNodeFromStaleList(d);
|
||||||
@ -476,10 +486,10 @@ void heartbeatCheck() {
|
|||||||
numOfStaleStorages++;
|
numOfStaleStorages++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (failedStorage == null &&
|
if (failedStorages.size() < numOfDeadDatanodesRemove &&
|
||||||
storageInfo.areBlocksOnFailedStorage() &&
|
storageInfo.areBlocksOnFailedStorage() &&
|
||||||
d != dead) {
|
!deadDatanodes.contains(d)) {
|
||||||
failedStorage = storageInfo;
|
failedStorages.add(storageInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -492,12 +502,12 @@ void heartbeatCheck() {
|
|||||||
// log nodes detected as stale since last heartBeat
|
// log nodes detected as stale since last heartBeat
|
||||||
dumpStaleNodes(staleNodes);
|
dumpStaleNodes(staleNodes);
|
||||||
|
|
||||||
allAlive = dead == null && failedStorage == null;
|
allAlive = deadDatanodes.isEmpty() && failedStorages.isEmpty();
|
||||||
if (!allAlive && namesystem.isInStartupSafeMode()) {
|
if (!allAlive && namesystem.isInStartupSafeMode()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dead != null) {
|
for (DatanodeDescriptor dead : deadDatanodes) {
|
||||||
// acquire the fsnamesystem lock, and then remove the dead node.
|
// acquire the fsnamesystem lock, and then remove the dead node.
|
||||||
namesystem.writeLock();
|
namesystem.writeLock();
|
||||||
try {
|
try {
|
||||||
@ -506,7 +516,7 @@ void heartbeatCheck() {
|
|||||||
namesystem.writeUnlock("removeDeadDatanode");
|
namesystem.writeUnlock("removeDeadDatanode");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (failedStorage != null) {
|
for (DatanodeStorageInfo failedStorage : failedStorages) {
|
||||||
// acquire the fsnamesystem lock, and remove blocks on the storage.
|
// acquire the fsnamesystem lock, and remove blocks on the storage.
|
||||||
namesystem.writeLock();
|
namesystem.writeLock();
|
||||||
try {
|
try {
|
||||||
|
@ -5365,6 +5365,14 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.remove.dead.datanode.batchnum</name>
|
||||||
|
<value>10</value>
|
||||||
|
<description>
|
||||||
|
Maximum number of dead datanodes removed by HeartbeatManager per scan. The same limit also bounds the number of failed storages processed per scan.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.namenode.snapshot.capture.openfiles</name>
|
<name>dfs.namenode.snapshot.capture.openfiles</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
|
Loading…
Reference in New Issue
Block a user