HDFS-15273. CacheReplicationMonitor hold lock for long time and lead to NN out of service. Contributed by Xiaoqiao He.

This commit is contained in:
Wei-Chiu Chuang 2023-10-26 10:35:10 -07:00
parent d18410221b
commit 821ed83873
No known key found for this signature in database
GPG Key ID: B362E1C021854B9D
4 changed files with 98 additions and 0 deletions

View File

@ -192,6 +192,18 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.namenode.path.based.cache.block.map.allocation.percent";
public static final float DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
public static final String DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE =
"dfs.namenode.crm.checklocktime.enable";
public static final boolean DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT = false;
public static final String DFS_NAMENODE_CRM_MAXLOCKTIME_MS =
"dfs.namenode.crm.maxlocktime.ms";
public static final long DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT = 1000;
public static final String DFS_NAMENODE_CRM_SLEEP_TIME_MS =
"dfs.namenode.crm.sleeptime.ms";
public static final long DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT = 300;
public static final int DFS_NAMENODE_HTTP_PORT_DEFAULT =
HdfsClientConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT;
public static final String DFS_NAMENODE_HTTP_ADDRESS_KEY =

View File

@ -140,6 +140,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
*/
private long scannedBlocks;
/**
* Avoid to hold global lock for long times.
*/
private long lastScanTimeMs;
public CacheReplicationMonitor(FSNamesystem namesystem,
CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
this.namesystem = namesystem;
@ -284,6 +289,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
private void rescan() throws InterruptedException {
scannedDirectives = 0;
scannedBlocks = 0;
lastScanTimeMs = Time.monotonicNow();
try {
namesystem.writeLock();
try {
@ -315,6 +321,19 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
}
private void reacquireLock(long last) {
long now = Time.monotonicNow();
if (now - last > cacheManager.getMaxLockTimeMs()) {
try {
namesystem.writeUnlock();
Thread.sleep(cacheManager.getSleepTimeMs());
} catch (InterruptedException e) {
} finally {
namesystem.writeLock();
}
}
}
/**
* Scan all CacheDirectives. Use the information to figure out
* what cache replication factor each block should have.
@ -447,6 +466,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
if (cachedTotal == neededTotal) {
directive.addFilesCached(1);
}
if (cacheManager.isCheckLockTimeEnable()) {
reacquireLock(lastScanTimeMs);
lastScanTimeMs = Time.monotonicNow();
}
LOG.debug("Directive {}: caching {}: {}/{} bytes", directive.getId(),
file.getFullPathName(), cachedTotal, neededTotal);
}
@ -518,6 +541,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
}
}
if (cacheManager.isCheckLockTimeEnable()) {
reacquireLock(lastScanTimeMs);
lastScanTimeMs = Time.monotonicNow();
}
for (Iterator<CachedBlock> cbIter = cachedBlocks.iterator();
cbIter.hasNext(); ) {
scannedBlocks++;
@ -603,6 +630,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
);
cbIter.remove();
}
if (cacheManager.isCheckLockTimeEnable()) {
reacquireLock(lastScanTimeMs);
lastScanTimeMs = Time.monotonicNow();
}
}
}

View File

@ -17,6 +17,12 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_MAXLOCKTIME_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_SLEEP_TIME_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
@ -194,6 +200,9 @@ public class CacheManager {
* The CacheReplicationMonitor.
*/
private CacheReplicationMonitor monitor;
private boolean isCheckLockTimeEnable;
private long maxLockTimeMs;
private long sleepTimeMs;
public static final class PersistState {
public final CacheManagerSection section;
@ -235,12 +244,31 @@ public class CacheManager {
this.cachedBlocks = enabled ? new LightWeightGSet<CachedBlock, CachedBlock>(
LightWeightGSet.computeCapacity(cachedBlocksPercent,
"cachedBlocks")) : new LightWeightGSet<>(0);
this.isCheckLockTimeEnable = conf.getBoolean(
DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE,
DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT);
this.maxLockTimeMs = conf.getLong(DFS_NAMENODE_CRM_MAXLOCKTIME_MS,
DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT);
this.sleepTimeMs = conf.getLong(DFS_NAMENODE_CRM_SLEEP_TIME_MS,
DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT);
}
public boolean isEnabled() {
return enabled;
}
public boolean isCheckLockTimeEnable() {
return isCheckLockTimeEnable;
}
public long getMaxLockTimeMs() {
return this.maxLockTimeMs;
}
public long getSleepTimeMs() {
return this.sleepTimeMs;
}
/**
* Resets all tracked directives and pools. Called during 2NN checkpointing to
* reset FSNamesystem state. See {@link FSNamesystem#clear()}.

View File

@ -2940,6 +2940,33 @@
</description>
</property>
<property>
<name>dfs.namenode.crm.checklocktime.enable</name>
<value>false</value>
<description>
Set to true to enable CacheManager to check amount of time to hold the
global rwlock.
</description>
</property>
<property>
<name>dfs.namenode.crm.maxlocktime.ms</name>
<value>1000</value>
<description>
The maximum amount of time that CacheManager should hold the global rwlock.
This configuration enable when set `dfs.namenode.crm.checklocktime.enable`.
</description>
</property>
<property>
<name>dfs.namenode.crm.sleeptime.ms</name>
<value>300</value>
<description>
The amount of time that CacheManager should relase the global rwlock.
This configuration enable when set `dfs.namenode.crm.checklocktime.enable`.
</description>
</property>
<property>
<name>dfs.datanode.max.locked.memory</name>
<value>0</value>