HDFS-10798. Make the threshold of reporting FSNamesystem lock contention configurable. Contributed by Erik Krogen.
This commit is contained in:
parent
8b7adf4ddf
commit
407b519fb1
@ -407,6 +407,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
|||||||
public static final long
|
public static final long
|
||||||
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
|
||||||
|
|
||||||
|
// Threshold for how long a write lock must be held for the event to be logged
|
||||||
|
public static final String DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY =
|
||||||
|
"dfs.namenode.write-lock-reporting-threshold-ms";
|
||||||
|
public static final long DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 1000L;
|
||||||
|
|
||||||
public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
|
public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
|
||||||
public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
|
public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
|
||||||
|
|
||||||
|
@ -71,6 +71,8 @@
|
|||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
|
||||||
@ -819,6 +821,10 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
|
|||||||
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
|
||||||
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
|
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
|
||||||
|
|
||||||
|
this.writeLockReportingThreshold = conf.getLong(
|
||||||
|
DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY,
|
||||||
|
DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT);
|
||||||
|
|
||||||
// For testing purposes, allow the DT secret manager to be started regardless
|
// For testing purposes, allow the DT secret manager to be started regardless
|
||||||
// of whether security is enabled.
|
// of whether security is enabled.
|
||||||
alwaysUseDelegationTokensForTests = conf.getBoolean(
|
alwaysUseDelegationTokensForTests = conf.getBoolean(
|
||||||
@ -1498,7 +1504,7 @@ public static List<URI> getSharedEditsDirs(Configuration conf) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Threshold (ms) for long holding write lock report. */
|
/** Threshold (ms) for long holding write lock report. */
|
||||||
static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
|
private long writeLockReportingThreshold;
|
||||||
/** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
|
/** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
|
||||||
private long writeLockHeldTimeStamp;
|
private long writeLockHeldTimeStamp;
|
||||||
|
|
||||||
@ -1532,7 +1538,7 @@ public void writeUnlock() {
|
|||||||
|
|
||||||
this.fsLock.writeLock().unlock();
|
this.fsLock.writeLock().unlock();
|
||||||
|
|
||||||
if (needReport && writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
|
if (needReport && writeLockInterval >= this.writeLockReportingThreshold) {
|
||||||
LOG.info("FSNamesystem write lock held for " + writeLockInterval +
|
LOG.info("FSNamesystem write lock held for " + writeLockInterval +
|
||||||
" ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
|
" ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
|
||||||
}
|
}
|
||||||
|
@ -2614,6 +2614,15 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.write-lock-reporting-threshold-ms</name>
|
||||||
|
<value>1000</value>
|
||||||
|
<description>When a write lock is held on the namenode for a long time,
|
||||||
|
this will be logged as the lock is released. This sets how long the
|
||||||
|
lock must be held for logging to occur.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
|
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
|
||||||
<value>0</value>
|
<value>0</value>
|
||||||
|
@ -290,7 +290,10 @@ public Boolean get() {
|
|||||||
*/
|
*/
|
||||||
@Test(timeout=45000)
|
@Test(timeout=45000)
|
||||||
public void testFSLockLongHoldingReport() throws Exception {
|
public void testFSLockLongHoldingReport() throws Exception {
|
||||||
|
final long writeLockReportingThreshold = 100L;
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY,
|
||||||
|
writeLockReportingThreshold);
|
||||||
FSImage fsImage = Mockito.mock(FSImage.class);
|
FSImage fsImage = Mockito.mock(FSImage.class);
|
||||||
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
|
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
|
||||||
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
|
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
|
||||||
@ -301,32 +304,32 @@ public void testFSLockLongHoldingReport() throws Exception {
|
|||||||
|
|
||||||
// Don't report if the write lock is held for a short time
|
// Don't report if the write lock is held for a short time
|
||||||
fsn.writeLock();
|
fsn.writeLock();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
Thread.sleep(writeLockReportingThreshold / 2);
|
||||||
fsn.writeUnlock();
|
fsn.writeUnlock();
|
||||||
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
|
|
||||||
// Report if the write lock is held for a long time
|
// Report if the write lock is held for a long time
|
||||||
fsn.writeLock();
|
fsn.writeLock();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
Thread.sleep(writeLockReportingThreshold + 10);
|
||||||
logs.clearOutput();
|
logs.clearOutput();
|
||||||
fsn.writeUnlock();
|
fsn.writeUnlock();
|
||||||
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
// Report if the write lock is held (interruptibly) for a long time
|
// Report if the write lock is held (interruptibly) for a long time
|
||||||
fsn.writeLockInterruptibly();
|
fsn.writeLockInterruptibly();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
Thread.sleep(writeLockReportingThreshold + 10);
|
||||||
logs.clearOutput();
|
logs.clearOutput();
|
||||||
fsn.writeUnlock();
|
fsn.writeUnlock();
|
||||||
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
// Report if it's held for a long time when re-entering write lock
|
// Report if it's held for a long time when re-entering write lock
|
||||||
fsn.writeLock();
|
fsn.writeLock();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
Thread.sleep(writeLockReportingThreshold/ 2 + 1);
|
||||||
fsn.writeLockInterruptibly();
|
fsn.writeLockInterruptibly();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
Thread.sleep(writeLockReportingThreshold / 2 + 1);
|
||||||
fsn.writeLock();
|
fsn.writeLock();
|
||||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
Thread.sleep(writeLockReportingThreshold / 2);
|
||||||
logs.clearOutput();
|
logs.clearOutput();
|
||||||
fsn.writeUnlock();
|
fsn.writeUnlock();
|
||||||
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
Loading…
Reference in New Issue
Block a user