From 407b519fb14f79f19ebc4fbdf08204336a7acf77 Mon Sep 17 00:00:00 2001 From: Zhe Zhang Date: Fri, 26 Aug 2016 14:19:55 -0700 Subject: [PATCH] HDFS-10798. Make the threshold of reporting FSNamesystem lock contention configurable. Contributed by Erik Krogen. --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 +++++ .../hadoop/hdfs/server/namenode/FSNamesystem.java | 10 ++++++++-- .../src/main/resources/hdfs-default.xml | 9 +++++++++ .../hdfs/server/namenode/TestFSNamesystem.java | 15 +++++++++------ 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 3385751b59..b4cce4af66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -407,6 +407,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25; + // Threshold for how long a write lock must be held for the event to be logged + public static final String DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY = + "dfs.namenode.write-lock-reporting-threshold-ms"; + public static final long DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 1000L; + public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor"; public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 29f09b9692..05fd709cfb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -71,6 +71,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; @@ -819,6 +821,10 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY, DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT); + this.writeLockReportingThreshold = conf.getLong( + DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, + DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT); + // For testing purposes, allow the DT secret manager to be started regardless // of whether security is enabled. alwaysUseDelegationTokensForTests = conf.getBoolean( @@ -1498,7 +1504,7 @@ public static List getSharedEditsDirs(Configuration conf) { } /** Threshold (ms) for long holding write lock report. */ - static final short WRITELOCK_REPORTING_THRESHOLD = 1000; + private long writeLockReportingThreshold; /** Last time stamp for write lock. 
Keep the longest one for multi-entrance.*/ private long writeLockHeldTimeStamp; @@ -1532,7 +1538,7 @@ public void writeUnlock() { this.fsLock.writeLock().unlock(); - if (needReport && writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) { + if (needReport && writeLockInterval >= this.writeLockReportingThreshold) { LOG.info("FSNamesystem write lock held for " + writeLockInterval + " ms via\n" + StringUtils.getStackTrace(Thread.currentThread())); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index e6fde8c393..ce880d332e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2614,6 +2614,15 @@ +<property> + <name>dfs.namenode.write-lock-reporting-threshold-ms</name> + <value>1000</value> + <description>When a write lock is held on the namenode for a long time, + this will be logged as the lock is released. This sets how long the + lock must be held for logging to occur. 
+ </description> +</property> + <property> <name>dfs.namenode.startup.delay.block.deletion.sec</name> <value>0</value> diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java index 572b40d0f3..df9001da27 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -290,7 +290,10 @@ public Boolean get() { */ @Test(timeout=45000) public void testFSLockLongHoldingReport() throws Exception { + final long writeLockReportingThreshold = 100L; Configuration conf = new Configuration(); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, + writeLockReportingThreshold); FSImage fsImage = Mockito.mock(FSImage.class); FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); @@ -301,32 +304,32 @@ public void testFSLockLongHoldingReport() throws Exception { // Don't report if the write lock is held for a short time fsn.writeLock(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + Thread.sleep(writeLockReportingThreshold / 2); fsn.writeUnlock(); assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); // Report if the write lock is held for a long time fsn.writeLock(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + Thread.sleep(writeLockReportingThreshold + 10); logs.clearOutput(); fsn.writeUnlock(); assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); // Report if the write lock is held (interruptibly) for a long time fsn.writeLockInterruptibly(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + Thread.sleep(writeLockReportingThreshold + 10); logs.clearOutput(); fsn.writeUnlock(); 
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); // Report if it's held for a long time when re-entering write lock fsn.writeLock(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + Thread.sleep(writeLockReportingThreshold/ 2 + 1); fsn.writeLockInterruptibly(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + Thread.sleep(writeLockReportingThreshold / 2 + 1); fsn.writeLock(); - Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + Thread.sleep(writeLockReportingThreshold / 2); logs.clearOutput(); fsn.writeUnlock(); assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));