From 6f4b0d33ca339e3724623a1d23c101f8cfd3cdd5 Mon Sep 17 00:00:00 2001 From: Zhe Zhang Date: Wed, 31 Aug 2016 15:40:01 -0700 Subject: [PATCH] HDFS-10817. Add Logging for Long-held NN Read Locks. Contributed by Erik Krogen. --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 6 +- .../hdfs/server/namenode/FSNamesystem.java | 32 ++++ .../src/main/resources/hdfs-default.xml | 9 ++ .../server/namenode/TestFSNamesystem.java | 144 +++++++++++++++++- 4 files changed, 188 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 2eff3b0026..f2ab321c78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -407,10 +407,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25; - // Threshold for how long a write lock must be held for the event to be logged + // Threshold for how long namenode locks must be held for the + // event to be logged public static final String DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY = "dfs.namenode.write-lock-reporting-threshold-ms"; public static final long DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 1000L; + public static final String DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY = + "dfs.namenode.read-lock-reporting-threshold-ms"; + public static final long DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 5000L; public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor"; public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index f4b742e050..3b14eecfb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -73,6 +73,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; @@ -824,6 +826,9 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { this.writeLockReportingThreshold = conf.getLong( DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT); + this.readLockReportingThreshold = conf.getLong( + DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY, + DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT); // For testing purposes, allow the DT secret manager to be started regardless // of whether security is enabled. @@ -1507,14 +1512,41 @@ public static List getSharedEditsDirs(Configuration conf) { private long writeLockReportingThreshold; /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/ private long writeLockHeldTimeStamp; + /** Threshold (ms) for long holding read lock report. */ + private long readLockReportingThreshold; + /** + * Last time stamp for read lock. Keep the longest one for + * multi-entrance. This is ThreadLocal since there could be + * many read locks held simultaneously. + */ + private static ThreadLocal readLockHeldTimeStamp = + new ThreadLocal() { + @Override + public Long initialValue() { + return Long.MAX_VALUE; + } + }; @Override public void readLock() { this.fsLock.readLock().lock(); + if (this.fsLock.getReadHoldCount() == 1) { + readLockHeldTimeStamp.set(monotonicNow()); + } } @Override public void readUnlock() { + final boolean needReport = this.fsLock.getReadHoldCount() == 1; + final long readLockInterval = monotonicNow() - readLockHeldTimeStamp.get(); this.fsLock.readLock().unlock(); + + if (needReport) { + readLockHeldTimeStamp.remove(); + if (readLockInterval > this.readLockReportingThreshold) { + LOG.info("FSNamesystem read lock held for " + readLockInterval + + " ms via\n" + StringUtils.getStackTrace(Thread.currentThread())); + } + } } @Override public void writeLock() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index b9a081223f..0c2c7435e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2623,6 +2623,15 @@ + + dfs.namenode.read-lock-reporting-threshold-ms + 5000 + When a read lock is held on the namenode for a long time, + this will be logged as the lock is released. This sets how long the + lock must be held for logging to occur. + + + dfs.namenode.startup.delay.block.deletion.sec 0 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java index df9001da27..ccc8a6e42c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.net.InetAddress; import java.net.URI; +import java.util.ArrayList; import java.util.Collection; import com.google.common.base.Supplier; @@ -58,6 +59,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeoutException; +import java.util.regex.Pattern; public class TestFSNamesystem { @@ -286,10 +288,11 @@ public Boolean get() { } /** - * Test when FSNamesystem lock is held for a long time, logger will report it. + * Test when FSNamesystem write lock is held for a long time, + * logger will report it. */ @Test(timeout=45000) - public void testFSLockLongHoldingReport() throws Exception { + public void testFSWriteLockLongHoldingReport() throws Exception { final long writeLockReportingThreshold = 100L; Configuration conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, @@ -341,6 +344,143 @@ public void testFSLockLongHoldingReport() throws Exception { assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); } + /** + * Test when FSNamesystem read lock is held for a long time, + * logger will report it. + */ + @Test(timeout=45000) + public void testFSReadLockLongHoldingReport() throws Exception { + final long readLockReportingThreshold = 100L; + final String readLockLogStmt = "FSNamesystem read lock held for "; + Configuration conf = new Configuration(); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY, + readLockReportingThreshold); + FSImage fsImage = Mockito.mock(FSImage.class); + FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); + Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); + FSNamesystem fsn = new FSNamesystem(conf, fsImage); + + LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO); + + // Don't report if the read lock is held for a short time + fsn.readLock(); + Thread.sleep(readLockReportingThreshold / 2); + fsn.readUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()) && + logs.getOutput().contains(readLockLogStmt)); + + // Report if the read lock is held for a long time + fsn.readLock(); + Thread.sleep(readLockReportingThreshold + 10); + logs.clearOutput(); + fsn.readUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()) + && logs.getOutput().contains(readLockLogStmt)); + + // Report if it's held for a long time when re-entering read lock + fsn.readLock(); + Thread.sleep(readLockReportingThreshold / 2 + 1); + fsn.readLock(); + Thread.sleep(readLockReportingThreshold / 2 + 1); + logs.clearOutput(); + fsn.readUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()) || + logs.getOutput().contains(readLockLogStmt)); + logs.clearOutput(); + fsn.readUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()) && + logs.getOutput().contains(readLockLogStmt)); + + // Report if it's held for a long time while another thread also has the + // read lock. Let one thread hold the lock long enough to activate an + // alert, then have another thread grab the read lock to ensure that this + // doesn't reset the timing. + logs.clearOutput(); + CountDownLatch barrier = new CountDownLatch(1); + CountDownLatch barrier2 = new CountDownLatch(1); + Thread t1 = new Thread() { + @Override + public void run() { + try { + fsn.readLock(); + Thread.sleep(readLockReportingThreshold + 1); + barrier.countDown(); // Allow for t2 to acquire the read lock + barrier2.await(); // Wait until t2 has the read lock + fsn.readUnlock(); + } catch (InterruptedException e) { + fail("Interrupted during testing"); + } + } + }; + Thread t2 = new Thread() { + @Override + public void run() { + try { + barrier.await(); // Wait until t1 finishes sleeping + fsn.readLock(); + barrier2.countDown(); // Allow for t1 to unlock + fsn.readUnlock(); + } catch (InterruptedException e) { + fail("Interrupted during testing"); + } + } + }; + t1.start(); + t2.start(); + t1.join(); + t2.join(); + Pattern t1Pattern = Pattern.compile( + String.format("\\Q%s\\E.+%s", t1.getName(), readLockLogStmt)); + assertTrue(t1Pattern.matcher(logs.getOutput()).find()); + Pattern t2Pattern = Pattern.compile( + String.format("\\Q%s\\E.+%s", t2.getName(), readLockLogStmt)); + assertFalse(t2Pattern.matcher(logs.getOutput()).find()); + + // Spin up a bunch of threads all grabbing the lock at once; assign some + // to go over threshold and some under. Check that they all log correctly. + logs.clearOutput(); + final int threadCount = 50; + List threads = new ArrayList<>(threadCount); + for (int i = 0; i < threadCount; i++) { + threads.add(new Thread() { + @Override + public void run() { + try { + long sleepTime; + if (this.getName().hashCode() % 2 == 0) { + sleepTime = readLockReportingThreshold + 10; + } else { + sleepTime = readLockReportingThreshold / 2; + } + fsn.readLock(); + Thread.sleep(sleepTime); + fsn.readUnlock(); + } catch (InterruptedException e) { + fail("Interrupted during testing"); + } + } + }); + } + for (Thread t : threads) { + t.start(); + } + for (Thread t : threads) { + t.join(); + } + for (Thread t : threads) { + Pattern p = Pattern.compile( + String.format("\\Q%s\\E.+%s", t.getName(), readLockLogStmt)); + boolean foundLog = p.matcher(logs.getOutput()).find(); + if (t.getName().hashCode() % 2 == 0) { + assertTrue(foundLog); + } else { + assertFalse(foundLog); + } + } + } + @Test public void testSafemodeReplicationConf() throws IOException { Configuration conf = new Configuration();