HDFS-9145. Tracking methods that hold the FSNamesystem lock for too long. Contributed by Mingliang Liu.
This commit is contained in:
parent
59e9980117
commit
d1e1925bf6
@ -196,6 +196,10 @@ public void stopCapturing() {
|
||||
logger.removeAppender(appender);
|
||||
|
||||
}
|
||||
|
||||
public void clearOutput() {
|
||||
sw.getBuffer().setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1547,6 +1547,9 @@ Release 2.8.0 - UNRELEASED
|
||||
HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
|
||||
(Chris Nauroth and Vinayakumar B via vinayakumarb)
|
||||
|
||||
HDFS-9145. Tracking methods that hold the FSNamesystem lock for too long.
|
||||
(Mingliang Liu via wheat9)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
|
||||
|
@ -1458,6 +1458,11 @@ public static List<URI> getSharedEditsDirs(Configuration conf) {
|
||||
return Util.stringCollectionAsURIs(dirNames);
|
||||
}
|
||||
|
||||
/** Threshold (ms) at or above which a write-lock hold time is reported. */
|
||||
static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
|
||||
/** Monotonic time (ms) of the outermost write-lock acquisition; re-entrant acquisitions do not update it. */
|
||||
private long writeLockHeldTimeStamp;
|
||||
|
||||
@Override
|
||||
public void readLock() {
|
||||
this.fsLock.readLock().lock();
|
||||
@ -1469,14 +1474,30 @@ public void readUnlock() {
|
||||
@Override
|
||||
public void writeLock() {
|
||||
this.fsLock.writeLock().lock();
|
||||
if (fsLock.getWriteHoldCount() == 1) {
|
||||
writeLockHeldTimeStamp = monotonicNow();
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void writeLockInterruptibly() throws InterruptedException {
|
||||
this.fsLock.writeLock().lockInterruptibly();
|
||||
if (fsLock.getWriteHoldCount() == 1) {
|
||||
writeLockHeldTimeStamp = monotonicNow();
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void writeUnlock() {
|
||||
final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
|
||||
fsLock.isWriteLockedByCurrentThread();
|
||||
this.fsLock.writeLock().unlock();
|
||||
|
||||
if (needReport) {
|
||||
long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
|
||||
if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
|
||||
LOG.info("FSNamesystem write lock held for " + writeLockInterval +
|
||||
" ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public boolean hasWriteLock() {
|
||||
|
@ -39,6 +39,9 @@
|
||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
|
||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||
import org.apache.log4j.Level;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
@ -268,6 +271,59 @@ public void run() {
|
||||
threadCount, rwLock.getQueueLength());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test when FSNamesystem lock is held for a long time, logger will report it.
|
||||
*/
|
||||
@Test(timeout=45000)
|
||||
public void testFSLockLongHoldingReport() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
FSImage fsImage = Mockito.mock(FSImage.class);
|
||||
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
|
||||
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
|
||||
FSNamesystem fsn = new FSNamesystem(conf, fsImage);
|
||||
|
||||
LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
|
||||
GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
|
||||
|
||||
// Don't report if the write lock is held for a short time
|
||||
fsn.writeLock();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
||||
fsn.writeUnlock();
|
||||
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
|
||||
|
||||
// Report if the write lock is held for a long time
|
||||
fsn.writeLock();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
||||
logs.clearOutput();
|
||||
fsn.writeUnlock();
|
||||
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
|
||||
// Report if the write lock is held (interruptibly) for a long time
|
||||
fsn.writeLockInterruptibly();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
||||
logs.clearOutput();
|
||||
fsn.writeUnlock();
|
||||
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
|
||||
// Report if it's held for a long time when re-entering write lock
|
||||
fsn.writeLock();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
||||
fsn.writeLockInterruptibly();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
||||
fsn.writeLock();
|
||||
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
||||
logs.clearOutput();
|
||||
fsn.writeUnlock();
|
||||
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
logs.clearOutput();
|
||||
fsn.writeUnlock();
|
||||
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
logs.clearOutput();
|
||||
fsn.writeUnlock();
|
||||
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSafemodeReplicationConf() throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
|
Loading…
Reference in New Issue
Block a user