HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed by Mingliang Liu.
This commit is contained in:
parent
59e9980117
commit
d1e1925bf6
@ -196,6 +196,10 @@ public void stopCapturing() {
|
|||||||
logger.removeAppender(appender);
|
logger.removeAppender(appender);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void clearOutput() {
|
||||||
|
sw.getBuffer().setLength(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1547,6 +1547,9 @@ Release 2.8.0 - UNRELEASED
|
|||||||
HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
|
HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
|
||||||
(Chris Nauroth and Vinayakumar B via vinayakumarb)
|
(Chris Nauroth and Vinayakumar B via vinayakumarb)
|
||||||
|
|
||||||
|
HDFS-9145. Tracking methods that hold FSNamesytemLock for too long.
|
||||||
|
(Mingliang Liu via wheat9)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
|
HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
|
||||||
|
@ -1458,6 +1458,11 @@ public static List<URI> getSharedEditsDirs(Configuration conf) {
|
|||||||
return Util.stringCollectionAsURIs(dirNames);
|
return Util.stringCollectionAsURIs(dirNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Threshold (ms) for long holding write lock report. */
|
||||||
|
static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
|
||||||
|
/** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
|
||||||
|
private long writeLockHeldTimeStamp;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void readLock() {
|
public void readLock() {
|
||||||
this.fsLock.readLock().lock();
|
this.fsLock.readLock().lock();
|
||||||
@ -1469,14 +1474,30 @@ public void readUnlock() {
|
|||||||
@Override
|
@Override
|
||||||
public void writeLock() {
|
public void writeLock() {
|
||||||
this.fsLock.writeLock().lock();
|
this.fsLock.writeLock().lock();
|
||||||
|
if (fsLock.getWriteHoldCount() == 1) {
|
||||||
|
writeLockHeldTimeStamp = monotonicNow();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void writeLockInterruptibly() throws InterruptedException {
|
public void writeLockInterruptibly() throws InterruptedException {
|
||||||
this.fsLock.writeLock().lockInterruptibly();
|
this.fsLock.writeLock().lockInterruptibly();
|
||||||
|
if (fsLock.getWriteHoldCount() == 1) {
|
||||||
|
writeLockHeldTimeStamp = monotonicNow();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void writeUnlock() {
|
public void writeUnlock() {
|
||||||
|
final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
|
||||||
|
fsLock.isWriteLockedByCurrentThread();
|
||||||
this.fsLock.writeLock().unlock();
|
this.fsLock.writeLock().unlock();
|
||||||
|
|
||||||
|
if (needReport) {
|
||||||
|
long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
|
||||||
|
if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
|
||||||
|
LOG.info("FSNamesystem write lock held for " + writeLockInterval +
|
||||||
|
" ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public boolean hasWriteLock() {
|
public boolean hasWriteLock() {
|
||||||
|
@ -39,6 +39,9 @@
|
|||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
|
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||||
|
import org.apache.log4j.Level;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@ -268,6 +271,59 @@ public void run() {
|
|||||||
threadCount, rwLock.getQueueLength());
|
threadCount, rwLock.getQueueLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test when FSNamesystem lock is held for a long time, logger will report it.
|
||||||
|
*/
|
||||||
|
@Test(timeout=45000)
|
||||||
|
public void testFSLockLongHoldingReport() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
FSImage fsImage = Mockito.mock(FSImage.class);
|
||||||
|
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
|
||||||
|
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
|
||||||
|
FSNamesystem fsn = new FSNamesystem(conf, fsImage);
|
||||||
|
|
||||||
|
LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
|
||||||
|
GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
|
||||||
|
|
||||||
|
// Don't report if the write lock is held for a short time
|
||||||
|
fsn.writeLock();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
|
|
||||||
|
// Report if the write lock is held for a long time
|
||||||
|
fsn.writeLock();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
||||||
|
logs.clearOutput();
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
|
// Report if the write lock is held (interruptibly) for a long time
|
||||||
|
fsn.writeLockInterruptibly();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
|
||||||
|
logs.clearOutput();
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
|
||||||
|
// Report if it's held for a long time when re-entering write lock
|
||||||
|
fsn.writeLock();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
||||||
|
fsn.writeLockInterruptibly();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
|
||||||
|
fsn.writeLock();
|
||||||
|
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
|
||||||
|
logs.clearOutput();
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
logs.clearOutput();
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
logs.clearOutput();
|
||||||
|
fsn.writeUnlock();
|
||||||
|
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSafemodeReplicationConf() throws IOException {
|
public void testSafemodeReplicationConf() throws IOException {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
Loading…
Reference in New Issue
Block a user