HDFS-12516. Suppress the fsnamesystem lock warning on nn startup. Contributed by Ajay Kumar.

This commit is contained in:
Anu Engineer 2017-09-24 21:03:06 -07:00
parent 415e5a157a
commit d0b2c5850b
3 changed files with 76 additions and 5 deletions

View File

@ -1095,7 +1095,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
if (!success) {
fsImage.close();
}
writeUnlock("loadFSImage");
writeUnlock("loadFSImage", true);
}
imageLoadComplete();
}
@ -1586,6 +1586,11 @@ public void writeUnlock() {
/**
 * Releases the write lock by delegating to the underlying
 * {@code fsLock}, passing along the operation name.
 *
 * @param opName name of the operation releasing the lock
 */
public void writeUnlock(String opName) {
  fsLock.writeUnlock(opName);
}
/**
 * Releases the write lock by delegating to the underlying
 * {@code fsLock}, forwarding both arguments unchanged.
 *
 * @param opName name of the operation releasing the lock
 * @param suppressWriteLockReport forwarded to the lock; per the lock's
 *     own documentation, {@code false} lets a long hold be reported
 */
public void writeUnlock(String opName, boolean suppressWriteLockReport) {
  fsLock.writeUnlock(opName, suppressWriteLockReport);
}
@Override
public boolean hasWriteLock() {
return this.fsLock.isWriteLockedByCurrentThread();

View File

@ -207,13 +207,34 @@ public void writeLockInterruptibly() throws InterruptedException {
}
}
/**
 * Unlocks the FSNamesystem write lock using {@code OP_NAME_OTHER} as the
 * operation name. This internally calls
 * {@link FSNamesystemLock#writeUnlock(String, boolean)} with report
 * suppression disabled.
 */
public void writeUnlock() {
  // Release exactly once: issuing a second unlock call here would try to
  // release a hold that the first call has already given up.
  writeUnlock(OP_NAME_OTHER, false);
}
/**
 * Unlocks the FSNamesystem write lock on behalf of the named operation.
 * This internally calls
 * {@link FSNamesystemLock#writeUnlock(String, boolean)} with
 * {@code suppressWriteLockReport} set to {@code false}.
 *
 * @param opName Operation name.
 */
public void writeUnlock(String opName) {
  // Whether a report is needed is decided by the two-argument overload, so
  // nothing is computed here.
  writeUnlock(opName, false);
}
/**
* Unlocks FSNameSystem write lock.
*
* @param opName Operation name
* @param suppressWriteLockReport When false, event of write lock being held
* for long time will be logged in logs and metrics.
*/
public void writeUnlock(String opName, boolean suppressWriteLockReport) {
final boolean needReport = !suppressWriteLockReport && coarseLock
.getWriteHoldCount() == 1 && coarseLock.isWriteLockedByCurrentThread();
final long currentTimeNanos = timer.monotonicNowNanos();
final long writeLockIntervalNanos =
currentTimeNanos - writeLockHeldTimeStampNanos;

View File

@ -38,6 +38,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.*;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_FSLOCK_FAIR_KEY;
@ -347,7 +348,7 @@ public void testDetailedHoldMetrics() throws Exception {
fsLock.writeLock();
timer.advance(1);
fsLock.writeUnlock("baz");
fsLock.writeUnlock("baz", false);
MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder();
rates.snapshot(rb, true);
@ -360,4 +361,48 @@ public void testDetailedHoldMetrics() throws Exception {
assertCounter("FSNWriteLockBazNanosNumOps", 1L, rb);
}
/**
 * Checks that a long write-lock hold is logged when unlocking normally, and
 * that nothing is logged when the caller asks for the report to be
 * suppressed.
 */
@Test(timeout = 45000)
public void testFSWriteLockReportSuppressed() throws Exception {
  final long reportingThresholdMs = 1L;
  final long suppressWarningIntervalMs = 10L;
  // Simulated hold time, comfortably above the reporting threshold.
  final long holdMs = reportingThresholdMs + 100;

  Configuration conf = new Configuration();
  conf.setLong(
      DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY,
      reportingThresholdMs);
  conf.setTimeDuration(DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY,
      suppressWarningIntervalMs, TimeUnit.MILLISECONDS);

  final FakeTimer timer = new FakeTimer();
  final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer);
  timer.advance(suppressWarningIntervalMs);

  LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
  GenericTestUtils.setLogLevel(
      LoggerFactory.getLogger(FSNamesystem.class.getName()),
      org.slf4j.event.Level.INFO);

  // Phase 1: a plain unlock after a long hold must produce the report.
  fsnLock.writeLock();
  timer.advance(holdMs);
  fsnLock.writeUnlock();
  final String reportedOutput = logs.getOutput();
  assertTrue(reportedOutput.contains("FSNamesystem write lock held for"));

  logs.clearOutput();

  // Phase 2: the same long hold, but unlocking with suppression requested,
  // must leave no trace of the report in the captured log.
  fsnLock.writeLock();
  timer.advance(holdMs);
  fsnLock.writeUnlock("testFSWriteLockReportSuppressed", true);
  assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
  final String suppressedOutput = logs.getOutput();
  assertFalse(suppressedOutput.contains(
      "Number of suppressed write-lock reports:"));
  assertFalse(suppressedOutput.contains("FSNamesystem write lock held for"));
}
}