From ff0b99eafeda035ebe0dc82cfe689808047a8893 Mon Sep 17 00:00:00 2001 From: Zhe Zhang Date: Mon, 14 Nov 2016 11:05:29 -0800 Subject: [PATCH] HDFS-10872. Add MutableRate metrics for FSNamesystemLock operations. Contributed by Erik Krogen. --- .../hadoop/metrics2/lib/MetricsRegistry.java | 8 + .../metrics2/lib/MutableMetricsFactory.java | 4 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 + .../hdfs/server/namenode/CacheManager.java | 2 +- .../hdfs/server/namenode/Checkpointer.java | 2 +- .../ContentSummaryComputationContext.java | 2 +- .../hdfs/server/namenode/FSEditLogLoader.java | 4 +- .../hdfs/server/namenode/FSNamesystem.java | 482 ++++++++++-------- .../server/namenode/FSNamesystemLock.java | 59 ++- .../hdfs/server/namenode/LeaseManager.java | 2 +- .../hdfs/server/namenode/NamenodeFsck.java | 2 +- .../src/main/resources/hdfs-default.xml | 8 + .../server/namenode/TestFSNamesystemLock.java | 59 ++- 13 files changed, 410 insertions(+), 228 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java index 6e7e5ab7cc..1ef74f41c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java @@ -281,6 +281,14 @@ public synchronized MutableRate newRate(String name, String desc, return ret; } + public synchronized MutableRatesWithAggregation newRatesWithAggregation( + String name) { + checkMetricName(name); + MutableRatesWithAggregation rates = new MutableRatesWithAggregation(); + metricsMap.put(name, rates); + return rates; + } + synchronized void add(String name, MutableMetric metric) { checkMetricName(name); metricsMap.put(name, metric); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java index b926c4b9b0..a5767723ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java @@ -68,9 +68,7 @@ MutableMetric newForField(Field field, Metric annotation, return new MutableRates(registry); } if (cls == MutableRatesWithAggregation.class) { - MutableRatesWithAggregation rates = new MutableRatesWithAggregation(); - registry.add(info.name(), rates); - return rates; + return registry.newRatesWithAggregation(info.name()); } if (cls == MutableStat.class) { return registry.newStat(info.name(), info.description(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 63771846a4..b1fb3f4ec9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -415,6 +415,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25; + public static final String DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY = + "dfs.namenode.lock.detailed-metrics.enabled"; + public static final boolean DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT = + false; // Threshold for how long namenode locks must be held for the // event to be logged public static final String DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index fa8f011139..515a3631d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -959,7 +959,7 @@ public final void processCacheReport(final DatanodeID datanodeID, processCacheReportImpl(datanode, blockIds); } finally { endTime = Time.monotonicNow(); - namesystem.writeUnlock(); + namesystem.writeUnlock("processCacheReport"); } // Log the block report processing stats from Namenode perspective diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index a7820496bd..9ff40cc95c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -264,7 +264,7 @@ void doCheckpoint() throws IOException { bnStorage.writeAll(); } } finally { - backupNode.namesystem.writeUnlock(); + backupNode.namesystem.writeUnlock("doCheckpoint"); } if(cpCmd.needToReturnImage()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java index 4208b53dfa..53fa552077 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java @@ -115,7 +115,7 @@ public boolean yield() { // unlock dir.readUnlock(); - fsn.readUnlock(); + fsn.readUnlock("contentSummary"); try { Thread.sleep(sleepMilliSec, sleepNanoSec); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 9f8687bc44..2990344958 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -155,7 +155,7 @@ long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, return numEdits; } finally { edits.close(); - fsNamesys.writeUnlock(); + fsNamesys.writeUnlock("loadFSEdits"); prog.endStep(Phase.LOADING_EDITS, step); } } @@ -292,7 +292,7 @@ long loadEditRecords(EditLogInputStream in, boolean closeOnExit, in.close(); } fsDir.writeUnlock(); - fsNamesys.writeUnlock(); + fsNamesys.writeUnlock("loadEditRecords"); if (LOG.isTraceEnabled()) { LOG.trace("replaying edit log finished"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 7e3fabe16c..06a0564fa7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -268,6 +268,8 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.AccessControlException; @@ -326,6 +328,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, NameNodeMXBean { public static final Log LOG = LogFactory.getLog(FSNamesystem.class); + private final MetricsRegistry registry = new MetricsRegistry("FSNamesystem"); + @Metric final MutableRatesWithAggregation detailedLockHoldTimeMetrics = + registry.newRatesWithAggregation("detailedLockHoldTimeMetrics"); boolean isAuditEnabled() { return (!isDefaultAuditLogger || auditLog.isInfoEnabled()) @@ -557,7 +562,7 @@ void setImageLoaded() { dir.markNameCacheInitialized(); cond.signalAll(); } finally { - writeUnlock(); + writeUnlock("setImageLoaded"); } } @@ -709,7 +714,7 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { LOG.info("Enabling async auditlog"); enableAsyncAuditLog(); } - fsLock = new FSNamesystemLock(conf); + fsLock = new FSNamesystemLock(conf, detailedLockHoldTimeMetrics); cond = fsLock.newWriteLockCondition(); cpLock = new ReentrantLock(); @@ -1046,7 +1051,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException { if (!success) { fsImage.close(); } - writeUnlock(); + writeUnlock("loadFSImage"); } imageLoadComplete(); } @@ -1098,7 +1103,7 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep completeBlocksTotal); blockManager.activate(conf, completeBlocksTotal); } finally { - writeUnlock(); + writeUnlock("startCommonServices"); } registerMXBean(); @@ -1122,7 +1127,7 @@ void stopCommonServices() { try { if (blockManager != null) blockManager.close(); } finally { - writeUnlock(); + writeUnlock("stopCommonServices"); } RetryCache.clear(retryCache); } @@ -1217,7 +1222,7 @@ void startActiveServices() throws IOException { } finally { startingActiveService = false; blockManager.checkSafeMode(); - writeUnlock(); + writeUnlock("startActiveServices"); } } @@ -1280,7 +1285,7 @@ void stopActiveServices() { blockManager.setInitializedReplQueues(false); } } finally { - writeUnlock(); + writeUnlock("stopActiveServices"); } } @@ -1511,6 +1516,9 @@ public void readLock() { public void readUnlock() { this.fsLock.readUnlock(); } + public void readUnlock(String opName) { + this.fsLock.readUnlock(opName); + } @Override public void writeLock() { this.fsLock.writeLock(); @@ -1523,6 +1531,9 @@ public void writeLockInterruptibly() throws InterruptedException { public void writeUnlock() { this.fsLock.writeUnlock(); } + public void writeUnlock(String opName) { + this.fsLock.writeUnlock(opName); + } @Override public boolean hasWriteLock() { return this.fsLock.isWriteLockedByCurrentThread(); @@ -1561,7 +1572,7 @@ NamespaceInfo getNamespaceInfo() { try { return unprotectedGetNamespaceInfo(); } finally { - readUnlock(); + readUnlock("getNamespaceInfo"); } } @@ -1639,7 +1650,7 @@ public BlocksWithLocations getBlocks(DatanodeID datanode, long size) checkOperation(OperationCategory.READ); return getBlockManager().getBlocksWithLocations(datanode, size); } finally { - readUnlock(); + readUnlock("getBlocks"); } } @@ -1659,7 +1670,7 @@ void metaSave(String filename) throws IOException { out.flush(); out.close(); } finally { - writeUnlock(); + writeUnlock("metaSave"); } } @@ -1697,6 +1708,7 @@ FsServerDefaults getServerDefaults() throws StandbyException { * @throws IOException */ void setPermission(String src, FsPermission permission) throws IOException { + final String operationName = "setPermission"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1705,13 +1717,13 @@ void setPermission(String src, FsPermission permission) throws IOException { checkNameNodeSafeMode("Cannot set permission for " + src); auditStat = FSDirAttrOp.setPermission(dir, src, permission); } catch (AccessControlException e) { - logAuditEvent(false, "setPermission", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setPermission", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1720,6 +1732,7 @@ void setPermission(String src, FsPermission permission) throws IOException { */ void setOwner(String src, String username, String group) throws IOException { + final String operationName = "setOwner"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1728,13 +1741,13 @@ void setOwner(String src, String username, String group) checkNameNodeSafeMode("Cannot set owner for " + src); auditStat = FSDirAttrOp.setOwner(dir, src, username, group); } catch (AccessControlException e) { - logAuditEvent(false, "setOwner", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setOwner", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1743,6 +1756,7 @@ void setOwner(String src, String username, String group) */ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, long offset, long length) throws IOException { + final String operationName = "open"; checkOperation(OperationCategory.READ); GetBlockLocationsResult res = null; FSPermissionChecker pc = getPermissionChecker(); @@ -1767,13 +1781,13 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, } } } catch (AccessControlException e) { - logAuditEvent(false, "open", srcArg); + logAuditEvent(false, operationName, srcArg); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "open", srcArg); + logAuditEvent(true, operationName, srcArg); if (!isInSafeMode() && res.updateAccessTime()) { String src = srcArg; @@ -1814,7 +1828,7 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, } catch (Throwable e) { LOG.warn("Failed to update the access time of " + src, e); } finally { - writeUnlock(); + writeUnlock(operationName); } } @@ -1855,6 +1869,7 @@ private void sortLocatedBlocks(String clientMachine, LocatedBlocks blocks) { */ void concat(String target, String [] srcs, boolean logRetryCache) throws IOException { + final String operationName = "concat"; HdfsFileStatus stat = null; boolean success = false; writeLock(); @@ -1864,15 +1879,16 @@ void concat(String target, String [] srcs, boolean logRetryCache) stat = FSDirConcatOp.concat(dir, target, srcs, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "concat", Arrays.toString(srcs), target, stat); + logAuditEvent(success, operationName, Arrays.toString(srcs), + target, stat); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } } - logAuditEvent(success, "concat", Arrays.toString(srcs), target, stat); + logAuditEvent(success, operationName, Arrays.toString(srcs), target, stat); } /** @@ -1881,6 +1897,7 @@ void concat(String target, String [] srcs, boolean logRetryCache) * written to the edits log but is not flushed. */ void setTimes(String src, long mtime, long atime) throws IOException { + final String operationName = "setTimes"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1889,13 +1906,13 @@ void setTimes(String src, long mtime, long atime) throws IOException { checkNameNodeSafeMode("Cannot set times " + src); auditStat = FSDirAttrOp.setTimes(dir, src, mtime, atime); } catch (AccessControlException e) { - logAuditEvent(false, "setTimes", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setTimes", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1911,6 +1928,7 @@ boolean truncate(String src, long newLength, String clientName, String clientMachine, long mtime) throws IOException, UnresolvedLinkException { + final String operationName = "truncate"; requireEffectiveLayoutVersionForFeature(Feature.TRUNCATE); final FSDirTruncateOp.TruncateResult r; try { @@ -1930,16 +1948,16 @@ boolean truncate(String src, long newLength, String clientName, r = FSDirTruncateOp.truncate(this, src, newLength, clientName, clientMachine, mtime, toRemoveBlocks, pc); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); if (!toRemoveBlocks.getToDeleteList().isEmpty()) { removeBlocks(toRemoveBlocks); toRemoveBlocks.clear(); } - logAuditEvent(true, "truncate", src, null, r.getFileStatus()); + logAuditEvent(true, operationName, src, null, r.getFileStatus()); } catch (AccessControlException e) { - logAuditEvent(false, "truncate", src); + logAuditEvent(false, operationName, src); throw e; } return r.getResult(); @@ -1951,6 +1969,7 @@ boolean truncate(String src, long newLength, String clientName, void createSymlink(String target, String link, PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) throws IOException { + final String operationName = "createSymlink"; if (!FileSystem.areSymlinksEnabled()) { throw new UnsupportedOperationException("Symlinks not supported"); } @@ -1962,13 +1981,13 @@ void createSymlink(String target, String link, auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link, dirPerms, createParent, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "createSymlink", link, target, null); + logAuditEvent(false, operationName, link, target, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "createSymlink", link, target, auditStat); + logAuditEvent(true, operationName, link, target, auditStat); } /** @@ -1986,6 +2005,7 @@ void createSymlink(String target, String link, */ boolean setReplication(final String src, final short replication) throws IOException { + final String operationName = "setReplication"; boolean success = false; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1994,14 +2014,14 @@ boolean setReplication(final String src, final short replication) checkNameNodeSafeMode("Cannot set replication for " + src); success = FSDirAttrOp.setReplication(dir, blockManager, src, replication); } catch (AccessControlException e) { - logAuditEvent(false, "setReplication", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } if (success) { getEditLog().logSync(); - logAuditEvent(true, "setReplication", src); + logAuditEvent(true, operationName, src); } return success; } @@ -2013,6 +2033,7 @@ boolean setReplication(final String src, final short replication) * @param policyName storage policy name */ void setStoragePolicy(String src, String policyName) throws IOException { + final String operationName = "setStoragePolicy"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -2022,13 +2043,13 @@ void setStoragePolicy(String src, String policyName) throws IOException { auditStat = FSDirAttrOp.setStoragePolicy(dir, blockManager, src, policyName); } catch (AccessControlException e) { - logAuditEvent(false, "setStoragePolicy", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setStoragePolicy", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -2037,6 +2058,7 @@ void setStoragePolicy(String src, String policyName) throws IOException { * @param src file/directory path */ void unsetStoragePolicy(String src) throws IOException { + final String operationName = "unsetStoragePolicy"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -2045,13 +2067,13 @@ void unsetStoragePolicy(String src) throws IOException { checkNameNodeSafeMode("Cannot unset storage policy for " + src); auditStat = FSDirAttrOp.unsetStoragePolicy(dir, blockManager, src); } catch (AccessControlException e) { - logAuditEvent(false, "unsetStoragePolicy", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "unsetStoragePolicy", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** * Get the storage policy for a file or a directory. @@ -2067,7 +2089,7 @@ BlockStoragePolicy getStoragePolicy(String src) throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getStoragePolicy(dir, blockManager, src); } finally { - readUnlock(); + readUnlock("getStoragePolicy"); } } @@ -2081,7 +2103,7 @@ BlockStoragePolicy[] getStoragePolicies() throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getStoragePolicies(blockManager); } finally { - readUnlock(); + readUnlock("getStoragePolicies"); } } @@ -2092,7 +2114,7 @@ long getPreferredBlockSize(String src) throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getPreferredBlockSize(dir, src); } finally { - readUnlock(); + readUnlock("getPreferredBlockSize"); } } @@ -2244,7 +2266,7 @@ private HdfsFileStatus startFileInt(String src, dir.writeUnlock(); } } finally { - writeUnlock(); + writeUnlock("create"); // There might be transactions logged while trying to recover the lease. // They need to be sync'ed even when an exception was thrown. if (!skipSync) { @@ -2296,7 +2318,7 @@ boolean recoverLease(String src, String holder, String clientMachine) skipSync = true; throw se; } finally { - writeUnlock(); + writeUnlock("recoverLease"); // There might be transactions logged while trying to recover the lease. // They need to be sync'ed even when an exception was thrown. if (!skipSync) { @@ -2402,6 +2424,7 @@ boolean recoverLeaseInternal(RecoverLeaseOp op, INodesInPath iip, LastBlockWithStatus appendFile(String srcArg, String holder, String clientMachine, EnumSet flag, boolean logRetryCache) throws IOException { + final String operationName = "append"; boolean newBlock = flag.contains(CreateFlag.NEW_BLOCK); if (newBlock) { requireEffectiveLayoutVersionForFeature(Feature.APPEND_NEW_BLOCK); @@ -2425,17 +2448,17 @@ LastBlockWithStatus appendFile(String srcArg, String holder, skipSync = true; throw se; } finally { - writeUnlock(); + writeUnlock(operationName); // There might be transactions logged while trying to recover the lease // They need to be sync'ed even when an exception was thrown. if (!skipSync) { getEditLog().logSync(); } } - logAuditEvent(true, "append", srcArg); + logAuditEvent(true, operationName, srcArg); return lbs; } catch (AccessControlException e) { - logAuditEvent(false, "append", srcArg); + logAuditEvent(false, operationName, srcArg); throw e; } } @@ -2463,6 +2486,7 @@ LocatedBlock getAdditionalBlock( String src, long fileId, String clientName, ExtendedBlock previous, DatanodeInfo[] excludedNodes, String[] favoredNodes, EnumSet flags) throws IOException { + final String operationName = "getAdditionalBlock"; NameNode.stateChangeLog.debug("BLOCK* getAdditionalBlock: {} inodeId {}" + " for {}", src, fileId, clientName); @@ -2476,7 +2500,7 @@ LocatedBlock getAdditionalBlock( r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName, previous, onRetryBlock); } finally { - readUnlock(); + readUnlock(operationName); } if (r == null) { @@ -2496,7 +2520,7 @@ LocatedBlock getAdditionalBlock( lb = FSDirWriteFileOp.storeAllocatedBlock( this, src, fileId, clientName, previous, targets); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); return lb; @@ -2542,7 +2566,7 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", src, fileId, blk, clientName, clientMachine)); } finally { - readUnlock(); + readUnlock("getAdditionalDatanode"); } if (clientnode == null) { @@ -2576,7 +2600,7 @@ void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is " + "removed from pendingCreates", b); } finally { - writeUnlock(); + writeUnlock("abandonBlock"); } getEditLog().logSync(); } @@ -2640,7 +2664,7 @@ boolean completeFile(final String src, String holder, success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, fileId); } finally { - writeUnlock(); + writeUnlock("completeFile"); } getEditLog().logSync(); if (success) { @@ -2708,6 +2732,7 @@ private boolean checkBlocksComplete(String src, boolean allowCommittedBlock, @Deprecated boolean renameTo(String src, String dst, boolean logRetryCache) throws IOException { + final String operationName = "rename"; FSDirRenameOp.RenameResult ret = null; writeLock(); try { @@ -2715,15 +2740,15 @@ boolean renameTo(String src, String dst, boolean logRetryCache) checkNameNodeSafeMode("Cannot rename " + src); ret = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "rename", src, dst, null); + logAuditEvent(false, operationName, src, dst, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } boolean success = ret.success; if (success) { getEditLog().logSync(); - logAuditEvent(success, "rename", src, dst, ret.auditStat); + logAuditEvent(success, operationName, src, dst, ret.auditStat); } return success; } @@ -2731,6 +2756,7 @@ boolean renameTo(String src, String dst, boolean logRetryCache) void renameTo(final String src, final String dst, boolean logRetryCache, Options.Rename... options) throws IOException { + final String operationName = "rename"; FSDirRenameOp.RenameResult res = null; writeLock(); try { @@ -2738,11 +2764,11 @@ void renameTo(final String src, final String dst, checkNameNodeSafeMode("Cannot rename " + src); res = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache, options); } catch (AccessControlException e) { - logAuditEvent(false, "rename (options=" + Arrays.toString(options) + - ")", src, dst, null); + logAuditEvent(false, operationName + " (options=" + + Arrays.toString(options) + ")", src, dst, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); @@ -2753,8 +2779,8 @@ void renameTo(final String src, final String dst, collectedBlocks.clear(); } - logAuditEvent(true, "rename (options=" + Arrays.toString(options) + - ")", src, dst, res.auditStat); + logAuditEvent(true, operationName + " (options=" + + Arrays.toString(options) + ")", src, dst, res.auditStat); } /** @@ -2765,6 +2791,7 @@ void renameTo(final String src, final String dst, */ boolean delete(String src, boolean recursive, boolean logRetryCache) throws IOException { + final String operationName = "delete"; BlocksMapUpdateInfo toRemovedBlocks = null; writeLock(); boolean ret = false; @@ -2775,16 +2802,16 @@ boolean delete(String src, boolean recursive, boolean logRetryCache) this, src, recursive, logRetryCache); ret = toRemovedBlocks != null; } catch (AccessControlException e) { - logAuditEvent(false, "delete", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); if (toRemovedBlocks != null) { removeBlocks(toRemovedBlocks); // Incremental deletion of blocks } - logAuditEvent(true, "delete", src); + logAuditEvent(true, operationName, src); return ret; } @@ -2812,7 +2839,7 @@ void removeBlocks(BlocksMapUpdateInfo blocks) { blockManager.removeBlock(iter.next()); } } finally { - writeUnlock(); + writeUnlock("removeBlocks"); } } } @@ -2863,6 +2890,7 @@ void removeLeasesAndINodes(List removedUCFiles, */ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) throws IOException { + final String operationName = "getfileinfo"; checkOperation(OperationCategory.READ); HdfsFileStatus stat = null; readLock(); @@ -2870,12 +2898,12 @@ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) checkOperation(OperationCategory.READ); stat = FSDirStatAndListingOp.getFileInfo(dir, src, resolveLink); } catch (AccessControlException e) { - logAuditEvent(false, "getfileinfo", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "getfileinfo", src); + logAuditEvent(true, operationName, src); return stat; } @@ -2883,16 +2911,17 @@ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) * Returns true if the file is closed */ boolean isFileClosed(final String src) throws IOException { + final String operationName = "isFileClosed"; checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); return FSDirStatAndListingOp.isFileClosed(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "isFileClosed", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } } @@ -2901,6 +2930,7 @@ boolean isFileClosed(final String src) throws IOException { */ boolean mkdirs(String src, PermissionStatus permissions, boolean createParent) throws IOException { + final String operationName = "mkdirs"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -2909,13 +2939,13 @@ boolean mkdirs(String src, PermissionStatus permissions, checkNameNodeSafeMode("Cannot create directory " + src); auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent); } catch (AccessControlException e) { - logAuditEvent(false, "mkdirs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "mkdirs", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); return true; } @@ -2935,6 +2965,7 @@ boolean mkdirs(String src, PermissionStatus permissions, */ ContentSummary getContentSummary(final String src) throws IOException { checkOperation(OperationCategory.READ); + final String operationName = "contentSummary"; readLock(); boolean success = true; ContentSummary cs; @@ -2943,12 +2974,12 @@ ContentSummary getContentSummary(final String src) throws IOException { cs = FSDirStatAndListingOp.getContentSummary(dir, src); } catch (AccessControlException ace) { success = false; - logAuditEvent(success, "contentSummary", src); + logAuditEvent(success, operationName, src); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "contentSummary", src); + logAuditEvent(success, operationName, src); return cs; } @@ -2968,6 +2999,7 @@ ContentSummary getContentSummary(final String src) throws IOException { */ QuotaUsage getQuotaUsage(final String src) throws IOException { checkOperation(OperationCategory.READ); + final String operationName = "quotaUsage"; QuotaUsage quotaUsage; readLock(); boolean success = true; @@ -2976,12 +3008,12 @@ QuotaUsage getQuotaUsage(final String src) throws IOException { quotaUsage = FSDirStatAndListingOp.getQuotaUsage(dir, src); } catch (AccessControlException ace) { success = false; - logAuditEvent(success, "quotaUsage", src); + logAuditEvent(success, operationName, src); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "quotaUsage", src); + logAuditEvent(success, operationName, src); return quotaUsage; } @@ -2998,6 +3030,7 @@ void setQuota(String src, long nsQuota, long ssQuota, StorageType type) requireEffectiveLayoutVersionForFeature(Feature.QUOTA_BY_STORAGE_TYPE); } checkOperation(OperationCategory.WRITE); + final String operationName = "setQuota"; writeLock(); boolean success = false; try { @@ -3006,15 +3039,15 @@ void setQuota(String src, long nsQuota, long ssQuota, StorageType type) FSDirAttrOp.setQuota(dir, src, nsQuota, ssQuota, type); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "setQuota", src); + logAuditEvent(success, operationName, src); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } } - logAuditEvent(success, "setQuota", src); + logAuditEvent(success, operationName, src); } /** Persist all metadata about this file. @@ -3045,7 +3078,7 @@ void fsync(String src, long fileId, String clientName, long lastBlockLength) } FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); } finally { - writeUnlock(); + writeUnlock("fsync"); } getEditLog().logSync(); } @@ -3463,7 +3496,7 @@ void commitBlockSynchronization(ExtendedBlock oldBlock, FSDirWriteFileOp.persistBlocks(dir, src, iFile, false); } } finally { - writeUnlock(); + writeUnlock("commitBlockSynchronization"); } getEditLog().logSync(); if (closeFile) { @@ -3507,7 +3540,7 @@ void renewLease(String holder) throws IOException { checkNameNodeSafeMode("Cannot renew lease for " + holder); leaseManager.renewLease(holder); } finally { - readUnlock(); + readUnlock("renewLease"); } } @@ -3527,18 +3560,19 @@ DirectoryListing getListing(String src, byte[] startAfter, boolean needLocation) throws IOException { checkOperation(OperationCategory.READ); + final String operationName = "listStatus"; DirectoryListing dl = null; readLock(); try { checkOperation(NameNode.OperationCategory.READ); dl = getListingInt(dir, src, startAfter, needLocation); } catch (AccessControlException e) { - logAuditEvent(false, "listStatus", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "listStatus", src); + logAuditEvent(true, operationName, src); return dl; } @@ -3575,7 +3609,7 @@ void registerDatanode(DatanodeRegistration nodeReg) throws IOException { try { blockManager.registerDatanode(nodeReg); } finally { - writeUnlock(); + writeUnlock("registerDatanode"); } } @@ -3625,7 +3659,7 @@ nodeReg, reports, getBlockPoolId(), cacheCapacity, cacheUsed, return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId); } finally { - readUnlock(); + readUnlock("handleHeartbeat"); } } @@ -3819,7 +3853,7 @@ private void clearCorruptLazyPersistFiles() } } } finally { - writeUnlock(); + writeUnlock("clearCorruptLazyPersistFiles"); } if (changed) { getEditLog().logSync(); @@ -4025,7 +4059,7 @@ int getNumberOfDatanodes(DatanodeReportType type) { return getBlockManager().getDatanodeManager().getDatanodeListForReport( type).size(); } finally { - readUnlock(); + readUnlock("getNumberOfDatanodes"); } } @@ -4046,7 +4080,7 @@ DatanodeInfo[] datanodeReport(final DatanodeReportType type } return arr; } finally { - readUnlock(); + readUnlock("datanodeReport"); } } @@ -4069,7 +4103,7 @@ DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type } return reports; } finally { - readUnlock(); + readUnlock("getDatanodeStorageReport"); } } @@ -4095,7 +4129,7 @@ boolean saveNamespace(final long timeWindow, final long txGap) } saved = getFSImage().saveNamespace(timeWindow, txGap, this); } finally { - readUnlock(); + readUnlock("saveNamespace"); cpUnlock(); } if (saved) { @@ -4128,7 +4162,7 @@ boolean restoreFailedStorage(String arg) throws AccessControlException, return val; } finally { - writeUnlock(); + writeUnlock("restoreFailedStorage"); cpUnlock(); } } @@ -4146,7 +4180,7 @@ void finalizeUpgrade() throws IOException { checkOperation(OperationCategory.UNCHECKED); getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); } finally { - writeUnlock(); + writeUnlock("finalizeUpgrade"); cpUnlock(); } } @@ -4222,7 +4256,7 @@ public long getCompleteBlocksTotal() { numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); return getBlocksTotal() - numUCBlocks; } finally { - readUnlock(); + readUnlock("getCompleteBlocksTotal"); } } @@ -4261,7 +4295,7 @@ void enterSafeMode(boolean resourcesLow) throws IOException { NameNode.stateChangeLog.info("STATE* Safe mode is ON.\n" + getSafeModeTip()); } finally { - writeUnlock(); + writeUnlock("enterSafeMode"); } } @@ -4281,7 +4315,7 @@ void leaveSafeMode(boolean force) { startSecretManagerIfNecessary(); } } finally { - writeUnlock(); + writeUnlock("leaveSafeMode"); } } @@ -4326,7 +4360,7 @@ CheckpointSignature rollEditLog() throws IOException { } return getFSImage().rollEditLog(getEffectiveLayoutVersion()); } finally { - writeUnlock(); + writeUnlock("rollEditLog"); } } @@ -4344,7 +4378,7 @@ NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, getEditLog().logSync(); return cmd; } finally { - writeUnlock(); + writeUnlock("startCheckpoint"); } } @@ -4355,7 +4389,7 @@ public void processIncrementalBlockReport(final DatanodeID nodeID, try { blockManager.processIncrementalBlockReport(nodeID, srdb); } finally { - writeUnlock(); + writeUnlock("processIncrementalBlockReport"); } } @@ -4369,7 +4403,7 @@ void endCheckpoint(NamenodeRegistration registration, LOG.info("End checkpoint for " + registration.getAddress()); getFSImage().endCheckpoint(sig); } finally { - readUnlock(); + readUnlock("endCheckpoint"); } } @@ -4763,7 +4797,7 @@ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { } } } finally { - writeUnlock(); + writeUnlock("reportBadBlocks"); } } @@ -4799,7 +4833,7 @@ LocatedBlock bumpBlockGenerationStamp(ExtendedBlock block, blockManager.setBlockToken(locatedBlock, BlockTokenIdentifier.AccessMode.WRITE); } finally { - writeUnlock(); + writeUnlock("bumpBlockGenerationStamp"); } // Ensure we record the new generation stamp getEditLog().logSync(); @@ -4834,7 +4868,7 @@ void updatePipeline( updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, newStorageIDs, logRetryCache); } finally { - writeUnlock(); + writeUnlock("updatePipeline"); } getEditLog().logSync(); LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => " @@ -4908,7 +4942,7 @@ void registerBackupNode(NamenodeRegistration bnReg, bnReg, nnReg); } } finally { - writeUnlock(); + writeUnlock("registerBackupNode"); } } @@ -4933,7 +4967,7 @@ void releaseBackupNode(NamenodeRegistration registration) " node namespaceID = " + registration.getNamespaceID()); getEditLog().releaseBackupStream(registration); } finally { - writeUnlock(); + writeUnlock("releaseBackupNode"); } } @@ -5018,7 +5052,7 @@ Collection listCorruptFileBlocks(String path, } return corruptFiles; } finally { - readUnlock(); + readUnlock("listCorruptFileBlocks"); } } @@ -5102,7 +5136,7 @@ Token getDelegationToken(Text renewer) long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); getEditLog().logGetDelegationToken(dtId, expiryTime); } finally { - writeUnlock(); + writeUnlock("getDelegationToken"); } getEditLog().logSync(); return token; @@ -5136,7 +5170,7 @@ long renewDelegationToken(Token token) id.readFields(in); getEditLog().logRenewDelegationToken(id, expiryTime); } finally { - writeUnlock(); + writeUnlock("renewDelegationToken"); } getEditLog().logSync(); return expiryTime; @@ -5160,7 +5194,7 @@ void cancelDelegationToken(Token token) .cancelToken(token, canceller); getEditLog().logCancelDelegationToken(id); } finally { - writeUnlock(); + writeUnlock("cancelDelegationToken"); } getEditLog().logSync(); } @@ -5803,6 +5837,7 @@ public SnapshotManager getSnapshotManager() { /** Allow snapshot on a directory. */ void allowSnapshot(String path) throws IOException { checkOperation(OperationCategory.WRITE); + final String operationName = "allowSnapshot"; boolean success = false; writeLock(); try { @@ -5812,15 +5847,16 @@ void allowSnapshot(String path) throws IOException { FSDirSnapshotOp.allowSnapshot(dir, snapshotManager, path); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "allowSnapshot", path, null, null); + logAuditEvent(success, operationName, path, null, null); } /** Disallow snapshot on a directory. */ void disallowSnapshot(String path) throws IOException { checkOperation(OperationCategory.WRITE); + final String operationName = "disallowSnapshot"; boolean success = false; writeLock(); try { @@ -5830,10 +5866,10 @@ void disallowSnapshot(String path) throws IOException { FSDirSnapshotOp.disallowSnapshot(dir, snapshotManager, path); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "disallowSnapshot", path, null, null); + logAuditEvent(success, operationName, path, null, null); } /** @@ -5843,6 +5879,7 @@ void disallowSnapshot(String path) throws IOException { */ String createSnapshot(String snapshotRoot, String snapshotName, boolean logRetryCache) throws IOException { + final String operationName = "createSnapshot"; String snapshotPath = null; boolean success = false; writeLock(); @@ -5853,14 +5890,14 @@ String createSnapshot(String snapshotRoot, String snapshotName, snapshotManager, snapshotRoot, snapshotName, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "createSnapshot", snapshotRoot, + logAuditEvent(success, operationName, snapshotRoot, snapshotPath, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "createSnapshot", snapshotRoot, + logAuditEvent(success, operationName, snapshotRoot, snapshotPath, null); return snapshotPath; } @@ -5876,6 +5913,7 @@ String createSnapshot(String snapshotRoot, String snapshotName, void renameSnapshot( String path, String snapshotOldName, String snapshotNewName, boolean logRetryCache) throws IOException { + final String operationName = "renameSnapshot"; boolean success = false; String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName); String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName); @@ -5887,14 +5925,14 @@ void renameSnapshot( snapshotOldName, snapshotNewName, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "renameSnapshot", oldSnapshotRoot, + logAuditEvent(success, operationName, oldSnapshotRoot, newSnapshotRoot, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "renameSnapshot", oldSnapshotRoot, + logAuditEvent(success, operationName, oldSnapshotRoot, newSnapshotRoot, null); } @@ -5907,6 +5945,7 @@ void renameSnapshot( */ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { + final String operationName = "listSnapshottableDirectory"; SnapshottableDirectoryStatus[] status = null; checkOperation(OperationCategory.READ); boolean success = false; @@ -5916,12 +5955,12 @@ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() status = FSDirSnapshotOp.getSnapshottableDirListing(dir, snapshotManager); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "listSnapshottableDirectory", null, null, null); + logAuditEvent(success, operationName, null, null, null); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "listSnapshottableDirectory", null, null, null); + logAuditEvent(success, operationName, null, null, null); return status; } @@ -5942,6 +5981,7 @@ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() */ SnapshotDiffReport getSnapshotDiffReport(String path, String fromSnapshot, String toSnapshot) throws IOException { + final String operationName = "computeSnapshotDiff"; SnapshotDiffReport diffs = null; checkOperation(OperationCategory.READ); boolean success = false; @@ -5956,13 +5996,13 @@ SnapshotDiffReport getSnapshotDiffReport(String path, path, fromSnapshot, toSnapshot); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "computeSnapshotDiff", fromSnapshotRoot, + logAuditEvent(success, operationName, fromSnapshotRoot, toSnapshotRoot, null); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "computeSnapshotDiff", fromSnapshotRoot, + logAuditEvent(success, operationName, fromSnapshotRoot, toSnapshotRoot, null); return diffs; } @@ -5976,6 +6016,7 @@ SnapshotDiffReport getSnapshotDiffReport(String path, */ void deleteSnapshot(String snapshotRoot, String snapshotName, boolean logRetryCache) throws IOException { + final String operationName = "deleteSnapshot"; boolean success = false; String rootPath = null; writeLock(); @@ -5988,10 +6029,10 @@ void deleteSnapshot(String snapshotRoot, String snapshotName, snapshotRoot, snapshotName, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "deleteSnapshot", rootPath, null, null); + logAuditEvent(success, operationName, rootPath, null, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); @@ -6000,7 +6041,7 @@ void deleteSnapshot(String snapshotRoot, String snapshotName, if (blocksToBeDeleted != null) { removeBlocks(blocksToBeDeleted); } - logAuditEvent(success, "deleteSnapshot", rootPath, null, null); + logAuditEvent(success, operationName, rootPath, null, null); } /** @@ -6026,11 +6067,12 @@ RollingUpgradeInfo queryRollingUpgrade() throws IOException { rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage); return rollingUpgradeInfo; } finally { - readUnlock(); + readUnlock("queryRollingUpgrade"); } } RollingUpgradeInfo startRollingUpgrade() throws IOException { + final String operationName = "startRollingUpgrade"; checkSuperuserPrivilege(); checkOperation(OperationCategory.WRITE); writeLock(); @@ -6053,11 +6095,11 @@ RollingUpgradeInfo startRollingUpgrade() throws IOException { getFSImage().rollEditLog(getEffectiveLayoutVersion()); } } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "startRollingUpgrade", null, null, null); + logAuditEvent(true, operationName, null, null, null); return rollingUpgradeInfo; } @@ -6141,7 +6183,7 @@ public RollingUpgradeInfo.Bean getRollingUpgradeStatus() { } catch (IOException ioe) { LOG.warn("Encountered exception setting Rollback Image", ioe); } finally { - readUnlock(); + readUnlock("getRollingUpgradeStatus"); } return new RollingUpgradeInfo.Bean(upgradeInfo); } @@ -6221,6 +6263,7 @@ void checkRollingUpgrade(String action) throws RollingUpgradeException { } RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { + final String operationName = "finalizeRollingUpgrade"; checkSuperuserPrivilege(); checkOperation(OperationCategory.WRITE); writeLock(); @@ -6241,14 +6284,14 @@ RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, NameNodeFile.IMAGE); } finally { - writeUnlock(); + writeUnlock(operationName); } if (!haEnabled) { // Sync not needed for ha since the edit was rolled after logging. getEditLog().logSync(); } - logAuditEvent(true, "finalizeRollingUpgrade", null, null, null); + logAuditEvent(true, operationName, null, null, null); return rollingUpgradeInfo; } @@ -6260,6 +6303,7 @@ void finalizeRollingUpgradeInternal(long finalizeTime) { long addCacheDirective(CacheDirectiveInfo directive, EnumSet flags, boolean logRetryCache) throws IOException { + final String operationName = "addCacheDirective"; CacheDirectiveInfo effectiveDirective = null; boolean success = false; String effectiveDirectiveStr; @@ -6274,23 +6318,24 @@ long addCacheDirective(CacheDirectiveInfo directive, directive, flags, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "addCacheDirective", null, + logAuditEvent(success, operationName, null, null, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } } effectiveDirectiveStr = effectiveDirective.toString(); - logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr, + logAuditEvent(success, operationName, effectiveDirectiveStr, null, null); return effectiveDirective.getId(); } void modifyCacheDirective(CacheDirectiveInfo directive, EnumSet flags, boolean logRetryCache) throws IOException { + final String operationName = "modifyCacheDirective"; boolean success = false; final String idStr = "{id: " + directive.getId() + "}"; if (!flags.contains(CacheFlag.FORCE)) { @@ -6304,20 +6349,21 @@ void modifyCacheDirective(CacheDirectiveInfo directive, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "modifyCacheDirective", idStr, + logAuditEvent(success, operationName, idStr, directive.toString(), null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } } - logAuditEvent(success, "modifyCacheDirective", idStr, + logAuditEvent(success, operationName, idStr, directive.toString(), null); } void removeCacheDirective(long id, boolean logRetryCache) throws IOException { + final String operationName = "removeCacheDirective"; boolean success = false; String idStr = "{id: " + Long.toString(id) + "}"; writeLock(); @@ -6327,17 +6373,18 @@ void removeCacheDirective(long id, boolean logRetryCache) throws IOException { FSNDNCacheOp.removeCacheDirective(this, cacheManager, id, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "removeCacheDirective", idStr, null, null); + logAuditEvent(success, operationName, idStr, null, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } - logAuditEvent(success, "removeCacheDirective", idStr, null, null); + logAuditEvent(success, operationName, idStr, null, null); getEditLog().logSync(); } BatchedListEntries listCacheDirectives( long startId, CacheDirectiveInfo filter) throws IOException { + final String operationName = "listCacheDirectives"; checkOperation(OperationCategory.READ); BatchedListEntries results; cacheManager.waitForRescanIfNeeded(); @@ -6349,19 +6396,20 @@ BatchedListEntries listCacheDirectives( filter); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "listCacheDirectives", filter.toString(), null, + logAuditEvent(success, operationName, filter.toString(), null, null); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "listCacheDirectives", filter.toString(), null, + logAuditEvent(success, operationName, filter.toString(), null, null); return results; } void addCachePool(CachePoolInfo req, boolean logRetryCache) throws IOException { + final String operationName = "addCachePool"; writeLock(); boolean success = false; String poolInfoStr = null; @@ -6374,17 +6422,18 @@ void addCachePool(CachePoolInfo req, boolean logRetryCache) poolInfoStr = info.toString(); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "addCachePool", poolInfoStr, null, null); + logAuditEvent(success, operationName, poolInfoStr, null, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } - logAuditEvent(success, "addCachePool", poolInfoStr, null, null); + logAuditEvent(success, operationName, poolInfoStr, null, null); getEditLog().logSync(); } void modifyCachePool(CachePoolInfo req, boolean logRetryCache) throws IOException { + final String operationName = "modifyCachePool"; writeLock(); boolean success = false; String poolNameStr = "{poolName: " + @@ -6396,13 +6445,13 @@ void modifyCachePool(CachePoolInfo req, boolean logRetryCache) FSNDNCacheOp.modifyCachePool(this, cacheManager, req, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "modifyCachePool", poolNameStr, + logAuditEvent(success, operationName, poolNameStr, req == null ? null : req.toString(), null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } - logAuditEvent(success, "modifyCachePool", poolNameStr, + logAuditEvent(success, operationName, poolNameStr, req == null ? null : req.toString(), null); getEditLog().logSync(); @@ -6410,6 +6459,7 @@ void modifyCachePool(CachePoolInfo req, boolean logRetryCache) void removeCachePool(String cachePoolName, boolean logRetryCache) throws IOException { + final String operationName = "removeCachePool"; writeLock(); boolean success = false; String poolNameStr = "{poolName: " + cachePoolName + "}"; @@ -6420,17 +6470,18 @@ void removeCachePool(String cachePoolName, boolean logRetryCache) logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "removeCachePool", poolNameStr, null, null); + logAuditEvent(success, operationName, poolNameStr, null, null); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); } - logAuditEvent(success, "removeCachePool", poolNameStr, null, null); + logAuditEvent(success, operationName, poolNameStr, null, null); getEditLog().logSync(); } BatchedListEntries listCachePools(String prevKey) throws IOException { + final String operationName = "listCachePools"; BatchedListEntries results; checkOperation(OperationCategory.READ); boolean success = false; @@ -6441,17 +6492,18 @@ BatchedListEntries listCachePools(String prevKey) results = FSNDNCacheOp.listCachePools(this, cacheManager, prevKey); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "listCachePools", null, null, null); + logAuditEvent(success, operationName, null, null, null); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "listCachePools", null, null, null); + logAuditEvent(success, operationName, null, null, null); return results; } void modifyAclEntries(final String src, List aclSpec) throws IOException { + final String operationName = "modifyAclEntries"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -6460,17 +6512,18 @@ void modifyAclEntries(final String src, List aclSpec) checkNameNodeSafeMode("Cannot modify ACL entries on " + src); auditStat = FSDirAclOp.modifyAclEntries(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "modifyAclEntries", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "modifyAclEntries", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeAclEntries(final String src, List aclSpec) throws IOException { + final String operationName = "removeAclEntries"; checkOperation(OperationCategory.WRITE); HdfsFileStatus auditStat = null; writeLock(); @@ -6479,16 +6532,17 @@ void removeAclEntries(final String src, List aclSpec) checkNameNodeSafeMode("Cannot remove ACL entries on " + src); auditStat = FSDirAclOp.removeAclEntries(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "removeAclEntries", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeAclEntries", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeDefaultAcl(final String src) throws IOException { + final String operationName = "removeDefaultAcl"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -6497,16 +6551,17 @@ void removeDefaultAcl(final String src) throws IOException { checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); auditStat = FSDirAclOp.removeDefaultAcl(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "removeDefaultAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeDefaultAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeAcl(final String src) throws IOException { + final String operationName = "removeAcl"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -6515,16 +6570,17 @@ void removeAcl(final String src) throws IOException { checkNameNodeSafeMode("Cannot remove ACL on " + src); auditStat = FSDirAclOp.removeAcl(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "removeAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void setAcl(final String src, List aclSpec) throws IOException { + final String operationName = "setAcl"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -6533,16 +6589,17 @@ void setAcl(final String src, List aclSpec) throws IOException { checkNameNodeSafeMode("Cannot set ACL on " + src); auditStat = FSDirAclOp.setAcl(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "setAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } AclStatus getAclStatus(String src) throws IOException { + final String operationName = "getAclStatus"; checkOperation(OperationCategory.READ); final AclStatus ret; readLock(); @@ -6550,12 +6607,12 @@ AclStatus getAclStatus(String src) throws IOException { checkOperation(OperationCategory.READ); ret = FSDirAclOp.getAclStatus(dir, src); } catch(AccessControlException ace) { - logAuditEvent(false, "getAclStatus", src); + logAuditEvent(false, operationName, src); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "getAclStatus", src); + logAuditEvent(true, operationName, src); return ret; } @@ -6573,6 +6630,7 @@ AclStatus getAclStatus(String src) throws IOException { void createEncryptionZone(final String src, final String keyName, boolean logRetryCache) throws IOException, UnresolvedLinkException, SafeModeException, AccessControlException { + final String operationName = "createEncryptionZone"; try { Metadata metadata = FSDirEncryptionZoneOp.ensureKeyIsInitialized(dir, keyName, src); @@ -6588,13 +6646,13 @@ void createEncryptionZone(final String src, final String keyName, resultingStat = FSDirEncryptionZoneOp.createEncryptionZone(dir, src, pc, metadata.getCipher(), keyName, logRetryCache); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "createEncryptionZone", src, null, resultingStat); + logAuditEvent(true, operationName, src, null, resultingStat); } catch (AccessControlException e) { - logAuditEvent(false, "createEncryptionZone", src); + logAuditEvent(false, operationName, src); throw e; } } @@ -6609,6 +6667,7 @@ void createEncryptionZone(final String src, final String keyName, */ EncryptionZone getEZForPath(final String srcArg) throws AccessControlException, UnresolvedLinkException, IOException { + final String operationName = "getEZForPath"; HdfsFileStatus resultingStat = null; boolean success = false; EncryptionZone encryptionZone; @@ -6623,17 +6682,18 @@ EncryptionZone getEZForPath(final String srcArg) resultingStat = ezForPath.getValue(); encryptionZone = ezForPath.getKey(); } catch (AccessControlException ace) { - logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat); + logAuditEvent(success, operationName, srcArg, null, resultingStat); throw ace; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat); + logAuditEvent(success, operationName, srcArg, null, resultingStat); return encryptionZone; } BatchedListEntries listEncryptionZones(long prevId) throws IOException { + final String operationName = "listEncryptionZones"; boolean success = false; checkSuperuserPrivilege(); checkOperation(OperationCategory.READ); @@ -6646,8 +6706,8 @@ BatchedListEntries listEncryptionZones(long prevId) success = true; return ret; } finally { - readUnlock(); - logAuditEvent(success, "listEncryptionZones", null); + readUnlock(operationName); + logAuditEvent(success, operationName, null); } } @@ -6662,6 +6722,7 @@ BatchedListEntries listEncryptionZones(long prevId) void setErasureCodingPolicy(final String srcArg, final ErasureCodingPolicy ecPolicy, final boolean logRetryCache) throws IOException, UnresolvedLinkException, SafeModeException, AccessControlException { + final String operationName = "setErasureCodingPolicy"; checkSuperuserPrivilege(); checkOperation(OperationCategory.WRITE); HdfsFileStatus resultingStat = null; @@ -6674,16 +6735,16 @@ void setErasureCodingPolicy(final String srcArg, final ErasureCodingPolicy srcArg, ecPolicy, logRetryCache); success = true; } catch (AccessControlException ace) { - logAuditEvent(success, "setErasureCodingPolicy", srcArg, null, + logAuditEvent(success, operationName, srcArg, null, resultingStat); throw ace; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } } - logAuditEvent(success, "setErasureCodingPolicy", srcArg, null, + logAuditEvent(success, operationName, srcArg, null, resultingStat); } @@ -6698,7 +6759,7 @@ ErasureCodingPolicy getErasureCodingPolicy(String src) checkOperation(OperationCategory.READ); return FSDirErasureCodingOp.getErasureCodingPolicy(this, src); } finally { - readUnlock(); + readUnlock("getErasureCodingPolicy"); } } @@ -6712,13 +6773,14 @@ ErasureCodingPolicy[] getErasureCodingPolicies() throws IOException { checkOperation(OperationCategory.READ); return FSDirErasureCodingOp.getErasureCodingPolicies(this); } finally { - readUnlock(); + readUnlock("getErasureCodingPolicies"); } } void setXAttr(String src, XAttr xAttr, EnumSet flag, boolean logRetryCache) throws IOException { + final String operationName = "setXAttr"; HdfsFileStatus auditStat = null; writeLock(); try { @@ -6726,17 +6788,18 @@ void setXAttr(String src, XAttr xAttr, EnumSet flag, checkNameNodeSafeMode("Cannot set XAttr on " + src); auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "setXAttr", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setXAttr", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } List getXAttrs(final String src, List xAttrs) throws IOException { + final String operationName = "getXAttrs"; checkOperation(OperationCategory.READ); List fsXattrs; readLock(); @@ -6744,16 +6807,17 @@ List getXAttrs(final String src, List xAttrs) checkOperation(OperationCategory.READ); fsXattrs = FSDirXAttrOp.getXAttrs(dir, src, xAttrs); } catch (AccessControlException e) { - logAuditEvent(false, "getXAttrs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "getXAttrs", src); + logAuditEvent(true, operationName, src); return fsXattrs; } List listXAttrs(String src) throws IOException { + final String operationName = "listXAttrs"; checkOperation(OperationCategory.READ); List fsXattrs; readLock(); @@ -6761,17 +6825,18 @@ List listXAttrs(String src) throws IOException { checkOperation(OperationCategory.READ); fsXattrs = FSDirXAttrOp.listXAttrs(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "listXAttrs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "listXAttrs", src); + logAuditEvent(true, operationName, src); return fsXattrs; } void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) throws IOException { + final String operationName = "removeXAttr"; HdfsFileStatus auditStat = null; writeLock(); try { @@ -6779,16 +6844,17 @@ void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) checkNameNodeSafeMode("Cannot remove XAttr entry on " + src); auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "removeXAttr", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeXAttr", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void checkAccess(String src, FsAction mode) throws IOException { + final String operationName = "checkAccess"; checkOperation(OperationCategory.READ); FSPermissionChecker pc = getPermissionChecker(); readLock(); @@ -6804,10 +6870,10 @@ void checkAccess(String src, FsAction mode) throws IOException { dir.checkPathAccess(pc, iip, mode); } } catch (AccessControlException e) { - logAuditEvent(false, "checkAccess", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java index 043f569382..d90e1d0ab5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java @@ -27,11 +27,14 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Timer; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT; @@ -40,11 +43,20 @@ /** * Mimics a ReentrantReadWriteLock but does not directly implement the interface * so more sophisticated locking capabilities and logging/metrics are possible. + * {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY} + * to be true, metrics will be emitted into the FSNamesystem metrics registry + * for each operation which acquires this lock indicating how long the operation + * held the lock for. Note that if a thread dies, metrics produced after the + * most recent snapshot will be lost due to the use of + * {@link MutableRatesWithAggregation}. However since threads are re-used + * between operations this should not generally be an issue. */ class FSNamesystemLock { @VisibleForTesting protected ReentrantReadWriteLock coarseLock; + private final boolean metricsEnabled; + private final MutableRatesWithAggregation detailedHoldTimeMetrics; private final Timer timer; /** @@ -80,12 +92,19 @@ public Long initialValue() { private final AtomicLong timeStampOfLastReadLockReport = new AtomicLong(0); private final AtomicLong longestReadLockHeldInterval = new AtomicLong(0); - FSNamesystemLock(Configuration conf) { - this(conf, new Timer()); + @VisibleForTesting + static final String OP_NAME_OTHER = "OTHER"; + private static final String READ_LOCK_METRIC_PREFIX = "FSNReadLock"; + private static final String WRITE_LOCK_METRIC_PREFIX = "FSNWriteLock"; + + FSNamesystemLock(Configuration conf, + MutableRatesWithAggregation detailedHoldTimeMetrics) { + this(conf, detailedHoldTimeMetrics, new Timer()); } @VisibleForTesting - FSNamesystemLock(Configuration conf, Timer timer) { + FSNamesystemLock(Configuration conf, + MutableRatesWithAggregation detailedHoldTimeMetrics, Timer timer) { boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true); FSNamesystem.LOG.info("fsLock is fair: " + fair); this.coarseLock = new ReentrantReadWriteLock(fair); @@ -100,6 +119,12 @@ public Long initialValue() { this.lockSuppressWarningInterval = conf.getTimeDuration( DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY, DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + this.metricsEnabled = conf.getBoolean( + DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY, + DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT); + FSNamesystem.LOG.info("Detailed lock hold time metrics enabled: " + + this.metricsEnabled); + this.detailedHoldTimeMetrics = detailedHoldTimeMetrics; } public void readLock() { @@ -110,12 +135,17 @@ public void readLock() { } public void readUnlock() { + readUnlock(OP_NAME_OTHER); + } + + public void readUnlock(String opName) { final boolean needReport = coarseLock.getReadHoldCount() == 1; final long readLockInterval = timer.monotonicNow() - readLockHeldTimeStamp.get(); coarseLock.readLock().unlock(); if (needReport) { + addMetric(opName, readLockInterval, false); readLockHeldTimeStamp.remove(); } if (needReport && readLockInterval >= this.readLockReportingThreshold) { @@ -164,6 +194,10 @@ public void writeLockInterruptibly() throws InterruptedException { } public void writeUnlock() { + writeUnlock(OP_NAME_OTHER); + } + + public void writeUnlock(String opName) { final boolean needReport = coarseLock.getWriteHoldCount() == 1 && coarseLock.isWriteLockedByCurrentThread(); final long currentTime = timer.monotonicNow(); @@ -191,6 +225,10 @@ public void writeUnlock() { coarseLock.writeLock().unlock(); + if (needReport) { + addMetric(opName, writeLockInterval, true); + } + if (logReport) { FSNamesystem.LOG.info("FSNamesystem write lock held for " + writeLockInterval + " ms via\n" + @@ -227,4 +265,19 @@ public Condition newWriteLockCondition() { public int getQueueLength() { return coarseLock.getQueueLength(); } + + /** + * Add the lock hold time for a recent operation to the metrics. + * @param operationName Name of the operation for which to record the time + * @param value Length of time the lock was held + */ + private void addMetric(String operationName, long value, boolean isWrite) { + if (metricsEnabled) { + String metricName = + (isWrite ? WRITE_LOCK_METRIC_PREFIX : READ_LOCK_METRIC_PREFIX) + + org.apache.commons.lang.StringUtils.capitalize(operationName); + detailedHoldTimeMetrics.add(metricName, value); + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 06f6586564..a9fb24bfc7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -329,7 +329,7 @@ public void run() { needSync = checkLeases(); } } finally { - fsnamesystem.writeUnlock(); + fsnamesystem.writeUnlock("leaseManager"); // lease reassignments should to be sync'ed. if (needSync) { fsnamesystem.getEditLog().logSync(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 97a62485cc..756a2f66e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -522,7 +522,7 @@ private LocatedBlocks getBlockLocations(String path, HdfsFileStatus file) } catch (FileNotFoundException fnfe) { blocks = null; } finally { - fsn.readUnlock(); + fsn.readUnlock("fsckGetBlockLocations"); } return blocks; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index be6d75088f..a605972a1a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2678,6 +2678,14 @@ + + dfs.namenode.lock.detailed-metrics.enabled + false + If true, the namenode will keep track of how long various + operations hold the Namesystem lock for and emit this as metrics. + + + dfs.namenode.startup.delay.block.deletion.sec 0 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java index 08900ecba8..c97e0f0ac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java @@ -21,8 +21,12 @@ import com.google.common.base.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; +import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.util.FakeTimer; import org.apache.log4j.Level; import org.junit.Test; @@ -36,6 +40,8 @@ import java.util.regex.Pattern; import static org.junit.Assert.*; +import static org.apache.hadoop.test.MetricsAsserts.assertCounter; +import static org.apache.hadoop.test.MetricsAsserts.assertGauge; /** * Tests the FSNamesystemLock, looking at lock compatibilities and @@ -48,17 +54,17 @@ public void testFsLockFairness() throws IOException, InterruptedException{ Configuration conf = new Configuration(); conf.setBoolean("dfs.namenode.fslock.fair", true); - FSNamesystemLock fsnLock = new FSNamesystemLock(conf); + FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null); assertTrue(fsnLock.coarseLock.isFair()); conf.setBoolean("dfs.namenode.fslock.fair", false); - fsnLock = new FSNamesystemLock(conf); + fsnLock = new FSNamesystemLock(conf, null); assertFalse(fsnLock.coarseLock.isFair()); } @Test public void testFSNamesystemLockCompatibility() { - FSNamesystemLock rwLock = new FSNamesystemLock(new Configuration()); + FSNamesystemLock rwLock = new FSNamesystemLock(new Configuration(), null); assertEquals(0, rwLock.getReadHoldCount()); rwLock.readLock(); @@ -98,7 +104,7 @@ public void testFSLockGetWaiterCount() throws InterruptedException { final CountDownLatch latch = new CountDownLatch(threadCount); final Configuration conf = new Configuration(); conf.setBoolean("dfs.namenode.fslock.fair", true); - final FSNamesystemLock rwLock = new FSNamesystemLock(conf); + final FSNamesystemLock rwLock = new FSNamesystemLock(conf, null); rwLock.writeLock(); ExecutorService helper = Executors.newFixedThreadPool(threadCount); @@ -141,7 +147,7 @@ public void testFSWriteLockLongHoldingReport() throws Exception { writeLockSuppressWarningInterval, TimeUnit.MILLISECONDS); final FakeTimer timer = new FakeTimer(); - final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, timer); + final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer); timer.advance(writeLockSuppressWarningInterval); LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG); @@ -216,7 +222,7 @@ public void testFSReadLockLongHoldingReport() throws Exception { readLockSuppressWarningInterval, TimeUnit.MILLISECONDS); final FakeTimer timer = new FakeTimer(); - final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, timer); + final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer); timer.advance(readLockSuppressWarningInterval); LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG); @@ -303,7 +309,7 @@ public void run() { t2.join(); // Look for the differentiating class names in the stack trace String stackTracePatternString = - String.format("INFO.+%s(.+\n){4}\\Q%%s\\E\\.run", readLockLogStmt); + String.format("INFO.+%s(.+\n){5}\\Q%%s\\E\\.run", readLockLogStmt); Pattern t1Pattern = Pattern.compile( String.format(stackTracePatternString, t1.getClass().getName())); assertTrue(t1Pattern.matcher(logs.getOutput()).find()); @@ -314,4 +320,43 @@ public void run() { "Number of suppressed read-lock reports: 2")); } + @Test + public void testDetailedHoldMetrics() throws Exception { + Configuration conf = new Configuration(); + conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY, true); + FakeTimer timer = new FakeTimer(); + MetricsRegistry registry = new MetricsRegistry("Test"); + MutableRatesWithAggregation rates = + registry.newRatesWithAggregation("Test"); + FSNamesystemLock fsLock = new FSNamesystemLock(conf, rates, timer); + + fsLock.readLock(); + timer.advance(1); + fsLock.readUnlock("foo"); + fsLock.readLock(); + timer.advance(2); + fsLock.readUnlock("foo"); + + fsLock.readLock(); + timer.advance(1); + fsLock.readLock(); + timer.advance(1); + fsLock.readUnlock("bar"); + fsLock.readUnlock("bar"); + + fsLock.writeLock(); + timer.advance(1); + fsLock.writeUnlock("baz"); + + MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder(); + rates.snapshot(rb, true); + + assertGauge("FSNReadLockFooAvgTime", 1.5, rb); + assertCounter("FSNReadLockFooNumOps", 2L, rb); + assertGauge("FSNReadLockBarAvgTime", 2.0, rb); + assertCounter("FSNReadLockBarNumOps", 1L, rb); + assertGauge("FSNWriteLockBazAvgTime", 1.0, rb); + assertCounter("FSNWriteLockBazNumOps", 1L, rb); + } + } \ No newline at end of file