HDFS-8883. NameNode Metrics : Add FSNameSystem lock Queue Length. Contributed by Anu Engineer.
This commit is contained in:
parent
13604bd5f1
commit
a7862d5fe4
@ -236,6 +236,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf
|
|||||||
| `NumActiveClients` | Current number of active clients holding lease |
|
| `NumActiveClients` | Current number of active clients holding lease |
|
||||||
| `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state |
|
| `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state |
|
||||||
| `FSState` | Current state of the file system: Safemode or Operational |
|
| `FSState` | Current state of the file system: Safemode or Operational |
|
||||||
|
| `LockQueueLength` | Number of threads waiting to acquire FSNameSystem lock |
|
||||||
|
|
||||||
JournalNode
|
JournalNode
|
||||||
-----------
|
-----------
|
||||||
|
@ -788,6 +788,9 @@ Release 2.8.0 - UNRELEASED
|
|||||||
|
|
||||||
HDFS-8824. Do not use small blocks for balancing the cluster. (szetszwo)
|
HDFS-8824. Do not use small blocks for balancing the cluster. (szetszwo)
|
||||||
|
|
||||||
|
HDFS-8883. NameNode Metrics : Add FSNameSystem lock Queue Length.
|
||||||
|
(Anu Engineer via xyao)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
|
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
|
||||||
|
@ -3911,6 +3911,21 @@ public int getNumEncryptionZones() {
|
|||||||
return dir.ezManager.getNumEncryptionZones();
|
return dir.ezManager.getNumEncryptionZones();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length of the wait Queue for the FSNameSystemLock.
|
||||||
|
*
|
||||||
|
* A larger number here indicates lots of threads are waiting for
|
||||||
|
* FSNameSystemLock.
|
||||||
|
*
|
||||||
|
* @return int - Number of Threads waiting to acquire FSNameSystemLock
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
@Metric({"LockQueueLength", "Number of threads waiting to " +
|
||||||
|
"acquire FSNameSystemLock"})
|
||||||
|
public int getFsLockQueueLength() {
|
||||||
|
return fsLock.getQueueLength();
|
||||||
|
}
|
||||||
|
|
||||||
int getNumberOfDatanodes(DatanodeReportType type) {
|
int getNumberOfDatanodes(DatanodeReportType type) {
|
||||||
readLock();
|
readLock();
|
||||||
try {
|
try {
|
||||||
|
@ -59,4 +59,15 @@ public int getWriteHoldCount() {
|
|||||||
public boolean isWriteLockedByCurrentThread() {
|
public boolean isWriteLockedByCurrentThread() {
|
||||||
return coarseLock.isWriteLockedByCurrentThread();
|
return coarseLock.isWriteLockedByCurrentThread();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the QueueLength of waiting threads.
|
||||||
|
*
|
||||||
|
* A larger number indicates greater lock contention.
|
||||||
|
*
|
||||||
|
* @return int - Number of threads waiting on this lock
|
||||||
|
*/
|
||||||
|
public int getQueueLength() {
|
||||||
|
return coarseLock.getQueueLength();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -189,4 +189,13 @@ public interface FSNamesystemMBean {
|
|||||||
* Return the number of encryption zones in the system.
|
* Return the number of encryption zones in the system.
|
||||||
*/
|
*/
|
||||||
int getNumEncryptionZones();
|
int getNumEncryptionZones();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length of the wait Queue for the FSNameSystemLock.
|
||||||
|
*
|
||||||
|
* A larger number here indicates lots of threads are waiting for
|
||||||
|
* FSNameSystemLock.
|
||||||
|
* @return the number of threads waiting to acquire FSNameSystemLock
|
||||||
|
*/
|
||||||
|
int getFsLockQueueLength();
|
||||||
}
|
}
|
||||||
|
@ -37,10 +37,15 @@
|
|||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.mockito.internal.util.reflection.Whitebox;
|
import org.mockito.internal.util.reflection.Whitebox;
|
||||||
|
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
public class TestFSNamesystem {
|
public class TestFSNamesystem {
|
||||||
|
|
||||||
@After
|
@After
|
||||||
@ -233,4 +238,27 @@ public void testGetEffectiveLayoutVersion() {
|
|||||||
assertEquals(-63,
|
assertEquals(-63,
|
||||||
FSNamesystem.getEffectiveLayoutVersion(false, -63, -61, -63));
|
FSNamesystem.getEffectiveLayoutVersion(false, -63, -61, -63));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFSLockGetWaiterCount() throws InterruptedException {
|
||||||
|
final int threadCount = 3;
|
||||||
|
final CountDownLatch latch = new CountDownLatch(threadCount);
|
||||||
|
final FSNamesystemLock rwLock = new FSNamesystemLock(true);
|
||||||
|
rwLock.writeLock().lock();
|
||||||
|
ExecutorService helper = Executors.newFixedThreadPool(threadCount);
|
||||||
|
|
||||||
|
for (int x = 0; x < threadCount; x++) {
|
||||||
|
helper.execute(new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
latch.countDown();
|
||||||
|
rwLock.readLock().lock();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
latch.await();
|
||||||
|
Assert.assertEquals("Expected number of blocked thread not found",
|
||||||
|
threadCount, rwLock.getQueueLength());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,8 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import com.google.common.util.concurrent.Uninterruptibles;
|
import com.google.common.util.concurrent.Uninterruptibles;
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
@ -173,7 +173,7 @@ public void testNameNodeMXBeanInfo() throws Exception {
|
|||||||
// This will cause the first dir to fail.
|
// This will cause the first dir to fail.
|
||||||
File failedNameDir = new File(nameDirUris.iterator().next());
|
File failedNameDir = new File(nameDirUris.iterator().next());
|
||||||
assertEquals(0, FileUtil.chmod(
|
assertEquals(0, FileUtil.chmod(
|
||||||
new File(failedNameDir, "current").getAbsolutePath(), "000"));
|
new File(failedNameDir, "current").getAbsolutePath(), "000"));
|
||||||
cluster.getNameNodeRpc().rollEditLog();
|
cluster.getNameNodeRpc().rollEditLog();
|
||||||
|
|
||||||
nameDirStatuses = (String) (mbs.getAttribute(mxbeanName,
|
nameDirStatuses = (String) (mbs.getAttribute(mxbeanName,
|
||||||
@ -376,4 +376,23 @@ public void testTopUsersNoPeriods() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 120000)
|
||||||
|
public void testQueueLength() throws Exception {
|
||||||
|
final Configuration conf = new Configuration();
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||||
|
ObjectName mxbeanNameFs =
|
||||||
|
new ObjectName("Hadoop:service=NameNode,name=FSNamesystem");
|
||||||
|
int queueLength = (int) mbs.getAttribute(mxbeanNameFs, "LockQueueLength");
|
||||||
|
assertEquals(0, queueLength);
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user