diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index de4dad09d0..e7da77ba24 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -635,6 +635,9 @@ Release 2.9.0 - UNRELEASED IMPROVEMENTS + HADOOP-12321. Make JvmPauseMonitor an AbstractService. + (Sunil G via Stevel) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java index 73d730913b..882e4a7328 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JvmPauseMonitor.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.AbstractService; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -43,21 +44,21 @@ * detected, the thread logs a message. */ @InterfaceAudience.Private -public class JvmPauseMonitor { +public class JvmPauseMonitor extends AbstractService { private static final Log LOG = LogFactory.getLog( JvmPauseMonitor.class); /** The target sleep time */ private static final long SLEEP_INTERVAL_MS = 500; - + /** log WARN if we detect a pause longer than this threshold */ - private final long warnThresholdMs; + private long warnThresholdMs; private static final String WARN_THRESHOLD_KEY = "jvm.pause.warn-threshold.ms"; private static final long WARN_THRESHOLD_DEFAULT = 10000; - + /** log INFO if we detect a pause longer than this threshold */ - private final long infoThresholdMs; + private long infoThresholdMs; private static final String INFO_THRESHOLD_KEY = "jvm.pause.info-threshold.ms"; private static final long INFO_THRESHOLD_DEFAULT = 1000; @@ -65,29 +66,32 @@ public class JvmPauseMonitor { private long numGcWarnThresholdExceeded = 0; private long numGcInfoThresholdExceeded = 0; private long totalGcExtraSleepTime = 0; - + private Thread monitorThread; private volatile boolean shouldRun = true; - public JvmPauseMonitor(Configuration conf) { + public JvmPauseMonitor() { + super(JvmPauseMonitor.class.getName()); + } + + @Override + protected void serviceInit(Configuration conf) throws Exception { this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT); this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT); + super.serviceInit(conf); } - - public void start() { - Preconditions.checkState(monitorThread == null, - "Already started"); + + @Override + protected void serviceStart() throws Exception { monitorThread = new Daemon(new Monitor()); - if (shouldRun) { - monitorThread.start(); - } else { - LOG.warn("stop() was called before start() completed"); - } + monitorThread.start(); + super.serviceStart(); } - - public void stop() { + + @Override + protected void serviceStop() throws Exception { shouldRun = false; - if (isStarted()) { + if (monitorThread != null) { monitorThread.interrupt(); try { monitorThread.join(); @@ -95,12 +99,13 @@ public void stop() { Thread.currentThread().interrupt(); } } + super.serviceStop(); } public boolean isStarted() { return monitorThread != null; } - + public long getNumGcWarnThreadholdExceeded() { return numGcWarnThresholdExceeded; } @@ -212,8 +217,11 @@ public void run() { * with a 1GB heap will very quickly go into "GC hell" and result in * log messages about the GC pauses. */ + @SuppressWarnings("resource") public static void main(String []args) throws Exception { - new JvmPauseMonitor(new Configuration()).start(); + JvmPauseMonitor monitor = new JvmPauseMonitor(); + monitor.init(new Configuration()); + monitor.start(); List list = Lists.newArrayList(); int i = 0; while (true) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java index 3cb3384d83..69e75655c9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/source/TestJvmMetrics.java @@ -18,7 +18,11 @@ package org.apache.hadoop.metrics2.source; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.Timeout; import static org.mockito.Mockito.*; import static org.apache.hadoop.test.MetricsAsserts.*; @@ -26,6 +30,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.service.ServiceOperations; +import org.apache.hadoop.service.ServiceStateException; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.JvmPauseMonitor; import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*; @@ -33,8 +40,23 @@ public class TestJvmMetrics { - @Test public void testPresence() { - JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(new Configuration()); + @Rule + public Timeout timeout = new Timeout(30000); + private JvmPauseMonitor pauseMonitor; + + /** + * Robust shutdown of the pause monitor if it hasn't been stopped already. + */ + @After + public void teardown() { + ServiceOperations.stop(pauseMonitor); + } + + @Test + public void testPresence() { + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(new Configuration()); + pauseMonitor.start(); JvmMetrics jvmMetrics = new JvmMetrics("test", "test"); jvmMetrics.setPauseMonitor(pauseMonitor); MetricsRecordBuilder rb = getMetrics(jvmMetrics); @@ -54,4 +76,48 @@ else if (info.name().startsWith("Log")) verify(rb).addCounter(eq(info), anyLong()); } } + + @Test + public void testDoubleStop() throws Throwable { + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(new Configuration()); + pauseMonitor.start(); + pauseMonitor.stop(); + pauseMonitor.stop(); + } + + @Test + public void testDoubleStart() throws Throwable { + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(new Configuration()); + pauseMonitor.start(); + pauseMonitor.start(); + pauseMonitor.stop(); + } + + @Test + public void testStopBeforeStart() throws Throwable { + pauseMonitor = new JvmPauseMonitor(); + try { + pauseMonitor.init(new Configuration()); + pauseMonitor.stop(); + pauseMonitor.start(); + Assert.fail("Expected an exception, got " + pauseMonitor); + } catch (ServiceStateException e) { + GenericTestUtils.assertExceptionContains("cannot enter state", e); + } + } + + @Test + public void testStopBeforeInit() throws Throwable { + pauseMonitor = new JvmPauseMonitor(); + try { + pauseMonitor.stop(); + pauseMonitor.init(new Configuration()); + Assert.fail("Expected an exception, got " + pauseMonitor); + } catch (ServiceStateException e) { + GenericTestUtils.assertExceptionContains("cannot enter state", e); + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index 05d806aed1..028252460b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -243,7 +243,8 @@ private void clearDirectory(String writeDumpDir) throws IOException { @Override public void startDaemons() { if (pauseMonitor == null) { - pauseMonitor = new JvmPauseMonitor(config); + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(config); pauseMonitor.start(); metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 118df6ea60..642414f3cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -893,6 +893,9 @@ Release 2.9.0 - UNRELEASED HDFS-9414. Refactor reconfiguration of ClientDatanodeProtocol for reusability. (Xiaobing Zhou via Arpit Agarwal) + HDFS-8947. NameNode, DataNode and NFS gateway to support JvmPauseMonitor as + a service. (Sunil G via Stevel) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 15e556b676..4b6e6e81af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1244,7 +1244,8 @@ void startDataNode(Configuration conf, registerMXBean(); initDataXceiver(conf); startInfoServer(conf); - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(conf); pauseMonitor.start(); // BlockPoolTokenSecretManager is required to create ipc server. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index c3f3017952..acb4069c5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -691,7 +691,8 @@ protected void initialize(Configuration conf) throws IOException { httpServer.setFSImage(getFSImage()); } - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + pauseMonitor.init(conf); pauseMonitor.start(); metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java index b5ac91a937..c93c4cffb7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java @@ -145,7 +145,8 @@ protected void serviceInit(Configuration conf) throws Exception { DefaultMetricsSystem.initialize("JobHistoryServer"); JvmMetrics jm = JvmMetrics.initSingleton("JobHistoryServer", null); - pauseMonitor = new JvmPauseMonitor(getConfig()); + pauseMonitor = new JvmPauseMonitor(); + addService(pauseMonitor); jm.setPauseMonitor(pauseMonitor); super.serviceInit(config); @@ -198,16 +199,12 @@ public static InetSocketAddress getBindAddress(Configuration conf) { @Override protected void serviceStart() throws Exception { - pauseMonitor.start(); super.serviceStart(); } @Override protected void serviceStop() throws Exception { DefaultMetricsSystem.shutdown(); - if (pauseMonitor != null) { - pauseMonitor.stop(); - } super.serviceStop(); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java index 32b2cffec1..c914474303 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java @@ -77,7 +77,6 @@ public void testStartStopServer() throws Exception { Configuration config = new Configuration(); historyServer.init(config); assertEquals(STATE.INITED, historyServer.getServiceState()); - assertEquals(6, historyServer.getServices().size()); HistoryClientService historyService = historyServer.getClientService(); assertNotNull(historyServer.getClientService()); assertEquals(STATE.INITED, historyService.getServiceState()); diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1fed6a6917..632c88810a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -63,6 +63,10 @@ Release 2.9.0 - UNRELEASED IMPROVEMENTS + YARN-4072. ApplicationHistoryServer, WebAppProxyServer, NodeManager and + ResourceManager to support JvmPauseMonitor as a service. + (Sunil G via Stevel) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java index cda84a22ea..160ad8ddf0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java @@ -103,7 +103,8 @@ protected void serviceInit(Configuration conf) throws Exception { DefaultMetricsSystem.initialize("ApplicationHistoryServer"); JvmMetrics jm = JvmMetrics.initSingleton("ApplicationHistoryServer", null); - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + addService(pauseMonitor); jm.setPauseMonitor(pauseMonitor); super.serviceInit(conf); } @@ -116,9 +117,6 @@ protected void serviceStart() throws Exception { throw new YarnRuntimeException("Failed to login", ie); } - if (pauseMonitor != null) { - pauseMonitor.start(); - } super.serviceStart(); startWebApp(); } @@ -128,9 +126,6 @@ protected void serviceStop() throws Exception { if (webApp != null) { webApp.stop(); } - if (pauseMonitor != null) { - pauseMonitor.stop(); - } DefaultMetricsSystem.shutdown(); super.serviceStop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryServer.java index 8dbccaab73..b9d1c5e492 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryServer.java @@ -73,7 +73,6 @@ public void testStartStopServer() throws Exception { historyServer = new ApplicationHistoryServer(); historyServer.init(config); assertEquals(STATE.INITED, historyServer.getServiceState()); - assertEquals(5, historyServer.getServices().size()); ApplicationHistoryClientService historyService = historyServer.getClientService(); assertNotNull(historyServer.getClientService()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 04e383f0b1..a9a54118cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -344,7 +344,8 @@ protected void serviceInit(Configuration conf) throws Exception { dispatcher.register(NodeManagerEventType.class, this); addService(dispatcher); - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + addService(pauseMonitor); metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); DefaultMetricsSystem.initialize("NodeManager"); @@ -364,7 +365,6 @@ protected void serviceStart() throws Exception { } catch (IOException e) { throw new YarnRuntimeException("Failed NodeManager login", e); } - pauseMonitor.start(); super.serviceStart(); } @@ -376,9 +376,6 @@ protected void serviceStop() throws Exception { try { super.serviceStop(); DefaultMetricsSystem.shutdown(); - if (pauseMonitor != null) { - pauseMonitor.stop(); - } } finally { // YARN-3641: NM's services stop get failed shouldn't block the // release of NMLevelDBStore. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 01a1c8f0ff..d392410672 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -518,7 +518,8 @@ protected void serviceInit(Configuration configuration) throws Exception { DefaultMetricsSystem.initialize("ResourceManager"); JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null); - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + addService(pauseMonitor); jm.setPauseMonitor(pauseMonitor); // Initialize the Reservation system @@ -574,8 +575,6 @@ protected void serviceStart() throws Exception { // need events to move to further states. rmStore.start(); - pauseMonitor.start(); - if(recoveryEnabled) { try { LOG.info("Recovery started"); @@ -602,10 +601,6 @@ protected void serviceStop() throws Exception { super.serviceStop(); - if (pauseMonitor != null) { - pauseMonitor.stop(); - } - DefaultMetricsSystem.shutdown(); if (rmContext != null) { RMStateStore store = rmContext.getStateStore(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServer.java index 4c1caa6357..a5dd8f6850 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServer.java @@ -67,7 +67,8 @@ protected void serviceInit(Configuration conf) throws Exception { DefaultMetricsSystem.initialize("WebAppProxyServer"); JvmMetrics jm = JvmMetrics.initSingleton("WebAppProxyServer", null); - pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor = new JvmPauseMonitor(); + addService(pauseMonitor); jm.setPauseMonitor(pauseMonitor); super.serviceInit(config); @@ -75,9 +76,6 @@ protected void serviceInit(Configuration conf) throws Exception { @Override protected void serviceStart() throws Exception { - if (pauseMonitor != null) { - pauseMonitor.start(); - } super.serviceStart(); } @@ -85,9 +83,6 @@ protected void serviceStart() throws Exception { protected void serviceStop() throws Exception { super.serviceStop(); DefaultMetricsSystem.shutdown(); - if (pauseMonitor != null) { - pauseMonitor.stop(); - } } /**