HADOOP-12321. Make JvmPauseMonitor an AbstractService. (Sunil G via Stevel) [includes HDFS-8947 MAPREDUCE-6462 and YARN-4072]

This commit is contained in:
Steve Loughran 2015-12-06 17:42:56 +00:00
parent 742632e346
commit 65f395226b
15 changed files with 123 additions and 59 deletions

View File

@ -635,6 +635,9 @@ Release 2.9.0 - UNRELEASED
IMPROVEMENTS
HADOOP-12321. Make JvmPauseMonitor an AbstractService.
(Sunil G via Stevel)
OPTIMIZATIONS
BUG FIXES

View File

@ -28,6 +28,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@ -43,21 +44,21 @@
* detected, the thread logs a message.
*/
@InterfaceAudience.Private
public class JvmPauseMonitor {
public class JvmPauseMonitor extends AbstractService {
private static final Log LOG = LogFactory.getLog(
JvmPauseMonitor.class);
/** The target sleep time */
private static final long SLEEP_INTERVAL_MS = 500;
/** log WARN if we detect a pause longer than this threshold */
private final long warnThresholdMs;
private long warnThresholdMs;
private static final String WARN_THRESHOLD_KEY =
"jvm.pause.warn-threshold.ms";
private static final long WARN_THRESHOLD_DEFAULT = 10000;
/** log INFO if we detect a pause longer than this threshold */
private final long infoThresholdMs;
private long infoThresholdMs;
private static final String INFO_THRESHOLD_KEY =
"jvm.pause.info-threshold.ms";
private static final long INFO_THRESHOLD_DEFAULT = 1000;
@ -65,29 +66,32 @@ public class JvmPauseMonitor {
private long numGcWarnThresholdExceeded = 0;
private long numGcInfoThresholdExceeded = 0;
private long totalGcExtraSleepTime = 0;
private Thread monitorThread;
private volatile boolean shouldRun = true;
public JvmPauseMonitor(Configuration conf) {
public JvmPauseMonitor() {
super(JvmPauseMonitor.class.getName());
}
@Override
protected void serviceInit(Configuration conf) throws Exception {
this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT);
this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT);
super.serviceInit(conf);
}
public void start() {
Preconditions.checkState(monitorThread == null,
"Already started");
@Override
protected void serviceStart() throws Exception {
monitorThread = new Daemon(new Monitor());
if (shouldRun) {
monitorThread.start();
} else {
LOG.warn("stop() was called before start() completed");
}
monitorThread.start();
super.serviceStart();
}
public void stop() {
@Override
protected void serviceStop() throws Exception {
shouldRun = false;
if (isStarted()) {
if (monitorThread != null) {
monitorThread.interrupt();
try {
monitorThread.join();
@ -95,12 +99,13 @@ public void stop() {
Thread.currentThread().interrupt();
}
}
super.serviceStop();
}
public boolean isStarted() {
return monitorThread != null;
}
public long getNumGcWarnThreadholdExceeded() {
return numGcWarnThresholdExceeded;
}
@ -212,8 +217,11 @@ public void run() {
* with a 1GB heap will very quickly go into "GC hell" and result in
* log messages about the GC pauses.
*/
@SuppressWarnings("resource")
public static void main(String []args) throws Exception {
new JvmPauseMonitor(new Configuration()).start();
JvmPauseMonitor monitor = new JvmPauseMonitor();
monitor.init(new Configuration());
monitor.start();
List<String> list = Lists.newArrayList();
int i = 0;
while (true) {

View File

@ -18,7 +18,11 @@
package org.apache.hadoop.metrics2.source;
import org.junit.After;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import static org.mockito.Mockito.*;
import static org.apache.hadoop.test.MetricsAsserts.*;
@ -26,6 +30,9 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.service.ServiceOperations;
import org.apache.hadoop.service.ServiceStateException;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.JvmPauseMonitor;
import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.*;
@ -33,8 +40,23 @@
public class TestJvmMetrics {
@Test public void testPresence() {
JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(new Configuration());
@Rule
public Timeout timeout = new Timeout(30000);
private JvmPauseMonitor pauseMonitor;
/**
* Robust shutdown of the pause monitor if it hasn't been stopped already.
*/
@After
public void teardown() {
ServiceOperations.stop(pauseMonitor);
}
@Test
public void testPresence() {
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(new Configuration());
pauseMonitor.start();
JvmMetrics jvmMetrics = new JvmMetrics("test", "test");
jvmMetrics.setPauseMonitor(pauseMonitor);
MetricsRecordBuilder rb = getMetrics(jvmMetrics);
@ -54,4 +76,48 @@ else if (info.name().startsWith("Log"))
verify(rb).addCounter(eq(info), anyLong());
}
}
@Test
public void testDoubleStop() throws Throwable {
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(new Configuration());
pauseMonitor.start();
pauseMonitor.stop();
pauseMonitor.stop();
}
@Test
public void testDoubleStart() throws Throwable {
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(new Configuration());
pauseMonitor.start();
pauseMonitor.start();
pauseMonitor.stop();
}
@Test
public void testStopBeforeStart() throws Throwable {
pauseMonitor = new JvmPauseMonitor();
try {
pauseMonitor.init(new Configuration());
pauseMonitor.stop();
pauseMonitor.start();
Assert.fail("Expected an exception, got " + pauseMonitor);
} catch (ServiceStateException e) {
GenericTestUtils.assertExceptionContains("cannot enter state", e);
}
}
@Test
public void testStopBeforeInit() throws Throwable {
pauseMonitor = new JvmPauseMonitor();
try {
pauseMonitor.stop();
pauseMonitor.init(new Configuration());
Assert.fail("Expected an exception, got " + pauseMonitor);
} catch (ServiceStateException e) {
GenericTestUtils.assertExceptionContains("cannot enter state", e);
}
}
}

View File

@ -243,7 +243,8 @@ private void clearDirectory(String writeDumpDir) throws IOException {
@Override
public void startDaemons() {
if (pauseMonitor == null) {
pauseMonitor = new JvmPauseMonitor(config);
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(config);
pauseMonitor.start();
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
}

View File

@ -893,6 +893,9 @@ Release 2.9.0 - UNRELEASED
HDFS-9414. Refactor reconfiguration of ClientDatanodeProtocol for
reusability. (Xiaobing Zhou via Arpit Agarwal)
HDFS-8947. NameNode, DataNode and NFS gateway to support JvmPauseMonitor as
a service. (Sunil G via Stevel)
OPTIMIZATIONS
BUG FIXES

View File

@ -1244,7 +1244,8 @@ void startDataNode(Configuration conf,
registerMXBean();
initDataXceiver(conf);
startInfoServer(conf);
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(conf);
pauseMonitor.start();
// BlockPoolTokenSecretManager is required to create ipc server.

View File

@ -691,7 +691,8 @@ protected void initialize(Configuration conf) throws IOException {
httpServer.setFSImage(getFSImage());
}
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
pauseMonitor.init(conf);
pauseMonitor.start();
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

View File

@ -145,7 +145,8 @@ protected void serviceInit(Configuration conf) throws Exception {
DefaultMetricsSystem.initialize("JobHistoryServer");
JvmMetrics jm = JvmMetrics.initSingleton("JobHistoryServer", null);
pauseMonitor = new JvmPauseMonitor(getConfig());
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
super.serviceInit(config);
@ -198,16 +199,12 @@ public static InetSocketAddress getBindAddress(Configuration conf) {
@Override
protected void serviceStart() throws Exception {
pauseMonitor.start();
super.serviceStart();
}
@Override
protected void serviceStop() throws Exception {
DefaultMetricsSystem.shutdown();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
super.serviceStop();
}

View File

@ -77,7 +77,6 @@ public void testStartStopServer() throws Exception {
Configuration config = new Configuration();
historyServer.init(config);
assertEquals(STATE.INITED, historyServer.getServiceState());
assertEquals(6, historyServer.getServices().size());
HistoryClientService historyService = historyServer.getClientService();
assertNotNull(historyServer.getClientService());
assertEquals(STATE.INITED, historyService.getServiceState());

View File

@ -63,6 +63,10 @@ Release 2.9.0 - UNRELEASED
IMPROVEMENTS
YARN-4072. ApplicationHistoryServer, WebAppProxyServer, NodeManager and
ResourceManager to support JvmPauseMonitor as a service.
(Sunil G via Stevel)
OPTIMIZATIONS
BUG FIXES

View File

@ -103,7 +103,8 @@ protected void serviceInit(Configuration conf) throws Exception {
DefaultMetricsSystem.initialize("ApplicationHistoryServer");
JvmMetrics jm = JvmMetrics.initSingleton("ApplicationHistoryServer", null);
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
super.serviceInit(conf);
}
@ -116,9 +117,6 @@ protected void serviceStart() throws Exception {
throw new YarnRuntimeException("Failed to login", ie);
}
if (pauseMonitor != null) {
pauseMonitor.start();
}
super.serviceStart();
startWebApp();
}
@ -128,9 +126,6 @@ protected void serviceStop() throws Exception {
if (webApp != null) {
webApp.stop();
}
if (pauseMonitor != null) {
pauseMonitor.stop();
}
DefaultMetricsSystem.shutdown();
super.serviceStop();
}

View File

@ -73,7 +73,6 @@ public void testStartStopServer() throws Exception {
historyServer = new ApplicationHistoryServer();
historyServer.init(config);
assertEquals(STATE.INITED, historyServer.getServiceState());
assertEquals(5, historyServer.getServices().size());
ApplicationHistoryClientService historyService =
historyServer.getClientService();
assertNotNull(historyServer.getClientService());

View File

@ -344,7 +344,8 @@ protected void serviceInit(Configuration conf) throws Exception {
dispatcher.register(NodeManagerEventType.class, this);
addService(dispatcher);
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
DefaultMetricsSystem.initialize("NodeManager");
@ -364,7 +365,6 @@ protected void serviceStart() throws Exception {
} catch (IOException e) {
throw new YarnRuntimeException("Failed NodeManager login", e);
}
pauseMonitor.start();
super.serviceStart();
}
@ -376,9 +376,6 @@ protected void serviceStop() throws Exception {
try {
super.serviceStop();
DefaultMetricsSystem.shutdown();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
} finally {
// YARN-3641: NM's services stop get failed shouldn't block the
// release of NMLevelDBStore.

View File

@ -518,7 +518,8 @@ protected void serviceInit(Configuration configuration) throws Exception {
DefaultMetricsSystem.initialize("ResourceManager");
JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
// Initialize the Reservation system
@ -574,8 +575,6 @@ protected void serviceStart() throws Exception {
// need events to move to further states.
rmStore.start();
pauseMonitor.start();
if(recoveryEnabled) {
try {
LOG.info("Recovery started");
@ -602,10 +601,6 @@ protected void serviceStop() throws Exception {
super.serviceStop();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
DefaultMetricsSystem.shutdown();
if (rmContext != null) {
RMStateStore store = rmContext.getStateStore();

View File

@ -67,7 +67,8 @@ protected void serviceInit(Configuration conf) throws Exception {
DefaultMetricsSystem.initialize("WebAppProxyServer");
JvmMetrics jm = JvmMetrics.initSingleton("WebAppProxyServer", null);
pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
super.serviceInit(config);
@ -75,9 +76,6 @@ protected void serviceInit(Configuration conf) throws Exception {
@Override
protected void serviceStart() throws Exception {
if (pauseMonitor != null) {
pauseMonitor.start();
}
super.serviceStart();
}
@ -85,9 +83,6 @@ protected void serviceStart() throws Exception {
protected void serviceStop() throws Exception {
super.serviceStop();
DefaultMetricsSystem.shutdown();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
}
/**