YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. Contributed by Robert Kanter.

This commit is contained in:
Junping Du 2015-08-06 06:49:45 -07:00
parent 154c9d2e42
commit cfee02b3bd
4 changed files with 35 additions and 5 deletions

View File

@ -374,6 +374,9 @@ Release 2.8.0 - UNRELEASED
YARN-3961. Expose pending, running and reserved containers of a queue in REST
api and yarn top (adhoot via asuresh)
YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. (Robert Kanter
via junping_du)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -40,6 +40,7 @@
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
@ -83,6 +84,7 @@ public class NodeManager extends CompositeService
private static final Log LOG = LogFactory.getLog(NodeManager.class);
private static long nmStartupTime = System.currentTimeMillis();
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
private JvmPauseMonitor pauseMonitor;
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private NodeLabelsProvider nodeLabelsProvider;
@ -308,6 +310,9 @@ protected void serviceInit(Configuration conf) throws Exception {
dispatcher.register(NodeManagerEventType.class, this);
addService(dispatcher);
pauseMonitor = new JvmPauseMonitor(conf);
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
DefaultMetricsSystem.initialize("NodeManager");
// StatusUpdater should be added last so that it gets started last
@ -325,6 +330,7 @@ protected void serviceStart() throws Exception {
} catch (IOException e) {
throw new YarnRuntimeException("Failed NodeManager login", e);
}
pauseMonitor.start();
super.serviceStart();
}
@ -336,6 +342,9 @@ protected void serviceStop() throws Exception {
try {
super.serviceStop();
DefaultMetricsSystem.shutdown();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
} finally {
// YARN-3641: a failure while stopping the NM's services shouldn't block the
// release of NMLevelDBStore.

View File

@ -57,17 +57,26 @@ public class NodeManagerMetrics {
@Metric("Disk utilization % on good log dirs")
MutableGaugeInt goodLogDirsDiskUtilizationPerc;
private JvmMetrics jvmMetrics = null;
private long allocatedMB;
private long availableMB;
/**
 * Constructs the metrics object and keeps a reference to the supplied
 * JvmMetrics so that it can be handed back by {@code getJvmMetrics()}.
 *
 * @param jvmMetrics the JvmMetrics instance to store in this object
 */
public NodeManagerMetrics(JvmMetrics jvmMetrics) {
this.jvmMetrics = jvmMetrics;
}
/**
 * Creates a NodeManagerMetrics by delegating to
 * {@code create(MetricsSystem)} with {@code DefaultMetricsSystem.instance()}.
 *
 * @return the NodeManagerMetrics returned by {@code create(MetricsSystem)}
 */
public static NodeManagerMetrics create() {
return create(DefaultMetricsSystem.instance());
}
/**
 * Creates a JvmMetrics named "NodeManager" on the given metrics system and
 * registers a new NodeManagerMetrics with that same metrics system.
 *
 * NOTE(review): this span comes from a diff view with the +/- markers
 * stripped. It appears to show both the removed lines (JvmMetrics result
 * discarded, no-arg constructor) and the added lines (JvmMetrics captured and
 * passed to the constructor) -- confirm against the actual file.
 *
 * @param ms the metrics system used for both the JvmMetrics creation and the
 *           registration of the new NodeManagerMetrics
 * @return the value returned by {@code ms.register(...)}
 */
static NodeManagerMetrics create(MetricsSystem ms) {
JvmMetrics.create("NodeManager", null, ms);
return ms.register(new NodeManagerMetrics());
JvmMetrics jm = JvmMetrics.create("NodeManager", null, ms);
return ms.register(new NodeManagerMetrics(jm));
}
/**
 * Returns the JvmMetrics reference held in the {@code jvmMetrics} field.
 *
 * @return the stored JvmMetrics
 */
public JvmMetrics getJvmMetrics() {
return jvmMetrics;
}
// Potential instrumentation interface methods

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.service.Service;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
@ -157,6 +158,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
private WebApp webApp;
private AppReportFetcher fetcher = null;
protected ResourceTrackerService resourceTracker;
private JvmPauseMonitor pauseMonitor;
@VisibleForTesting
protected String webAppAddress;
@ -511,7 +513,9 @@ protected void serviceInit(Configuration configuration) throws Exception {
rmContext.setResourceTrackerService(resourceTracker);
DefaultMetricsSystem.initialize("ResourceManager");
JvmMetrics.initSingleton("ResourceManager", null);
JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
pauseMonitor = new JvmPauseMonitor(conf);
jm.setPauseMonitor(pauseMonitor);
// Initialize the Reservation system
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
@ -566,6 +570,8 @@ protected void serviceStart() throws Exception {
// need events to move to further states.
rmStore.start();
pauseMonitor.start();
if(recoveryEnabled) {
try {
LOG.info("Recovery started");
@ -591,6 +597,9 @@ protected void serviceStart() throws Exception {
protected void serviceStop() throws Exception {
DefaultMetricsSystem.shutdown();
if (pauseMonitor != null) {
pauseMonitor.stop();
}
if (rmContext != null) {
RMStateStore store = rmContext.getStateStore();