YARN-5662. Provide an option to enable ContainerMonitor. Contributed by Jian He.

This commit is contained in:
Varun Vasudev 2016-09-28 15:18:18 +05:30
parent 03f519a757
commit bc2656f09f
5 changed files with 69 additions and 17 deletions

View File

@ -1106,7 +1106,7 @@ public static boolean isAclEnabled(Configuration conf) {
public static final String NM_VMEM_PMEM_RATIO =
NM_PREFIX + "vmem-pmem-ratio";
public static final float DEFAULT_NM_VMEM_PMEM_RATIO = 2.1f;
/** Number of Virtual CPU Cores which can be allocated for containers.*/
public static final String NM_VCORES = NM_PREFIX + "resource.cpu-vcores";
public static final int DEFAULT_NM_VCORES = 8;
@ -1259,6 +1259,10 @@ public static boolean isAclEnabled(Configuration conf) {
NM_PREFIX + "resource-monitor.interval-ms";
public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000;
/** Whether the container monitor is enabled. */
public static final String NM_CONTAINER_MONITOR_ENABLED =
NM_PREFIX + "container-monitor.enabled";
/** Default for {@link #NM_CONTAINER_MONITOR_ENABLED}: enabled. */
public static final boolean DEFAULT_NM_CONTAINER_MONITOR_ENABLED = true;
/** How often to monitor containers.*/
public final static String NM_CONTAINER_MON_INTERVAL_MS =
NM_PREFIX + "container-monitor.interval-ms";

View File

@ -95,6 +95,8 @@ public String toString() {
sb.append("Capability: ").append(getCapability()).append(", ");
sb.append("Diagnostics: ").append(getDiagnostics()).append(", ");
sb.append("ExitStatus: ").append(getExitStatus()).append(", ");
sb.append("IP: ").append(getIPs()).append(", ");
sb.append("Host: ").append(getHost());
sb.append("]");
return sb.toString();
}

View File

@ -1364,6 +1364,12 @@
<name>yarn.nodemanager.resource-calculator.class</name>
</property>
<property>
<description>Enable container monitor</description>
<name>yarn.nodemanager.container-monitor.enabled</name>
<value>true</value>
</property>
<property>
<description>How often to monitor containers. If not set, the value for
yarn.nodemanager.resource-monitor.interval-ms will be used.</description>

View File

@ -171,7 +171,7 @@ protected void serviceInit(Configuration conf) throws Exception {
LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
containersMonitorEnabled = isEnabled();
containersMonitorEnabled = isContainerMonitorEnabled();
LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled);
nodeCpuPercentageForYARN =
@ -204,23 +204,24 @@ protected void serviceInit(Configuration conf) throws Exception {
super.serviceInit(conf);
}
private boolean isEnabled() {
/**
 * Reads the container-monitor switch from the configuration.
 *
 * @return the boolean configured under
 *         {@link YarnConfiguration#NM_CONTAINER_MONITOR_ENABLED}, with
 *         {@link YarnConfiguration#DEFAULT_NM_CONTAINER_MONITOR_ENABLED}
 *         passed as the fallback default
 */
private boolean isContainerMonitorEnabled() {
  final boolean monitorEnabled = conf.getBoolean(
      YarnConfiguration.NM_CONTAINER_MONITOR_ENABLED,
      YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED);
  return monitorEnabled;
}
/**
 * Checks whether the resource-calculator facilities used to track a
 * container's process tree are available.
 *
 * <p>This method does not consult the virtual/physical memory check flags:
 * monitoring is expected to continue when both checks are disabled.
 *
 * @return {@code false} if {@code resourceCalculatorPlugin} is null or if
 *         no {@link ResourceCalculatorProcessTree} can be obtained for
 *         {@code processTreeClass}; {@code true} otherwise
 */
private boolean isResourceCalculatorAvailable() {
  if (resourceCalculatorPlugin == null) {
    LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
        + this.getClass().getName() + " is disabled.");
    return false;
  }
  // Probe with a dummy pid ("0"): only the ability to obtain a process tree
  // matters here, not the tree itself.
  if (ResourceCalculatorProcessTree
      .getResourceCalculatorProcessTree("0", processTreeClass, conf)
      == null) {
    LOG.info("ResourceCalculatorProcessTree is unavailable on this system. "
        + this.getClass().getName() + " is disabled.");
    return false;
  }
  return true;
}
@ -462,7 +463,7 @@ public void run() {
}
// End of initializing any uninitialized processTrees
if (pId == null) {
if (pId == null || !isResourceCalculatorAvailable()) {
continue; // processTree cannot be tracked
}

View File

@ -29,15 +29,18 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import com.google.common.base.Supplier;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
@ -181,6 +184,42 @@ public void testProcessTreeLimits() throws IOException {
}
}
/**
 * Verifies that the container monitor keeps running even when both the
 * virtual-memory and physical-memory checks are disabled.
 *
 * <p>Starts a container running {@code sleep 6}, waits for it to reach
 * {@code RUNNING}, then polls every 300 ms for up to 10 s until the
 * container status reports a non-null host.
 * NOTE(review): the host is presumably filled in by the monitor, which is
 * what makes this a check that the monitor ran -- confirm.
 *
 * @throws Exception if starting the container or waiting on it fails
 */
@Test
public void testContainerMonitor() throws Exception {
  conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
  conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
  containerManager.start();

  // Named launchContext so it does not shadow the this.context field.
  ContainerLaunchContext launchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  launchContext.setCommands(Arrays.asList("sleep 6"));
  final ContainerId cId = createContainerId(1705);

  // Start the container.
  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      launchContext,
      createContainerToken(cId, DUMMY_RM_IDENTIFIER, this.context.getNodeId(),
          user, this.context.getContainerTokenSecretManager()));
  List<StartContainerRequest> requestList = Arrays.asList(scRequest);
  StartContainersRequest allRequests =
      StartContainersRequest.newInstance(requestList);
  containerManager.startContainers(allRequests);
  BaseContainerManagerTest.waitForContainerState(
      containerManager, cId, ContainerState.RUNNING);
  Thread.sleep(2000);

  Supplier<Boolean> hostReported = new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      try {
        GetContainerStatusesRequest statusRequest =
            GetContainerStatusesRequest.newInstance(Arrays.asList(cId));
        return containerManager.getContainerStatuses(statusRequest)
            .getContainerStatuses().get(0).getHost() != null;
      } catch (Exception e) {
        // Any failure is treated as "not yet"; the wait below keeps polling.
        return false;
      }
    }
  };
  GenericTestUtils.waitFor(hostReported, 300, 10000);
}
@Test
public void testContainerKillOnMemoryOverflow() throws IOException,
InterruptedException, YarnException {