diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 86e8a95d8a..f3009a1ae1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1106,7 +1106,7 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_VMEM_PMEM_RATIO = NM_PREFIX + "vmem-pmem-ratio"; public static final float DEFAULT_NM_VMEM_PMEM_RATIO = 2.1f; - + /** Number of Virtual CPU Cores which can be allocated for containers.*/ public static final String NM_VCORES = NM_PREFIX + "resource.cpu-vcores"; public static final int DEFAULT_NM_VCORES = 8; @@ -1259,6 +1259,10 @@ public static boolean isAclEnabled(Configuration conf) { NM_PREFIX + "resource-monitor.interval-ms"; public static final int DEFAULT_NM_RESOURCE_MON_INTERVAL_MS = 3000; + public static final String NM_CONTAINER_MONITOR_ENABLED = + NM_PREFIX + "container-monitor.enabled"; + public static final boolean DEFAULT_NM_CONTAINER_MONITOR_ENABLED = true; + /** How often to monitor containers.*/ public final static String NM_CONTAINER_MON_INTERVAL_MS = NM_PREFIX + "container-monitor.interval-ms"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java index 7ec6619340..219cf02289 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java @@ -95,6 +95,8 @@ public String toString() { sb.append("Capability: ").append(getCapability()).append(", "); sb.append("Diagnostics: ").append(getDiagnostics()).append(", "); sb.append("ExitStatus: ").append(getExitStatus()).append(", "); + sb.append("IP: ").append(getIPs()).append(", "); + sb.append("Host: ").append(getHost()); sb.append("]"); return sb.toString(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 423b78b489..d6c33a2906 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1364,6 +1364,12 @@ yarn.nodemanager.resource-calculator.class + + Enable container monitor + yarn.nodemanager.container-monitor.enabled + true + + How often to monitor containers. If not set, the value for yarn.nodemanager.resource-monitor.interval-ms will be used. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index c456bde361..a04a914b13 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -171,7 +171,7 @@ protected void serviceInit(Configuration conf) throws Exception { LOG.info("Physical memory check enabled: " + pmemCheckEnabled); LOG.info("Virtual memory check enabled: " + vmemCheckEnabled); - containersMonitorEnabled = isEnabled(); + containersMonitorEnabled = isContainerMonitorEnabled(); LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled); nodeCpuPercentageForYARN = @@ -204,23 +204,24 @@ protected void serviceInit(Configuration conf) throws Exception { super.serviceInit(conf); } - private boolean isEnabled() { + private boolean isContainerMonitorEnabled() { + return conf.getBoolean(YarnConfiguration.NM_CONTAINER_MONITOR_ENABLED, + YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED); + } + + private boolean isResourceCalculatorAvailable() { if (resourceCalculatorPlugin == null) { - LOG.info("ResourceCalculatorPlugin is unavailable on this system. " - + this.getClass().getName() + " is disabled."); - return false; - } - if (ResourceCalculatorProcessTree.getResourceCalculatorProcessTree("0", processTreeClass, conf) == null) { - LOG.info("ResourceCalculatorProcessTree is unavailable on this system. " - + this.getClass().getName() + " is disabled."); - return false; - } - if (!(isPmemCheckEnabled() || isVmemCheckEnabled())) { - LOG.info("Neither virtual-memory nor physical-memory monitoring is " + - "needed. Not running the monitor-thread"); + LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + this + .getClass().getName() + " is disabled."); + return false; + } + if (ResourceCalculatorProcessTree + .getResourceCalculatorProcessTree("0", processTreeClass, conf) + == null) { + LOG.info("ResourceCalculatorProcessTree is unavailable on this system. " + + this.getClass().getName() + " is disabled."); return false; } - return true; } @@ -462,7 +463,7 @@ public void run() { } // End of initializing any uninitialized processTrees - if (pId == null) { + if (pId == null || !isResourceCalculatorAvailable()) { continue; // processTree cannot be tracked } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 1b4e3b7d77..0f1c6f5e3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -29,15 +29,18 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; +import com.google.common.base.Supplier; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; @@ -181,6 +184,42 @@ public void testProcessTreeLimits() throws IOException { } } + // Test that even if VMEM_PMEM_CHECK is not enabled, container monitor will + // run. + @Test + public void testContainerMonitor() throws Exception { + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + containerManager.start(); + ContainerLaunchContext context = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + context.setCommands(Arrays.asList("sleep 6")); + ContainerId cId = createContainerId(1705); + + // start the container + StartContainerRequest scRequest = StartContainerRequest.newInstance(context, + createContainerToken(cId, DUMMY_RM_IDENTIFIER, this.context.getNodeId(), + user, this.context.getContainerTokenSecretManager())); + StartContainersRequest allRequests = + StartContainersRequest.newInstance(Arrays.asList(scRequest)); + containerManager.startContainers(allRequests); + BaseContainerManagerTest + .waitForContainerState(containerManager, cId, ContainerState.RUNNING); + Thread.sleep(2000); + GenericTestUtils.waitFor(new Supplier() { + public Boolean get() { + try { + return containerManager.getContainerStatuses( + GetContainerStatusesRequest.newInstance(Arrays.asList(cId))) + .getContainerStatuses().get(0).getHost() != null; + } catch (Exception e) { + return false; + } + } + + }, 300, 10000); + } + @Test public void testContainerKillOnMemoryOverflow() throws IOException, InterruptedException, YarnException {