From f76e3c3db789dd6866fa0fef8e014cbfe8c8f80d Mon Sep 17 00:00:00 2001
From: Robert Kanter
Date: Thu, 25 Oct 2018 10:43:36 -0700
Subject: [PATCH] YARN-8930. CGroup-based strict container memory enforcement
 does not work with CGroupElasticMemoryController (haibochen via rkanter)

---
 .../CGroupsMemoryResourceHandlerImpl.java     |  25 ----
 .../resources/MemoryResourceHandler.java      |  10 --
 .../monitor/ContainersMonitorImpl.java        | 116 ++++++++----------
 .../TestCGroupsMemoryResourceHandlerImpl.java |  44 -------
 .../site/markdown/NodeManagerCGroupsMemory.md |  12 +-
 5 files changed, 60 insertions(+), 147 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java
index 053b796b5f..ee5ce2acdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java
@@ -34,9 +34,6 @@
 import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Optional;
-
-import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
 
 /**
  * Handler class to handle the memory controller. YARN already ships a
@@ -174,26 +171,4 @@ public List<PrivilegedOperation> postComplete(ContainerId containerId)
   public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
     return null;
   }
-
-  @Override
-  public Optional<Boolean> isUnderOOM(ContainerId containerId) {
-    try {
-      String status = cGroupsHandler.getCGroupParam(
-          CGroupsHandler.CGroupController.MEMORY,
-          containerId.toString(),
-          CGROUP_PARAM_MEMORY_OOM_CONTROL);
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("cgroups OOM status for " + containerId + ": " + status);
-      }
-      if (status.contains(CGroupsHandler.UNDER_OOM)) {
-        LOG.warn("Container " + containerId + " under OOM based on cgroups.");
-        return Optional.of(true);
-      } else {
-        return Optional.of(false);
-      }
-    } catch (ResourceHandlerException e) {
-      LOG.warn("Could not read cgroups" + containerId, e);
-    }
-    return Optional.empty();
-  }
 }
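For context on what the removed `isUnderOOM()` was checking: the cgroup v1 `memory.oom_control` parameter reports OOM state as key-value lines such as `oom_kill_disable 0` and `under_oom 1`. Below is a minimal standalone sketch of that check, not part of the patch; the class name and the cgroup directory are hypothetical.

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class OomControlProbe {
  /**
   * Returns true if the cgroup v1 memory.oom_control file under cgroupDir
   * reports "under_oom 1" for the cgroup.
   */
  static boolean isUnderOom(String cgroupDir) throws IOException {
    // Hypothetical path, e.g. /sys/fs/cgroup/memory/hadoop-yarn/container_01_01;
    // the NodeManager derives the real path from its cgroup hierarchy.
    String status = new String(Files.readAllBytes(
        Paths.get(cgroupDir, "memory.oom_control")));
    return status.contains("under_oom 1");
  }
}
```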
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java
index 1729fc17a0..013a49fbb4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java
@@ -20,18 +20,8 @@
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-
-import java.util.Optional;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 public interface MemoryResourceHandler extends ResourceHandler {
-  /**
-   * check whether a container is under OOM.
-   * @param containerId the id of the container
-   * @return empty if the status is unknown, true is the container is under oom,
-   * false otherwise
-   */
-  Optional<Boolean> isUnderOOM(ContainerId containerId);
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index e5726c8e79..b9e2c68abe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -22,7 +22,6 @@
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 import org.slf4j.Logger;
@@ -52,7 +51,6 @@
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.Optional;
 import java.util.concurrent.ConcurrentHashMap;
 
 /**
@@ -699,75 +697,61 @@ private void checkLimit(ContainerId containerId, String pId,
       ProcessTreeInfo ptInfo,
       long currentVmemUsage,
       long currentPmemUsage) {
-    Optional<Boolean> isMemoryOverLimit = Optional.empty();
+    if (strictMemoryEnforcement && !elasticMemoryEnforcement) {
+      // When cgroup-based strict memory enforcement is used alone without
+      // elastic memory control, the oom-kill would take care of it.
+      // However, when elastic memory control is also enabled, the oom killer
+      // would be disabled at the root yarn container cgroup level (all child
+      // cgroups would inherit that setting). Hence, we fall back to the
+      // polling-based mechanism.
+      return;
+    }
+    boolean isMemoryOverLimit = false;
     String msg = "";
     int containerExitStatus = ContainerExitStatus.INVALID;
-    if (strictMemoryEnforcement && elasticMemoryEnforcement) {
-      // Both elastic memory control and strict memory control are enabled
-      // through cgroups. A container will be frozen by the elastic memory
-      // control mechanism if it exceeds its request, so we check for this
-      // here and kill it. Otherwise, the container will not be killed if
-      // the node never exceeds its limit and the procfs-based
-      // memory accounting is different from the cgroup-based accounting.
-
-      MemoryResourceHandler handler =
-          ResourceHandlerModule.getMemoryResourceHandler();
-      if (handler != null) {
-        isMemoryOverLimit = handler.isUnderOOM(containerId);
-        containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
-        msg = containerId + " is under oom because it exceeded its" +
-            " physical memory limit";
-      }
-    } else if (strictMemoryEnforcement || elasticMemoryEnforcement) {
-      // if cgroup-based memory control is enabled
-      isMemoryOverLimit = Optional.of(false);
+    long vmemLimit = ptInfo.getVmemLimit();
+    long pmemLimit = ptInfo.getPmemLimit();
+    // as processes begin with an age 1, we want to see if there
+    // are processes more than 1 iteration old.
+    long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
+    long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
+    if (isVmemCheckEnabled()
+        && isProcessTreeOverLimit(containerId.toString(),
+        currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
+      // The current usage (age=0) is always higher than the aged usage. We
+      // do not show the aged size in the message, base the delta on the
+      // current usage
+      long delta = currentVmemUsage - vmemLimit;
+      // Container (the root process) is still alive and overflowing
+      // memory.
+      // Dump the process-tree and then clean it up.
+      msg = formatErrorMessage("virtual",
+          formatUsageString(currentVmemUsage, vmemLimit,
+              currentPmemUsage, pmemLimit),
+          pId, containerId, pTree, delta);
+      isMemoryOverLimit = true;
+      containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
+    } else if (isPmemCheckEnabled()
+        && isProcessTreeOverLimit(containerId.toString(),
+        currentPmemUsage, curRssMemUsageOfAgedProcesses,
+        pmemLimit)) {
+      // The current usage (age=0) is always higher than the aged usage. We
+      // do not show the aged size in the message, base the delta on the
+      // current usage
+      long delta = currentPmemUsage - pmemLimit;
+      // Container (the root process) is still alive and overflowing
+      // memory.
+      // Dump the process-tree and then clean it up.
+      msg = formatErrorMessage("physical",
+          formatUsageString(currentVmemUsage, vmemLimit,
+              currentPmemUsage, pmemLimit),
+          pId, containerId, pTree, delta);
+      isMemoryOverLimit = true;
+      containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
     }
-    if (!isMemoryOverLimit.isPresent()) {
-      long vmemLimit = ptInfo.getVmemLimit();
-      long pmemLimit = ptInfo.getPmemLimit();
-      // as processes begin with an age 1, we want to see if there
-      // are processes more than 1 iteration old.
-      long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
-      long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
-      if (isVmemCheckEnabled()
-          && isProcessTreeOverLimit(containerId.toString(),
-          currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
-        // The current usage (age=0) is always higher than the aged usage. We
-        // do not show the aged size in the message, base the delta on the
-        // current usage
-        long delta = currentVmemUsage - vmemLimit;
-        // Container (the root process) is still alive and overflowing
-        // memory.
-        // Dump the process-tree and then clean it up.
-        msg = formatErrorMessage("virtual",
-            formatUsageString(currentVmemUsage, vmemLimit,
-                currentPmemUsage, pmemLimit),
-            pId, containerId, pTree, delta);
-        isMemoryOverLimit = Optional.of(true);
-        containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
-      } else if (isPmemCheckEnabled()
-          && isProcessTreeOverLimit(containerId.toString(),
-          currentPmemUsage, curRssMemUsageOfAgedProcesses,
-          pmemLimit)) {
-        // The current usage (age=0) is always higher than the aged usage. We
-        // do not show the aged size in the message, base the delta on the
-        // current usage
-        long delta = currentPmemUsage - pmemLimit;
-        // Container (the root process) is still alive and overflowing
-        // memory.
-        // Dump the process-tree and then clean it up.
-        msg = formatErrorMessage("physical",
-            formatUsageString(currentVmemUsage, vmemLimit,
-                currentPmemUsage, pmemLimit),
-            pId, containerId, pTree, delta);
-        isMemoryOverLimit = Optional.of(true);
-        containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
-      }
-    }
-
-    if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
+    if (isMemoryOverLimit) {
       // Virtual or physical memory over limit. Fail the container and
       // remove
       // the corresponding process tree
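To summarize the control flow that the rewritten checkLimit() implements: a minimal sketch, not part of the patch, with an illustrative class name. The flag names mirror strictMemoryEnforcement and elasticMemoryEnforcement above.

```java
public class MemoryEnforcementSketch {

  /**
   * Returns true if the polling-based memory check must run, mirroring the
   * early return at the top of checkLimit().
   */
  static boolean pollingCheckNeeded(boolean strictMemoryEnforcement,
      boolean elasticMemoryEnforcement) {
    if (strictMemoryEnforcement && !elasticMemoryEnforcement) {
      // Strict enforcement alone: the cgroup OOM killer terminates the
      // container, so the monitor skips the polling-based check.
      return false;
    }
    // With elastic memory control enabled, the OOM killer is disabled on
    // the root YARN container cgroup (inherited by all child cgroups), so
    // even strictly enforced containers must be caught by polling. The
    // polling code itself is still gated by the pmem/vmem check flags.
    return true;
  }

  public static void main(String[] args) {
    System.out.println(pollingCheckNeeded(true, false));  // false: OOM killer
    System.out.println(pollingCheckNeeded(true, true));   // true: polling
    System.out.println(pollingCheckNeeded(false, true));  // true: polling
    System.out.println(pollingCheckNeeded(false, false)); // true: polling
  }
}
```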
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java
index 4d3e7e6e1d..0d001bc21f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java
@@ -31,9 +31,6 @@
 import org.junit.Assert;
 
 import java.util.List;
-import java.util.Optional;
-
-import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
 import static org.mockito.Mockito.*;
 
 /**
@@ -244,45 +241,4 @@ public void testOpportunistic() throws Exception {
         .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
             CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
   }
-
-  @Test
-  public void testContainerUnderOom() throws Exception {
-    Configuration conf = new YarnConfiguration();
-    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
-    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
-
-    cGroupsMemoryResourceHandler.bootstrap(conf);
-
-    ContainerId containerId = mock(ContainerId.class);
-    when(containerId.toString()).thenReturn("container_01_01");
-
-    when(mockCGroupsHandler.getCGroupParam(
-        CGroupsHandler.CGroupController.MEMORY,
-        containerId.toString(),
-        CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(CGroupsHandler.UNDER_OOM);
-    Optional<Boolean> outOfOom =
-        cGroupsMemoryResourceHandler.isUnderOOM(containerId);
-    Assert.assertTrue("The container should be reported to run under oom",
-        outOfOom.isPresent() && outOfOom.get().equals(true));
-
-    when(mockCGroupsHandler.getCGroupParam(
-        CGroupsHandler.CGroupController.MEMORY,
-        containerId.toString(),
-        CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn("");
-    outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
-    Assert.assertTrue(
-        "The container should not be reported to run under oom",
-        outOfOom.isPresent() && outOfOom.get().equals(false));
-
-    when(mockCGroupsHandler.getCGroupParam(
-        CGroupsHandler.CGroupController.MEMORY,
-        containerId.toString(),
-        CGROUP_PARAM_MEMORY_OOM_CONTROL)).
-        thenThrow(new ResourceHandlerException());
-    outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
-    Assert.assertFalse(
-        "No report of the oom status should be available.",
-        outOfOom.isPresent());
-
-  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCGroupsMemory.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCGroupsMemory.md
index ec9323453b..d1988a5048 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCGroupsMemory.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCGroupsMemory.md
@@ -20,8 +20,6 @@ YARN has multiple features to enforce container memory limits. There are three t
 2. Strict memory control kills each container that has exceeded its limits. It is using the OOM killer capability of the cgroups Linux kernel feature.
 3. Elastic memory control is also based on cgroups. It allows bursting and starts killing containers only, if the overall system memory usage reaches a limit.
-If you use 2. or 3. feature 1. is disabled.
-
 Strict Memory Feature
 ---------------------
 
@@ -131,3 +129,13 @@ Configure the cgroups prerequisites mentioned above.
 `yarn.nodemanager.resource.memory.enforced` should be `false`
 
 `yarn.nodemanager.pmem-check-enabled` or `yarn.nodemanager.vmem-check-enabled` should be `true`. If swapping is turned off the former should be set, the latter should be set otherwise.
+
+
+Configuring elastic memory control and strict container memory enforcement through cgroups
+-------------------------------------------------------------------------------------------
+ADVANCED ONLY
+Elastic memory control and strict container memory enforcement can be enabled at the same time to allow the NodeManager to over-allocate itself.
+However, elastic memory control changes how strict container memory enforcement through cgroups is performed. Elastic memory control
+disables the OOM killer on the root YARN container cgroup, and that setting overrides the settings of the individual container cgroups, so
+individual containers are not killed by the OOM killer when they exceed their memory limit. Strict container memory enforcement in this case
+falls back to the polling-based mechanism described above.
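As a rough sketch of the combined setup this section describes (not part of the patch): the snippet below assumes the YarnConfiguration constants NM_MEMORY_RESOURCE_ENFORCED and NM_ELASTIC_MEMORY_CONTROL_ENABLED correspond to `yarn.nodemanager.resource.memory.enforced` and `yarn.nodemanager.elastic-memory-control.enabled`, as in recent Hadoop 3 releases; verify the names against your version.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ElasticPlusStrictMemoryConfig {

  /** Builds a configuration enabling elastic control plus strict enforcement. */
  public static Configuration create() {
    Configuration conf = new YarnConfiguration();
    // Strict container memory enforcement through cgroups.
    conf.setBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENFORCED, true);
    // Elastic memory control. This disables the OOM killer on the root YARN
    // container cgroup, so strict enforcement falls back to the
    // polling-based mechanism described above.
    conf.setBoolean(YarnConfiguration.NM_ELASTIC_MEMORY_CONTROL_ENABLED, true);
    // The polling-based check needs at least one of these two checks. With
    // swapping turned off, enable the physical memory check.
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
    return conf;
  }
}
```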