YARN-8930. CGroup-based strict container memory enforcement does not work with CGroupElasticMemoryController (haibochen via rkanter)

This commit is contained in:
Robert Kanter 2018-10-25 10:43:36 -07:00
parent fb2b72e6fc
commit f76e3c3db7
5 changed files with 60 additions and 147 deletions

View File

@ -34,9 +34,6 @@
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
/** /**
* Handler class to handle the memory controller. YARN already ships a * Handler class to handle the memory controller. YARN already ships a
@ -174,26 +171,4 @@ public List<PrivilegedOperation> postComplete(ContainerId containerId)
public List<PrivilegedOperation> teardown() throws ResourceHandlerException { public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
return null; return null;
} }
/**
 * Reports whether the cgroup of the given container is currently under OOM.
 *
 * <p>Reads the memory-controller parameter named by
 * {@code CGROUP_PARAM_MEMORY_OOM_CONTROL} for the container and looks for
 * the {@code CGroupsHandler.UNDER_OOM} marker in its value.
 *
 * @param containerId the id of the container whose cgroup is inspected
 * @return {@code Optional.of(true)} if the marker is present,
 *     {@code Optional.of(false)} if it is absent, and an empty
 *     {@code Optional} if the parameter could not be read
 */
@Override
public Optional<Boolean> isUnderOOM(ContainerId containerId) {
  final String status;
  try {
    status = cGroupsHandler.getCGroupParam(
        CGroupsHandler.CGroupController.MEMORY,
        containerId.toString(),
        CGROUP_PARAM_MEMORY_OOM_CONTROL);
  } catch (ResourceHandlerException e) {
    // A failed read means the state is unknown, not "not under OOM".
    LOG.warn("Could not read cgroups OOM status for " + containerId, e);
    return Optional.empty();
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("cgroups OOM status for " + containerId + ": " + status);
  }

  if (status == null) {
    // No value came back; report "unknown" instead of failing with an NPE.
    LOG.warn("Could not read cgroups OOM status for " + containerId);
    return Optional.empty();
  }

  if (status.contains(CGroupsHandler.UNDER_OOM)) {
    LOG.warn("Container " + containerId + " under OOM based on cgroups.");
    return Optional.of(true);
  }
  return Optional.of(false);
}
} }

View File

@ -20,18 +20,8 @@
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.yarn.api.records.ContainerId;
import java.util.Optional;
@InterfaceAudience.Private @InterfaceAudience.Private
@InterfaceStability.Unstable @InterfaceStability.Unstable
public interface MemoryResourceHandler extends ResourceHandler { public interface MemoryResourceHandler extends ResourceHandler {
/**
* Check whether a container is under OOM.
* @param containerId the id of the container
* @return empty if the status is unknown, true if the container is under OOM,
* false otherwise
*/
Optional<Boolean> isUnderOOM(ContainerId containerId);
} }

View File

@ -22,7 +22,6 @@
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -52,7 +51,6 @@
import java.util.Arrays; import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
/** /**
@ -699,75 +697,61 @@ private void checkLimit(ContainerId containerId, String pId,
ProcessTreeInfo ptInfo, ProcessTreeInfo ptInfo,
long currentVmemUsage, long currentVmemUsage,
long currentPmemUsage) { long currentPmemUsage) {
Optional<Boolean> isMemoryOverLimit = Optional.empty(); if (strictMemoryEnforcement && !elasticMemoryEnforcement) {
// When cgroup-based strict memory enforcement is used alone without
// elastic memory control, the oom-kill would take care of it.
// However, when elastic memory control is also enabled, the oom killer
// would be disabled at the root yarn container cgroup level (all child
// cgroups would inherit that setting). Hence, we fall back to the
// polling-based mechanism.
return;
}
boolean isMemoryOverLimit = false;
String msg = ""; String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID; int containerExitStatus = ContainerExitStatus.INVALID;
if (strictMemoryEnforcement && elasticMemoryEnforcement) { long vmemLimit = ptInfo.getVmemLimit();
// Both elastic memory control and strict memory control are enabled long pmemLimit = ptInfo.getPmemLimit();
// through cgroups. A container will be frozen by the elastic memory // as processes begin with an age 1, we want to see if there
// control mechanism if it exceeds its request, so we check for this // are processes more than 1 iteration old.
// here and kill it. Otherwise, the container will not be killed if long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
// the node never exceeds its limit and the procfs-based long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
// memory accounting is different from the cgroup-based accounting. if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
MemoryResourceHandler handler = currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
ResourceHandlerModule.getMemoryResourceHandler(); // The current usage (age=0) is always higher than the aged usage. We
if (handler != null) { // do not show the aged size in the message, base the delta on the
isMemoryOverLimit = handler.isUnderOOM(containerId); // current usage
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM; long delta = currentVmemUsage - vmemLimit;
msg = containerId + " is under oom because it exceeded its" + // Container (the root process) is still alive and overflowing
" physical memory limit"; // memory.
} // Dump the process-tree and then clean it up.
} else if (strictMemoryEnforcement || elasticMemoryEnforcement) { msg = formatErrorMessage("virtual",
// if cgroup-based memory control is enabled formatUsageString(currentVmemUsage, vmemLimit,
isMemoryOverLimit = Optional.of(false); currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
} }
if (!isMemoryOverLimit.isPresent()) { if (isMemoryOverLimit) {
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentVmemUsage - vmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
}
if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
// Virtual or physical memory over limit. Fail the container and // Virtual or physical memory over limit. Fail the container and
// remove // remove
// the corresponding process tree // the corresponding process tree

View File

@ -31,9 +31,6 @@
import org.junit.Assert; import org.junit.Assert;
import java.util.List; import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
import static org.mockito.Mockito.*; import static org.mockito.Mockito.*;
/** /**
@ -244,45 +241,4 @@ public void testOpportunistic() throws Exception {
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id, .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M"); CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
} }
/**
 * Exercises the three outcomes of {@code isUnderOOM}: the cgroup parameter
 * contains the under-OOM marker, the parameter is empty, and reading the
 * parameter throws a {@code ResourceHandlerException}.
 */
@Test
public void testContainerUnderOom() throws Exception {
  Configuration nmConf = new YarnConfiguration();
  nmConf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
  nmConf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
  cGroupsMemoryResourceHandler.bootstrap(nmConf);

  ContainerId containerId = mock(ContainerId.class);
  when(containerId.toString()).thenReturn("container_01_01");
  final String cgroupId = containerId.toString();

  // Marker present: the container must be reported as under OOM.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenReturn(CGroupsHandler.UNDER_OOM);
  Optional<Boolean> oomStatus =
      cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertTrue("The container should be reported to run under oom",
      oomStatus.orElse(false));

  // Empty value: a definite "not under OOM" answer is expected.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenReturn("");
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertTrue(
      "The container should not be reported to run under oom",
      !oomStatus.orElse(true));

  // Read failure: no status at all should be reported.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenThrow(new ResourceHandlerException());
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertFalse(
      "No report of the oom status should be available.",
      oomStatus.isPresent());
}
} }

View File

@ -20,8 +20,6 @@ YARN has multiple features to enforce container memory limits. There are three t
2. Strict memory control kills each container that has exceeded its limits. It is using the OOM killer capability of the cgroups Linux kernel feature. 2. Strict memory control kills each container that has exceeded its limits. It is using the OOM killer capability of the cgroups Linux kernel feature.
3. Elastic memory control is also based on cgroups. It allows bursting and starts killing containers only, if the overall system memory usage reaches a limit. 3. Elastic memory control is also based on cgroups. It allows bursting and starts killing containers only, if the overall system memory usage reaches a limit.
If you use feature 2 or feature 3, feature 1 is disabled.
Strict Memory Feature Strict Memory Feature
--------------------- ---------------------
@ -131,3 +129,13 @@ Configure the cgroups prerequisites mentioned above.
`yarn.nodemanager.resource.memory.enforced` should be `false` `yarn.nodemanager.resource.memory.enforced` should be `false`
`yarn.nodemanager.pmem-check-enabled` or `yarn.nodemanager.vmem-check-enabled` should be `true`. If swapping is turned off the former should be set, the latter should be set otherwise. `yarn.nodemanager.pmem-check-enabled` or `yarn.nodemanager.vmem-check-enabled` should be `true`. If swapping is turned off the former should be set, the latter should be set otherwise.
Configuring elastic memory control and strict container memory enforcement through cgroups
------------------------------------------
ADVANCED ONLY
Elastic memory control and strict container memory enforcement can be enabled at the same time to allow Node Manager to over-allocate itself.
However, elastic memory control changes how strict container memory enforcement through cgroups is performed. Elastic memory control
disables the OOM killer on the root YARN container cgroup. That setting overrides the OOM killer setting of the individual container cgroups, so individual
containers won't be killed by the OOM killer when they go over their memory limit. In this case, strict container memory enforcement falls
back to the polling-based mechanism.