YARN-8461. Support strict memory control on individual container with elastic control memory mechanism. Contributed by Haibo Chen.

This commit is contained in:
Miklos Szegedi 2018-06-26 15:21:35 -07:00
parent b69ba0f330
commit 62d83ca536
4 changed files with 144 additions and 45 deletions

View File

@ -34,6 +34,9 @@
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
/** /**
* Handler class to handle the memory controller. YARN already ships a * Handler class to handle the memory controller. YARN already ships a
@ -172,4 +175,25 @@ public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
return null; return null;
} }
/**
 * Reports whether the memory cgroup of the given container is under OOM,
 * based on the cgroup's OOM-control parameter.
 *
 * @param containerId the id of the container whose cgroup is queried
 * @return {@code Optional.of(true)} if the OOM-control parameter contains
 *         the under-OOM marker, {@code Optional.of(false)} if it does not,
 *         and an empty Optional if the parameter could not be read
 */
@Override
public Optional<Boolean> isUnderOOM(ContainerId containerId) {
  try {
    String status = cGroupsHandler.getCGroupParam(
        CGroupsHandler.CGroupController.MEMORY,
        containerId.toString(),
        CGROUP_PARAM_MEMORY_OOM_CONTROL);
    if (LOG.isDebugEnabled()) {
      LOG.debug("cgroups OOM status for " + containerId + ": " + status);
    }
    if (status == null) {
      // Nothing was read, so the status is unknown rather than "not OOM".
      return Optional.empty();
    }
    if (status.contains(CGroupsHandler.UNDER_OOM)) {
      LOG.warn("Container " + containerId + " under OOM based on cgroups.");
      return Optional.of(true);
    }
    return Optional.of(false);
  } catch (ResourceHandlerException e) {
    // A failed read is reported as "unknown" so that callers can fall back
    // to another way of checking memory usage.
    LOG.warn("Could not read cgroups OOM status for " + containerId, e);
    return Optional.empty();
  }
}
} }

View File

@ -20,8 +20,18 @@
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.yarn.api.records.ContainerId;
import java.util.Optional;
@InterfaceAudience.Private @InterfaceAudience.Private
@InterfaceStability.Unstable @InterfaceStability.Unstable
public interface MemoryResourceHandler extends ResourceHandler { public interface MemoryResourceHandler extends ResourceHandler {
/**
* Checks whether a container is under OOM.
* @param containerId the id of the container
* @return empty if the status is unknown, true if the container is under OOM,
* false otherwise
*/
Optional<Boolean> isUnderOOM(ContainerId containerId);
} }

View File

@ -22,6 +22,7 @@
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -51,6 +52,7 @@
import java.util.Arrays; import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
/** /**
@ -697,55 +699,75 @@ private void checkLimit(ContainerId containerId, String pId,
ProcessTreeInfo ptInfo, ProcessTreeInfo ptInfo,
long currentVmemUsage, long currentVmemUsage,
long currentPmemUsage) { long currentPmemUsage) {
if (elasticMemoryEnforcement || strictMemoryEnforcement) { Optional<Boolean> isMemoryOverLimit = Optional.empty();
// We enforce the overall memory usage instead of individual containers
return;
}
boolean isMemoryOverLimit = false;
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
String msg = ""; String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID; int containerExitStatus = ContainerExitStatus.INVALID;
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(), if (strictMemoryEnforcement && elasticMemoryEnforcement) {
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { // Both elastic memory control and strict memory control are enabled
// The current usage (age=0) is always higher than the aged usage. We // through cgroups. A container will be frozen by the elastic memory
// do not show the aged size in the message, base the delta on the // control mechanism if it exceeds its request, so we check for this
// current usage // here and kill it. Otherwise, the container will not be killed if
long delta = currentVmemUsage - vmemLimit; // the node never exceeds its limit and the procfs-based
// Container (the root process) is still alive and overflowing // memory accounting is different from the cgroup-based accounting.
// memory.
// Dump the process-tree and then clean it up. MemoryResourceHandler handler =
msg = formatErrorMessage("virtual", ResourceHandlerModule.getMemoryResourceHandler();
formatUsageString(currentVmemUsage, vmemLimit, if (handler != null) {
currentPmemUsage, pmemLimit), isMemoryOverLimit = handler.isUnderOOM(containerId);
pId, containerId, pTree, delta); containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
isMemoryOverLimit = true; msg = containerId + " is under oom because it exceeded its" +
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM; " physical memory limit";
} else if (isPmemCheckEnabled() }
&& isProcessTreeOverLimit(containerId.toString(), } else if (strictMemoryEnforcement || elasticMemoryEnforcement) {
currentPmemUsage, curRssMemUsageOfAgedProcesses, // if cgroup-based memory control is enabled
pmemLimit)) { isMemoryOverLimit = Optional.of(false);
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
} }
if (isMemoryOverLimit) { if (!isMemoryOverLimit.isPresent()) {
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentVmemUsage - vmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
}
if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
// Virtual or physical memory over limit. Fail the container and // Virtual or physical memory over limit. Fail the container and
// remove // remove
// the corresponding process tree // the corresponding process tree

View File

@ -31,7 +31,9 @@
import org.junit.Assert; import org.junit.Assert;
import java.util.List; import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
import static org.mockito.Mockito.*; import static org.mockito.Mockito.*;
/** /**
@ -242,4 +244,45 @@ public void testOpportunistic() throws Exception {
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id, .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M"); CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
} }
/**
 * Exercises the three outcomes of isUnderOOM: the OOM-control parameter
 * contains the under-OOM marker, it does not contain it, and reading it
 * fails with a ResourceHandlerException.
 */
@Test
public void testContainerUnderOom() throws Exception {
  Configuration yarnConf = new YarnConfiguration();
  yarnConf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
  yarnConf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
  cGroupsMemoryResourceHandler.bootstrap(yarnConf);

  final String cgroupName = "container_01_01";
  ContainerId mockContainer = mock(ContainerId.class);
  when(mockContainer.toString()).thenReturn(cgroupName);

  // Case 1: the parameter reports the under-OOM marker.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupName,
      CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(CGroupsHandler.UNDER_OOM);
  Optional<Boolean> oomStatus =
      cGroupsMemoryResourceHandler.isUnderOOM(mockContainer);
  Assert.assertTrue("The container should be reported to run under oom",
      Boolean.TRUE.equals(oomStatus.orElse(null)));

  // Case 2: the parameter is readable but carries no marker.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupName,
      CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn("");
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(mockContainer);
  Assert.assertTrue(
      "The container should not be reported to run under oom",
      Boolean.FALSE.equals(oomStatus.orElse(null)));

  // Case 3: reading the parameter fails, so no status is available.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupName,
      CGROUP_PARAM_MEMORY_OOM_CONTROL)).
      thenThrow(new ResourceHandlerException());
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(mockContainer);
  Assert.assertFalse(
      "No report of the oom status should be available.",
      oomStatus.isPresent());
}
} }