YARN-8461. Support strict memory control on individual containers with the elastic memory control mechanism. Contributed by Haibo Chen.
This commit is contained in:
parent
b69ba0f330
commit
62d83ca536
@ -34,6 +34,9 @@
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
|
||||
|
||||
/**
|
||||
* Handler class to handle the memory controller. YARN already ships a
|
||||
@ -172,4 +175,25 @@ public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Boolean> isUnderOOM(ContainerId containerId) {
|
||||
try {
|
||||
String status = cGroupsHandler.getCGroupParam(
|
||||
CGroupsHandler.CGroupController.MEMORY,
|
||||
containerId.toString(),
|
||||
CGROUP_PARAM_MEMORY_OOM_CONTROL);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("cgroups OOM status for " + containerId + ": " + status);
|
||||
}
|
||||
if (status.contains(CGroupsHandler.UNDER_OOM)) {
|
||||
LOG.warn("Container " + containerId + " under OOM based on cgroups.");
|
||||
return Optional.of(true);
|
||||
} else {
|
||||
return Optional.of(false);
|
||||
}
|
||||
} catch (ResourceHandlerException e) {
|
||||
LOG.warn("Could not read cgroups" + containerId, e);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
@ -20,8 +20,18 @@
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Unstable
|
||||
public interface MemoryResourceHandler extends ResourceHandler {
|
||||
/**
|
||||
* check whether a container is under OOM.
|
||||
* @param containerId the id of the container
|
||||
* @return empty if the status is unknown, true is the container is under oom,
|
||||
* false otherwise
|
||||
*/
|
||||
Optional<Boolean> isUnderOOM(ContainerId containerId);
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||
import org.slf4j.Logger;
|
||||
@ -51,6 +52,7 @@
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
@ -697,55 +699,75 @@ private void checkLimit(ContainerId containerId, String pId,
|
||||
ProcessTreeInfo ptInfo,
|
||||
long currentVmemUsage,
|
||||
long currentPmemUsage) {
|
||||
if (elasticMemoryEnforcement || strictMemoryEnforcement) {
|
||||
// We enforce the overall memory usage instead of individual containers
|
||||
return;
|
||||
}
|
||||
boolean isMemoryOverLimit = false;
|
||||
long vmemLimit = ptInfo.getVmemLimit();
|
||||
long pmemLimit = ptInfo.getPmemLimit();
|
||||
// as processes begin with an age 1, we want to see if there
|
||||
// are processes more than 1 iteration old.
|
||||
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
|
||||
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
|
||||
Optional<Boolean> isMemoryOverLimit = Optional.empty();
|
||||
String msg = "";
|
||||
int containerExitStatus = ContainerExitStatus.INVALID;
|
||||
if (isVmemCheckEnabled()
|
||||
&& isProcessTreeOverLimit(containerId.toString(),
|
||||
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
|
||||
// The current usage (age=0) is always higher than the aged usage. We
|
||||
// do not show the aged size in the message, base the delta on the
|
||||
// current usage
|
||||
long delta = currentVmemUsage - vmemLimit;
|
||||
// Container (the root process) is still alive and overflowing
|
||||
// memory.
|
||||
// Dump the process-tree and then clean it up.
|
||||
msg = formatErrorMessage("virtual",
|
||||
formatUsageString(currentVmemUsage, vmemLimit,
|
||||
currentPmemUsage, pmemLimit),
|
||||
pId, containerId, pTree, delta);
|
||||
isMemoryOverLimit = true;
|
||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
|
||||
} else if (isPmemCheckEnabled()
|
||||
&& isProcessTreeOverLimit(containerId.toString(),
|
||||
currentPmemUsage, curRssMemUsageOfAgedProcesses,
|
||||
pmemLimit)) {
|
||||
// The current usage (age=0) is always higher than the aged usage. We
|
||||
// do not show the aged size in the message, base the delta on the
|
||||
// current usage
|
||||
long delta = currentPmemUsage - pmemLimit;
|
||||
// Container (the root process) is still alive and overflowing
|
||||
// memory.
|
||||
// Dump the process-tree and then clean it up.
|
||||
msg = formatErrorMessage("physical",
|
||||
formatUsageString(currentVmemUsage, vmemLimit,
|
||||
currentPmemUsage, pmemLimit),
|
||||
pId, containerId, pTree, delta);
|
||||
isMemoryOverLimit = true;
|
||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
||||
|
||||
if (strictMemoryEnforcement && elasticMemoryEnforcement) {
|
||||
// Both elastic memory control and strict memory control are enabled
|
||||
// through cgroups. A container will be frozen by the elastic memory
|
||||
// control mechanism if it exceeds its request, so we check for this
|
||||
// here and kill it. Otherwise, the container will not be killed if
|
||||
// the node never exceeds its limit and the procfs-based
|
||||
// memory accounting is different from the cgroup-based accounting.
|
||||
|
||||
MemoryResourceHandler handler =
|
||||
ResourceHandlerModule.getMemoryResourceHandler();
|
||||
if (handler != null) {
|
||||
isMemoryOverLimit = handler.isUnderOOM(containerId);
|
||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
||||
msg = containerId + " is under oom because it exceeded its" +
|
||||
" physical memory limit";
|
||||
}
|
||||
} else if (strictMemoryEnforcement || elasticMemoryEnforcement) {
|
||||
// if cgroup-based memory control is enabled
|
||||
isMemoryOverLimit = Optional.of(false);
|
||||
}
|
||||
|
||||
if (isMemoryOverLimit) {
|
||||
if (!isMemoryOverLimit.isPresent()) {
|
||||
long vmemLimit = ptInfo.getVmemLimit();
|
||||
long pmemLimit = ptInfo.getPmemLimit();
|
||||
// as processes begin with an age 1, we want to see if there
|
||||
// are processes more than 1 iteration old.
|
||||
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
|
||||
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
|
||||
if (isVmemCheckEnabled()
|
||||
&& isProcessTreeOverLimit(containerId.toString(),
|
||||
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
|
||||
// The current usage (age=0) is always higher than the aged usage. We
|
||||
// do not show the aged size in the message, base the delta on the
|
||||
// current usage
|
||||
long delta = currentVmemUsage - vmemLimit;
|
||||
// Container (the root process) is still alive and overflowing
|
||||
// memory.
|
||||
// Dump the process-tree and then clean it up.
|
||||
msg = formatErrorMessage("virtual",
|
||||
formatUsageString(currentVmemUsage, vmemLimit,
|
||||
currentPmemUsage, pmemLimit),
|
||||
pId, containerId, pTree, delta);
|
||||
isMemoryOverLimit = Optional.of(true);
|
||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
|
||||
} else if (isPmemCheckEnabled()
|
||||
&& isProcessTreeOverLimit(containerId.toString(),
|
||||
currentPmemUsage, curRssMemUsageOfAgedProcesses,
|
||||
pmemLimit)) {
|
||||
// The current usage (age=0) is always higher than the aged usage. We
|
||||
// do not show the aged size in the message, base the delta on the
|
||||
// current usage
|
||||
long delta = currentPmemUsage - pmemLimit;
|
||||
// Container (the root process) is still alive and overflowing
|
||||
// memory.
|
||||
// Dump the process-tree and then clean it up.
|
||||
msg = formatErrorMessage("physical",
|
||||
formatUsageString(currentVmemUsage, vmemLimit,
|
||||
currentPmemUsage, pmemLimit),
|
||||
pId, containerId, pTree, delta);
|
||||
isMemoryOverLimit = Optional.of(true);
|
||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
|
||||
// Virtual or physical memory over limit. Fail the container and
|
||||
// remove
|
||||
// the corresponding process tree
|
||||
|
@ -31,7 +31,9 @@
|
||||
import org.junit.Assert;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
/**
|
||||
@ -242,4 +244,45 @@ public void testOpportunistic() throws Exception {
|
||||
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
||||
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContainerUnderOom() throws Exception {
|
||||
Configuration conf = new YarnConfiguration();
|
||||
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
|
||||
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
|
||||
|
||||
cGroupsMemoryResourceHandler.bootstrap(conf);
|
||||
|
||||
ContainerId containerId = mock(ContainerId.class);
|
||||
when(containerId.toString()).thenReturn("container_01_01");
|
||||
|
||||
when(mockCGroupsHandler.getCGroupParam(
|
||||
CGroupsHandler.CGroupController.MEMORY,
|
||||
containerId.toString(),
|
||||
CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(CGroupsHandler.UNDER_OOM);
|
||||
Optional<Boolean> outOfOom =
|
||||
cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||
Assert.assertTrue("The container should be reported to run under oom",
|
||||
outOfOom.isPresent() && outOfOom.get().equals(true));
|
||||
|
||||
when(mockCGroupsHandler.getCGroupParam(
|
||||
CGroupsHandler.CGroupController.MEMORY,
|
||||
containerId.toString(),
|
||||
CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn("");
|
||||
outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||
Assert.assertTrue(
|
||||
"The container should not be reported to run under oom",
|
||||
outOfOom.isPresent() && outOfOom.get().equals(false));
|
||||
|
||||
when(mockCGroupsHandler.getCGroupParam(
|
||||
CGroupsHandler.CGroupController.MEMORY,
|
||||
containerId.toString(),
|
||||
CGROUP_PARAM_MEMORY_OOM_CONTROL)).
|
||||
thenThrow(new ResourceHandlerException());
|
||||
outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||
Assert.assertFalse(
|
||||
"No report of the oom status should be available.",
|
||||
outOfOom.isPresent());
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user