YARN-8461. Support strict memory control on individual container with elastic control memory mechanism. Contributed by Haibo Chen.
This commit is contained in:
parent
b69ba0f330
commit
62d83ca536
@ -34,6 +34,9 @@
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handler class to handle the memory controller. YARN already ships a
|
* Handler class to handle the memory controller. YARN already ships a
|
||||||
@ -172,4 +175,25 @@ public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<Boolean> isUnderOOM(ContainerId containerId) {
|
||||||
|
try {
|
||||||
|
String status = cGroupsHandler.getCGroupParam(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY,
|
||||||
|
containerId.toString(),
|
||||||
|
CGROUP_PARAM_MEMORY_OOM_CONTROL);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("cgroups OOM status for " + containerId + ": " + status);
|
||||||
|
}
|
||||||
|
if (status.contains(CGroupsHandler.UNDER_OOM)) {
|
||||||
|
LOG.warn("Container " + containerId + " under OOM based on cgroups.");
|
||||||
|
return Optional.of(true);
|
||||||
|
} else {
|
||||||
|
return Optional.of(false);
|
||||||
|
}
|
||||||
|
} catch (ResourceHandlerException e) {
|
||||||
|
LOG.warn("Could not read cgroups" + containerId, e);
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,8 +20,18 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
@InterfaceStability.Unstable
|
@InterfaceStability.Unstable
|
||||||
public interface MemoryResourceHandler extends ResourceHandler {
|
public interface MemoryResourceHandler extends ResourceHandler {
|
||||||
|
/**
|
||||||
|
* Check whether a container is under OOM.
|
||||||
|
* @param containerId the id of the container
|
||||||
|
* @return empty if the status is unknown, true if the container is under OOM,
|
||||||
|
* false otherwise
|
||||||
|
*/
|
||||||
|
Optional<Boolean> isUnderOOM(ContainerId containerId);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -51,6 +52,7 @@
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -697,55 +699,75 @@ private void checkLimit(ContainerId containerId, String pId,
|
|||||||
ProcessTreeInfo ptInfo,
|
ProcessTreeInfo ptInfo,
|
||||||
long currentVmemUsage,
|
long currentVmemUsage,
|
||||||
long currentPmemUsage) {
|
long currentPmemUsage) {
|
||||||
if (elasticMemoryEnforcement || strictMemoryEnforcement) {
|
Optional<Boolean> isMemoryOverLimit = Optional.empty();
|
||||||
// We enforce the overall memory usage instead of individual containers
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
boolean isMemoryOverLimit = false;
|
|
||||||
long vmemLimit = ptInfo.getVmemLimit();
|
|
||||||
long pmemLimit = ptInfo.getPmemLimit();
|
|
||||||
// as processes begin with an age 1, we want to see if there
|
|
||||||
// are processes more than 1 iteration old.
|
|
||||||
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
|
|
||||||
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
|
|
||||||
String msg = "";
|
String msg = "";
|
||||||
int containerExitStatus = ContainerExitStatus.INVALID;
|
int containerExitStatus = ContainerExitStatus.INVALID;
|
||||||
if (isVmemCheckEnabled()
|
|
||||||
&& isProcessTreeOverLimit(containerId.toString(),
|
if (strictMemoryEnforcement && elasticMemoryEnforcement) {
|
||||||
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
|
// Both elastic memory control and strict memory control are enabled
|
||||||
// The current usage (age=0) is always higher than the aged usage. We
|
// through cgroups. A container will be frozen by the elastic memory
|
||||||
// do not show the aged size in the message, base the delta on the
|
// control mechanism if it exceeds its request, so we check for this
|
||||||
// current usage
|
// here and kill it. Otherwise, the container will not be killed if
|
||||||
long delta = currentVmemUsage - vmemLimit;
|
// the node never exceeds its limit and the procfs-based
|
||||||
// Container (the root process) is still alive and overflowing
|
// memory accounting is different from the cgroup-based accounting.
|
||||||
// memory.
|
|
||||||
// Dump the process-tree and then clean it up.
|
MemoryResourceHandler handler =
|
||||||
msg = formatErrorMessage("virtual",
|
ResourceHandlerModule.getMemoryResourceHandler();
|
||||||
formatUsageString(currentVmemUsage, vmemLimit,
|
if (handler != null) {
|
||||||
currentPmemUsage, pmemLimit),
|
isMemoryOverLimit = handler.isUnderOOM(containerId);
|
||||||
pId, containerId, pTree, delta);
|
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
||||||
isMemoryOverLimit = true;
|
msg = containerId + " is under oom because it exceeded its" +
|
||||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
|
" physical memory limit";
|
||||||
} else if (isPmemCheckEnabled()
|
}
|
||||||
&& isProcessTreeOverLimit(containerId.toString(),
|
} else if (strictMemoryEnforcement || elasticMemoryEnforcement) {
|
||||||
currentPmemUsage, curRssMemUsageOfAgedProcesses,
|
// if cgroup-based memory control is enabled
|
||||||
pmemLimit)) {
|
isMemoryOverLimit = Optional.of(false);
|
||||||
// The current usage (age=0) is always higher than the aged usage. We
|
|
||||||
// do not show the aged size in the message, base the delta on the
|
|
||||||
// current usage
|
|
||||||
long delta = currentPmemUsage - pmemLimit;
|
|
||||||
// Container (the root process) is still alive and overflowing
|
|
||||||
// memory.
|
|
||||||
// Dump the process-tree and then clean it up.
|
|
||||||
msg = formatErrorMessage("physical",
|
|
||||||
formatUsageString(currentVmemUsage, vmemLimit,
|
|
||||||
currentPmemUsage, pmemLimit),
|
|
||||||
pId, containerId, pTree, delta);
|
|
||||||
isMemoryOverLimit = true;
|
|
||||||
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isMemoryOverLimit) {
|
if (!isMemoryOverLimit.isPresent()) {
|
||||||
|
long vmemLimit = ptInfo.getVmemLimit();
|
||||||
|
long pmemLimit = ptInfo.getPmemLimit();
|
||||||
|
// as processes begin with an age 1, we want to see if there
|
||||||
|
// are processes more than 1 iteration old.
|
||||||
|
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
|
||||||
|
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
|
||||||
|
if (isVmemCheckEnabled()
|
||||||
|
&& isProcessTreeOverLimit(containerId.toString(),
|
||||||
|
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
|
||||||
|
// The current usage (age=0) is always higher than the aged usage. We
|
||||||
|
// do not show the aged size in the message, base the delta on the
|
||||||
|
// current usage
|
||||||
|
long delta = currentVmemUsage - vmemLimit;
|
||||||
|
// Container (the root process) is still alive and overflowing
|
||||||
|
// memory.
|
||||||
|
// Dump the process-tree and then clean it up.
|
||||||
|
msg = formatErrorMessage("virtual",
|
||||||
|
formatUsageString(currentVmemUsage, vmemLimit,
|
||||||
|
currentPmemUsage, pmemLimit),
|
||||||
|
pId, containerId, pTree, delta);
|
||||||
|
isMemoryOverLimit = Optional.of(true);
|
||||||
|
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
|
||||||
|
} else if (isPmemCheckEnabled()
|
||||||
|
&& isProcessTreeOverLimit(containerId.toString(),
|
||||||
|
currentPmemUsage, curRssMemUsageOfAgedProcesses,
|
||||||
|
pmemLimit)) {
|
||||||
|
// The current usage (age=0) is always higher than the aged usage. We
|
||||||
|
// do not show the aged size in the message, base the delta on the
|
||||||
|
// current usage
|
||||||
|
long delta = currentPmemUsage - pmemLimit;
|
||||||
|
// Container (the root process) is still alive and overflowing
|
||||||
|
// memory.
|
||||||
|
// Dump the process-tree and then clean it up.
|
||||||
|
msg = formatErrorMessage("physical",
|
||||||
|
formatUsageString(currentVmemUsage, vmemLimit,
|
||||||
|
currentPmemUsage, pmemLimit),
|
||||||
|
pId, containerId, pTree, delta);
|
||||||
|
isMemoryOverLimit = Optional.of(true);
|
||||||
|
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
|
||||||
// Virtual or physical memory over limit. Fail the container and
|
// Virtual or physical memory over limit. Fail the container and
|
||||||
// remove
|
// remove
|
||||||
// the corresponding process tree
|
// the corresponding process tree
|
||||||
|
@ -31,7 +31,9 @@
|
|||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
|
||||||
import static org.mockito.Mockito.*;
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -242,4 +244,45 @@ public void testOpportunistic() throws Exception {
|
|||||||
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
|
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContainerUnderOom() throws Exception {
|
||||||
|
Configuration conf = new YarnConfiguration();
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
|
||||||
|
|
||||||
|
cGroupsMemoryResourceHandler.bootstrap(conf);
|
||||||
|
|
||||||
|
ContainerId containerId = mock(ContainerId.class);
|
||||||
|
when(containerId.toString()).thenReturn("container_01_01");
|
||||||
|
|
||||||
|
when(mockCGroupsHandler.getCGroupParam(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY,
|
||||||
|
containerId.toString(),
|
||||||
|
CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(CGroupsHandler.UNDER_OOM);
|
||||||
|
Optional<Boolean> outOfOom =
|
||||||
|
cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||||
|
Assert.assertTrue("The container should be reported to run under oom",
|
||||||
|
outOfOom.isPresent() && outOfOom.get().equals(true));
|
||||||
|
|
||||||
|
when(mockCGroupsHandler.getCGroupParam(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY,
|
||||||
|
containerId.toString(),
|
||||||
|
CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn("");
|
||||||
|
outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||||
|
Assert.assertTrue(
|
||||||
|
"The container should not be reported to run under oom",
|
||||||
|
outOfOom.isPresent() && outOfOom.get().equals(false));
|
||||||
|
|
||||||
|
when(mockCGroupsHandler.getCGroupParam(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY,
|
||||||
|
containerId.toString(),
|
||||||
|
CGROUP_PARAM_MEMORY_OOM_CONTROL)).
|
||||||
|
thenThrow(new ResourceHandlerException());
|
||||||
|
outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
|
||||||
|
Assert.assertFalse(
|
||||||
|
"No report of the oom status should be available.",
|
||||||
|
outOfOom.isPresent());
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user