YARN-8511. When AM releases a container, RM removes allocation tags before it is released by NM. (Weiwei Yang via wangda)

Change-Id: I6f9f409f2ef685b405cbff547dea9623bf3322d9
Author: Wangda Tan
Date: 2018-07-16 10:54:41 -07:00
Parent: 88b2794244
Commit: 752dcce5f4
9 changed files with 162 additions and 6 deletions
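The diff below moves allocation-tag cleanup out of the RMContainer finished transition and into SchedulerNode#releaseContainer, so a tag stays visible to the scheduler from the moment the AM releases a container until the NM actually reports it as complete. A minimal, self-contained sketch of that ordering follows; the class and method names are illustrative stand-ins, not the YARN classes changed in this commit.

import java.util.HashMap;
import java.util.Map;

// Illustrative sketch of the lifecycle this patch enforces: a tag recorded at
// allocation time survives the AM's release request and is only dropped once
// the NM confirms the container is complete.
public class TagLifecycleSketch {
  // Stand-in for AllocationTagsManager bookkeeping on a single node: tag -> count.
  private final Map<String, Integer> tagCounts = new HashMap<>();

  void containerAllocated(String tag) {
    tagCounts.merge(tag, 1, Integer::sum);   // tag becomes visible to the scheduler
  }

  void amReleasedContainer(String tag) {
    // Intentionally no tag bookkeeping here: the NM may take a while to
    // actually release the container, and the scheduler must keep honoring
    // the tag until that happens.
  }

  void nmConfirmedRelease(String tag) {
    // Only now is the tag removed, mirroring SchedulerNode#releaseContainer below.
    tagCounts.computeIfPresent(tag, (t, c) -> c > 1 ? c - 1 : null);
  }

  int count(String tag) {
    return tagCounts.getOrDefault(tag, 0);
  }

  public static void main(String[] args) {
    TagLifecycleSketch node = new TagLifecycleSketch();
    node.containerAllocated("mapper");
    node.amReleasedContainer("mapper");
    System.out.println(node.count("mapper"));   // 1: still visible after the AM release
    node.nmConfirmedRelease("mapper");
    System.out.println(node.count("mapper"));   // 0: gone once the NM confirms
  }
}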


@@ -38,6 +38,7 @@
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
@@ -218,6 +219,11 @@ public Map<String, Long> getAllocationTagsWithCount() {
return null;
}
@Override
public RMContext getRMContext() {
return null;
}
@Override
public Resource getPhysicalResource() {
return null;


@@ -30,6 +30,7 @@
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
@@ -206,6 +207,11 @@ public Map<String, Long> getAllocationTagsWithCount() {
return node.getAllocationTagsWithCount();
}
@Override
public RMContext getRMContext() {
return node.getRMContext();
}
@Override
public Resource getPhysicalResource() {
return null;


@@ -701,11 +701,6 @@ private static class FinishedTransition extends BaseTransition {
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
// Notify AllocationTagsManager
container.rmContext.getAllocationTagsManager().removeContainer(
container.getNodeId(), container.getContainerId(),
container.getAllocationTags());
RMContainerFinishedEvent finishedEvent = (RMContainerFinishedEvent) event;
container.finishTime = System.currentTimeMillis();


@@ -33,6 +33,7 @@
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
/**
* Node managers information on available resources
@@ -189,4 +190,9 @@ public interface RMNode {
* @return a map of each allocation tag and its count.
*/
Map<String, Long> getAllocationTagsWithCount();
/**
* @return the RM context associated with this RM node.
*/
RMContext getRMContext();
}


@@ -1541,4 +1541,9 @@ public Map<String, Long> getAllocationTagsWithCount() {
return context.getAllocationTagsManager()
.getAllocationTagsWithCount(getNodeID());
}
@Override
public RMContext getRMContext() {
return this.context;
}
}


@@ -39,6 +39,7 @@
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
@@ -74,6 +75,7 @@ public abstract class SchedulerNode {
private final RMNode rmNode;
private final String nodeName;
private final RMContext rmContext;
private volatile Set<String> labels = null;
@@ -83,6 +85,7 @@ public abstract class SchedulerNode {
public SchedulerNode(RMNode node, boolean usePortForNodeName,
Set<String> labels) {
this.rmNode = node;
this.rmContext = node.getRMContext();
this.unallocatedResource = Resources.clone(node.getTotalCapability());
this.totalResource = Resources.clone(node.getTotalCapability());
if (usePortForNodeName) {
@@ -242,6 +245,18 @@ public synchronized void releaseContainer(ContainerId containerId,
launchedContainers.remove(containerId);
Container container = info.container.getContainer();
// We remove allocation tags when a container is actually
// released on NM. This is to avoid running into situation
// when AM releases a container and NM has some delay to
// actually release it, then the tag can still be visible
// at RM so that RM can respect it during scheduling new containers.
if (rmContext != null && rmContext.getAllocationTagsManager() != null) {
rmContext.getAllocationTagsManager()
.removeContainer(container.getNodeId(),
container.getId(), container.getAllocationTags());
}
updateResourceForReleasedContainer(container);
if (LOG.isDebugEnabled()) {

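A note on the SchedulerNode hunk above: the tag cleanup is guarded by null checks on the RMContext and its AllocationTagsManager because several RMNode stubs touched elsewhere in this commit return null from getRMContext(); without the guard, those lightweight test nodes would fail with a NullPointerException when a container is released.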

@@ -285,6 +285,11 @@ public Map<String, Long> getAllocationTagsWithCount() {
return null;
}
@Override
public RMContext getRMContext() {
return null;
}
@Override
public Resource getPhysicalResource() {
return this.physicalResource;


@@ -60,10 +60,14 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTags;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.AllocationTagsManager;
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
@@ -401,6 +405,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
Container container = BuilderUtils.newContainer(containerId, nodeId,
"host:3465", resource, priority, null);
container.setAllocationTags(ImmutableSet.of("mapper"));
ConcurrentMap<ApplicationId, RMApp> rmApps =
spy(new ConcurrentHashMap<ApplicationId, RMApp>());
RMApp rmApp = mock(RMApp.class);
@@ -423,11 +428,14 @@ public void testContainerTransitionNotifyAllocationTagsManager()
true);
when(rmContext.getYarnConfiguration()).thenReturn(conf);
RMNode rmNode = new RMNodeImpl(nodeId, rmContext,
"localhost", 0, 0, null, Resource.newInstance(10240, 10), null);
SchedulerNode schedulerNode = new FiCaSchedulerNode(rmNode, false);
/* First container: ALLOCATED -> KILLED */
RMContainerImpl rmContainer = new RMContainerImpl(container,
SchedulerRequestKey.extractFrom(container), appAttemptId,
nodeId, "user", rmContext);
rmContainer.setAllocationTags(ImmutableSet.of("mapper"));
Assert.assertEquals(0,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -437,6 +445,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.handle(new RMContainerEvent(containerId,
RMContainerEventType.START));
schedulerNode.allocateContainer(rmContainer);
Assert.assertEquals(1,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -446,6 +455,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.handle(new RMContainerFinishedEvent(containerId, ContainerStatus
.newInstance(containerId, ContainerState.COMPLETE, "", 0),
RMContainerEventType.KILL));
schedulerNode.releaseContainer(container.getId(), true);
Assert.assertEquals(0,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -465,6 +475,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.setAllocationTags(ImmutableSet.of("mapper"));
rmContainer.handle(new RMContainerEvent(containerId,
RMContainerEventType.START));
schedulerNode.allocateContainer(rmContainer);
Assert.assertEquals(1,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -477,6 +488,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.handle(new RMContainerFinishedEvent(containerId, ContainerStatus
.newInstance(containerId, ContainerState.COMPLETE, "", 0),
RMContainerEventType.FINISHED));
schedulerNode.releaseContainer(container.getId(), true);
Assert.assertEquals(0,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -496,6 +508,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.handle(new RMContainerEvent(containerId,
RMContainerEventType.START));
schedulerNode.allocateContainer(rmContainer);
Assert.assertEquals(1,
tagsManager.getNodeCardinalityByOp(nodeId,
@@ -511,6 +524,7 @@ public void testContainerTransitionNotifyAllocationTagsManager()
rmContainer.handle(new RMContainerFinishedEvent(containerId, ContainerStatus
.newInstance(containerId, ContainerState.COMPLETE, "", 0),
RMContainerEventType.FINISHED));
schedulerNode.releaseContainer(container.getId(), true);
Assert.assertEquals(0,
tagsManager.getNodeCardinalityByOp(nodeId,


@@ -27,9 +27,16 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
@@ -416,6 +423,103 @@ public void testReleasedContainerIfAppAttemptisNull() throws Exception {
}
}
@Test(timeout = 30000l)
public void testContainerReleaseWithAllocationTags() throws Exception {
// Currently only can be tested against capacity scheduler.
if (getSchedulerType().equals(SchedulerType.CAPACITY)) {
final String testTag1 = "some-tag";
final String testTag2 = "some-other-tag";
YarnConfiguration conf = getConf();
conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER, "scheduler");
MockRM rm1 = new MockRM(conf);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234",
10240, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 =
rm1.submitApp(200, "name", "user", new HashMap<>(), false, "default",
-1, null, "Test", false, true);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// allocate 1 container with tag1
SchedulingRequest sr = SchedulingRequest
.newInstance(1l, Priority.newInstance(1),
ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED),
Sets.newHashSet(testTag1),
ResourceSizing.newInstance(1, Resource.newInstance(1024, 1)),
null);
// allocate 3 containers with tag2
SchedulingRequest sr1 = SchedulingRequest
.newInstance(2l, Priority.newInstance(1),
ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED),
Sets.newHashSet(testTag2),
ResourceSizing.newInstance(3, Resource.newInstance(1024, 1)),
null);
AllocateRequest ar = AllocateRequest.newBuilder()
.schedulingRequests(Lists.newArrayList(sr, sr1)).build();
am1.allocate(ar);
nm1.nodeHeartbeat(true);
List<Container> allocated = new ArrayList<>();
while (allocated.size() < 4) {
AllocateResponse rsp = am1
.allocate(new ArrayList<>(), new ArrayList<>());
allocated.addAll(rsp.getAllocatedContainers());
nm1.nodeHeartbeat(true);
Thread.sleep(1000);
}
Assert.assertEquals(4, allocated.size());
Set<Container> containers = allocated.stream()
.filter(container -> container.getAllocationRequestId() == 1l)
.collect(Collectors.toSet());
Assert.assertNotNull(containers);
Assert.assertEquals(1, containers.size());
ContainerId cid = containers.iterator().next().getId();
// mock container start
rm1.getRMContext().getScheduler()
.getSchedulerNode(nm1.getNodeId()).containerStarted(cid);
// verifies the allocation is made with correct number of tags
Map<String, Long> nodeTags = rm1.getRMContext()
.getAllocationTagsManager()
.getAllocationTagsWithCount(nm1.getNodeId());
Assert.assertNotNull(nodeTags.get(testTag1));
Assert.assertEquals(1, nodeTags.get(testTag1).intValue());
// release a container
am1.allocate(new ArrayList<>(), Lists.newArrayList(cid));
// before NM confirms, the tag should still exist
nodeTags = rm1.getRMContext().getAllocationTagsManager()
.getAllocationTagsWithCount(nm1.getNodeId());
Assert.assertNotNull(nodeTags);
Assert.assertNotNull(nodeTags.get(testTag1));
Assert.assertEquals(1, nodeTags.get(testTag1).intValue());
// NM reports back that container is released
// RM should cleanup the tag
ContainerStatus cs = ContainerStatus.newInstance(cid,
ContainerState.COMPLETE, "", 0);
nm1.nodeHeartbeat(Lists.newArrayList(cs), true);
// Wait on condition
// 1) tag1 doesn't exist anymore
// 2) num of tag2 is still 3
GenericTestUtils.waitFor(() -> {
Map<String, Long> tags = rm1.getRMContext()
.getAllocationTagsManager()
.getAllocationTagsWithCount(nm1.getNodeId());
return tags.get(testTag1) == null &&
tags.get(testTag2).intValue() == 3;
}, 500, 3000);
}
}
@Test(timeout=60000)
public void testContainerReleasedByNode() throws Exception {
System.out.println("Starting testContainerReleasedByNode");