YARN-10034. Remove Allocation Tags from released container from Decommission node

Contributed by Kyungwan Nam. Reviewed by Adam Antal.
This commit is contained in:
Prabhu Joseph 2020-03-19 12:48:30 +05:30 committed by Prabhu Joseph
parent 654db35fa2
commit f2d3ac2a3f
3 changed files with 88 additions and 0 deletions

View File

@@ -2044,6 +2044,7 @@ public class CapacityScheduler extends
super.completedContainer(container, SchedulerUtils super.completedContainer(container, SchedulerUtils
.createAbnormalContainerStatus(container.getContainerId(), .createAbnormalContainerStatus(container.getContainerId(),
SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
node.releaseContainer(container.getContainerId(), true);
} }
// Remove reservations, if any // Remove reservations, if any

View File

@@ -795,6 +795,7 @@ public class FairScheduler extends
super.completedContainer(container, SchedulerUtils super.completedContainer(container, SchedulerUtils
.createAbnormalContainerStatus(container.getContainerId(), .createAbnormalContainerStatus(container.getContainerId(),
SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
node.releaseContainer(container.getContainerId(), true);
} }
// Remove reservations, if any // Remove reservations, if any

View File

@@ -537,6 +537,92 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase {
} }
} }
/**
 * Verifies that the allocation tags recorded for a node are dropped as soon
 * as that node is removed from the scheduler.
 *
 * <p>The test allocates a single tagged container on one node, marks it as
 * started, checks that the tag is counted for the node, then delivers a
 * {@link NodeRemovedSchedulerEvent} and expects the node to have no tag
 * mapping left.
 *
 * <p>The body only runs for the capacity scheduler; for any other scheduler
 * type the test returns without asserting anything.
 *
 * @throws Exception if the mock cluster cannot be set up or driven
 */
@Test(timeout = 30000L)
public void testNodeRemovedWithAllocationTags() throws Exception {
  // Currently only can be tested against capacity scheduler.
  if (!getSchedulerType().equals(SchedulerType.CAPACITY)) {
    return;
  }

  final String testTag1 = "some-tag";
  YarnConfiguration conf = getConf();
  conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER, "scheduler");
  MockRM rm1 = new MockRM(conf);
  try {
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234",
        10240, rm1.getResourceTrackerService());
    nm1.registerNode();

    MockRMAppSubmissionData data =
        MockRMAppSubmissionData.Builder.createWithMemory(200, rm1)
            .withAppName("name")
            .withUser("user")
            .withAcls(new HashMap<>())
            .withUnmanagedAM(false)
            .withQueue("default")
            .withMaxAppAttempts(-1)
            .withCredentials(null)
            .withAppType("Test")
            .withWaitForAppAcceptedState(false)
            .withKeepContainers(true)
            .build();
    RMApp app1 = MockRMAppSubmitter.submit(rm1, data);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

    // allocate 1 container with tag1
    SchedulingRequest sr = SchedulingRequest
        .newInstance(1L, Priority.newInstance(1),
            ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED),
            Sets.newHashSet(testTag1),
            ResourceSizing.newInstance(1, Resource.newInstance(1024, 1)),
            null);
    AllocateRequest ar = AllocateRequest.newBuilder()
        .schedulingRequests(Lists.newArrayList(sr)).build();
    am1.allocate(ar);
    nm1.nodeHeartbeat(true);

    // Poll until the container shows up; the loop itself is unbounded and
    // relies on the @Test timeout to abort if nothing is ever allocated.
    List<Container> allocated = new ArrayList<>();
    while (allocated.size() < 1) {
      AllocateResponse rsp = am1
          .allocate(new ArrayList<>(), new ArrayList<>());
      allocated.addAll(rsp.getAllocatedContainers());
      nm1.nodeHeartbeat(true);
      Thread.sleep(1000);
    }
    Assert.assertEquals(1, allocated.size());

    // Only the container answering allocation request id 1 is of interest.
    Set<Container> containers = allocated.stream()
        .filter(container -> container.getAllocationRequestId() == 1L)
        .collect(Collectors.toSet());
    Assert.assertEquals(1, containers.size());
    ContainerId cid = containers.iterator().next().getId();

    // mock container start
    rm1.getRMContext().getScheduler()
        .getSchedulerNode(nm1.getNodeId()).containerStarted(cid);

    // verifies the allocation is made with correct number of tags
    Map<String, Long> nodeTags = rm1.getRMContext()
        .getAllocationTagsManager()
        .getAllocationTagsWithCount(nm1.getNodeId());
    Assert.assertNotNull(nodeTags.get(testTag1));
    Assert.assertEquals(1, nodeTags.get(testTag1).intValue());

    // remove the node
    RMNode node1 = MockNodes.newNodeInfo(
        0, Resources.createResource(nm1.getMemory()), 1, "127.0.0.1", 1234);
    rm1.getRMContext().getScheduler().handle(
        new NodeRemovedSchedulerEvent(node1));

    // Once the node is removed, the tag should be removed immediately
    nodeTags = rm1.getRMContext().getAllocationTagsManager()
        .getAllocationTagsWithCount(nm1.getNodeId());
    Assert.assertNull(nodeTags);
  } finally {
    // Always shut the mock RM down, including when an assertion fails, so
    // its services do not leak into the tests that run afterwards.
    rm1.stop();
  }
}
@Test(timeout=60000) @Test(timeout=60000)
public void testContainerReleasedByNode() throws Exception { public void testContainerReleasedByNode() throws Exception {
System.out.println("Starting testContainerReleasedByNode"); System.out.println("Starting testContainerReleasedByNode");