From f2b6c03fc1929679170dfec04891f202181d88e8 Mon Sep 17 00:00:00 2001
From: Szilard Nemeth
Date: Wed, 28 Jul 2021 17:02:15 +0200
Subject: [PATCH] YARN-6272. TestAMRMClient#testAMRMClientWithContainerResourceChange fails intermittently. Contributed by Andras Gyory & Prabhu Joseph

---
 .../yarn/client/api/impl/TestAMRMClient.java | 70 +++++++++++++------
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
index 24666f066d..fff27bd008 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
@@ -956,7 +956,6 @@ private List allocateAndStartContainers(
     return containers;
   }
 
-
   private void doContainerResourceChange(
       final AMRMClient amClient, List containers)
       throws YarnException, IOException {
@@ -986,38 +985,65 @@ private void doContainerResourceChange(
             Resource.newInstance(512, 1), null));
     assertEquals(Resource.newInstance(512, 1),
         amClientImpl.change.get(container1.getId()).getValue().getCapability());
-    // request resource increase for container2
-    amClientImpl.requestContainerUpdate(container2,
-        UpdateContainerRequest.newInstance(container2.getVersion(),
-            container2.getId(), ContainerUpdateType.INCREASE_RESOURCE,
-            Resource.newInstance(2048, 1), null));
-    assertEquals(Resource.newInstance(2048, 1),
-        amClientImpl.change.get(container2.getId()).getValue().getCapability());
     // verify release request will cancel pending change requests for the same
     // container
     amClientImpl.requestContainerUpdate(container3,
         UpdateContainerRequest.newInstance(container3.getVersion(),
             container3.getId(), ContainerUpdateType.INCREASE_RESOURCE,
             Resource.newInstance(2048, 1), null));
-    assertEquals(3, amClientImpl.pendingChange.size());
-    amClientImpl.releaseAssignedContainer(container3.getId());
     assertEquals(2, amClientImpl.pendingChange.size());
+    amClientImpl.releaseAssignedContainer(container3.getId());
+    assertEquals(1, amClientImpl.pendingChange.size());
     // as of now: container1 asks to decrease to (512, 1)
     // container2 asks to increase to (2048, 1)
     // send allocation requests
-    AllocateResponse allocResponse = amClient.allocate(0.1f);
-    assertEquals(0, amClientImpl.change.size());
-    // we should get decrease confirmation right away
-    List updatedContainers =
-        allocResponse.getUpdatedContainers();
-    assertEquals(1, updatedContainers.size());
     // we should get increase allocation after the next NM's heartbeat to RM
-    triggerSchedulingWithNMHeartBeat();
-    // get allocations
-    allocResponse = amClient.allocate(0.1f);
-    updatedContainers =
-        allocResponse.getUpdatedContainers();
-    assertEquals(1, updatedContainers.size());
+    assertUpdatedContainers(amClient, container1);
+    // request resource increase for container2
+    amClientImpl.requestContainerUpdate(container2,
+        UpdateContainerRequest.newInstance(container2.getVersion(),
+            container2.getId(), ContainerUpdateType.INCREASE_RESOURCE,
+            Resource.newInstance(2048, 1), null));
+    assertEquals(Resource.newInstance(2048, 1),
+        amClientImpl.change.get(container2.getId()).getValue().getCapability());
+    assertUpdatedContainers(amClient, container2);
+  }
+
+  /**
+   * Repeatedly hands the scheduler a {@code NodeUpdateSchedulerEvent} for the
+   * node hosting {@code container} and calls {@code amClient.allocate(0.1f)}
+   * until exactly one updated container is reported, then asserts that count.
+   *
+   * <p>Every iteration counts as an attempt, including ones where
+   * {@code allocate} throws, so the loop is bounded by 1000 iterations.
+   *
+   * @param amClient client used to send the allocate requests
+   * @param container container whose node id selects the node to update
+   */
+  private void assertUpdatedContainers(AMRMClient amClient, Container container) {
+    RMContext context = yarnCluster.getResourceManager().getRMContext();
+    RMNode rmNode = context.getRMNodes().get(container.getNodeId());
+    int updatedCount = 0;
+    Exception lastFailure = null;
+    for (int attempt = 0; attempt < 1000 && updatedCount != 1; attempt++) {
+      context.getScheduler().handle(new NodeUpdateSchedulerEvent(rmNode));
+      try {
+        updatedCount = amClient.allocate(0.1f).getUpdatedContainers().size();
+        if (updatedCount != 1) {
+          sleep(20);
+        }
+      } catch (Exception e) {
+        // Failed attempts are retried; keep the last failure so a final
+        // assertion error can report its cause instead of hiding it.
+        lastFailure = e;
+      }
+    }
+
+    if (updatedCount != 1 && lastFailure != null) {
+      throw new AssertionError(
+          "Container resource change update failed", lastFailure);
+    }
+    Assert.assertEquals("Container resource change update failed", 1, updatedCount);
   }
 
   @Test