From 08d5060605af81a3d6048044176dc656c0dad56c Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Fri, 10 Aug 2018 08:32:02 +0800 Subject: [PATCH] YARN-8521. NPE in AllocationTagsManager when a container is removed more than once. Contributed by Weiwei Yang. --- .../constraint/AllocationTagsManager.java | 5 ++ .../constraint/TestAllocationTagsManager.java | 37 ++++++++++++++ .../TestPlacementConstraintsUtil.java | 51 ++++++++++--------- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/AllocationTagsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/AllocationTagsManager.java index a6907676f9..6f160b6363 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/AllocationTagsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/AllocationTagsManager.java @@ -115,6 +115,11 @@ private void addTag(T type, String tag) { private void removeTagFromInnerMap(Map innerMap, String tag) { Long count = innerMap.get(tag); + if (count == null) { + LOG.warn("Trying to remove tags, however the tag " + tag + + " no longer exists on this node/rack."); + return; + } if (count > 1) { innerMap.put(tag, count - 1); } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestAllocationTagsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestAllocationTagsManager.java index 3f2aaed2a7..9095ac1291 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestAllocationTagsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestAllocationTagsManager.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableSet; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; @@ -38,6 +39,7 @@ import org.mockito.Mockito; import java.util.List; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -60,6 +62,41 @@ public void setup() { rmContext = rm.getRMContext(); } + @Test + public void testMultipleAddRemoveContainer() { + AllocationTagsManager atm = new AllocationTagsManager(rmContext); + + NodeId nodeId = NodeId.fromString("host1:123"); + ContainerId cid1 = TestUtils.getMockContainerId(1, 1); + ContainerId cid2 = TestUtils.getMockContainerId(1, 2); + ContainerId cid3 = TestUtils.getMockContainerId(1, 3); + Set tags1 = ImmutableSet.of("mapper", "reducer"); + Set tags2 = ImmutableSet.of("mapper"); + Set tags3 = ImmutableSet.of("zk"); + + // node - mapper : 2 + // - reduce : 1 + atm.addContainer(nodeId, cid1, tags1); + atm.addContainer(nodeId, cid2, tags2); + atm.addContainer(nodeId, cid3, tags3); + Assert.assertEquals(2L, + (long) atm.getAllocationTagsWithCount(nodeId).get("mapper")); + Assert.assertEquals(1L, + (long) atm.getAllocationTagsWithCount(nodeId).get("reducer")); + + // remove container1 + atm.removeContainer(nodeId, cid1, tags1); + Assert.assertEquals(1L, + (long) atm.getAllocationTagsWithCount(nodeId).get("mapper")); + Assert.assertNull(atm.getAllocationTagsWithCount(nodeId).get("reducer")); + + // remove the same container again, the reducer no longer exists, + // make sure there is no NPE here + atm.removeContainer(nodeId, cid1, tags1); + Assert.assertNull(atm.getAllocationTagsWithCount(nodeId).get("mapper")); + Assert.assertNull(atm.getAllocationTagsWithCount(nodeId).get("reducer")); + } + @Test public void testAllocationTagsManagerSimpleCases() throws InvalidAllocationTagsQueryException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementConstraintsUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementConstraintsUtil.java index dc61981123..5dbdc8a1b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementConstraintsUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementConstraintsUtil.java @@ -163,6 +163,11 @@ private ContainerId newContainerId(ApplicationId appId) { ApplicationAttemptId.newInstance(appId, 0), 0); } + private ContainerId newContainerId(ApplicationId appId, int containerId) { + return ContainerId.newContainerId( + ApplicationAttemptId.newInstance(appId, 0), containerId); + } + private SchedulerNode newSchedulerNode(String hostname, String rackName, NodeId nodeId) { SchedulerNode node = mock(SchedulerNode.class); @@ -271,12 +276,10 @@ public void testMultiTagsPlacementConstraints() SchedulerNode schedulerNode3 =newSchedulerNode(n3_r2.getHostName(), n3_r2.getRackName(), n3_r2.getNodeID()); - ContainerId ca = ContainerId - .newContainerId(ApplicationAttemptId.newInstance(appId1, 0), 0); + ContainerId ca = newContainerId(appId1, 0); tm.addContainer(n0_r1.getNodeID(), ca, ImmutableSet.of("A")); - ContainerId cb = ContainerId - .newContainerId(ApplicationAttemptId.newInstance(appId1, 0), 0); + ContainerId cb = newContainerId(appId1, 1); tm.addContainer(n1_r1.getNodeID(), cb, ImmutableSet.of("B")); // n0 and n1 has A/B so they cannot satisfy the PC @@ -297,11 +300,9 @@ public void testMultiTagsPlacementConstraints() * n2: A(1), B(1) * n3: */ - ContainerId ca1 = ContainerId - .newContainerId(ApplicationAttemptId.newInstance(appId1, 0), 0); + ContainerId ca1 = newContainerId(appId1, 2); tm.addContainer(n2_r2.getNodeID(), ca1, ImmutableSet.of("A")); - ContainerId cb1 = ContainerId - .newContainerId(ApplicationAttemptId.newInstance(appId1, 0), 0); + ContainerId cb1 = newContainerId(appId1, 3); tm.addContainer(n2_r2.getNodeID(), cb1, ImmutableSet.of("B")); // Only n2 has both A and B so only it can satisfy the PC @@ -468,9 +469,9 @@ public void testORConstraintAssignment() * n3: "" */ tm.addContainer(n0r1.getNodeID(), - newContainerId(appId1), ImmutableSet.of("hbase-m")); + newContainerId(appId1, 1), ImmutableSet.of("hbase-m")); tm.addContainer(n2r2.getNodeID(), - newContainerId(appId1), ImmutableSet.of("hbase-rs")); + newContainerId(appId1, 2), ImmutableSet.of("hbase-rs")); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n0r1.getNodeID()) .get("hbase-m").longValue()); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n2r2.getNodeID()) @@ -504,7 +505,7 @@ public void testORConstraintAssignment() * n3: hbase-rs(1) */ tm.addContainer(n3r2.getNodeID(), - newContainerId(appId1), ImmutableSet.of("hbase-rs")); + newContainerId(appId1, 2), ImmutableSet.of("hbase-rs")); // n3 is qualified now because it is allocated with hbase-rs tag Assert.assertTrue(PlacementConstraintsUtil.canSatisfyConstraints(appId1, createSchedulingRequest(sourceTag1), schedulerNode3, pcm, tm)); @@ -518,7 +519,7 @@ public void testORConstraintAssignment() */ // Place tm.addContainer(n2r2.getNodeID(), - newContainerId(appId1), ImmutableSet.of("spark")); + newContainerId(appId1, 3), ImmutableSet.of("spark")); // According to constraint, "zk" is allowed to be placed on a node // has "hbase-m" tag OR a node has both "hbase-rs" and "spark" tags. Assert.assertTrue(PlacementConstraintsUtil.canSatisfyConstraints(appId1, @@ -552,9 +553,9 @@ public void testANDConstraintAssignment() * n3: "" */ tm.addContainer(n0r1.getNodeID(), - newContainerId(appId1), ImmutableSet.of("hbase-m")); + newContainerId(appId1, 0), ImmutableSet.of("hbase-m")); tm.addContainer(n2r2.getNodeID(), - newContainerId(appId1), ImmutableSet.of("hbase-m")); + newContainerId(appId1, 1), ImmutableSet.of("hbase-m")); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n0r1.getNodeID()) .get("hbase-m").longValue()); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n2r2.getNodeID()) @@ -589,7 +590,7 @@ public void testANDConstraintAssignment() */ for (int i=0; i<4; i++) { tm.addContainer(n1r1.getNodeID(), - newContainerId(appId1), ImmutableSet.of("spark")); + newContainerId(appId1, i+2), ImmutableSet.of("spark")); } Assert.assertEquals(4L, tm.getAllocationTagsWithCount(n1r1.getNodeID()) .get("spark").longValue()); @@ -633,19 +634,19 @@ public void testGlobalAppConstraints() * n3: "" */ tm.addContainer(n0r1.getNodeID(), - newContainerId(application1), ImmutableSet.of("A")); + newContainerId(application1, 0), ImmutableSet.of("A")); tm.addContainer(n0r1.getNodeID(), - newContainerId(application2), ImmutableSet.of("A")); + newContainerId(application2, 1), ImmutableSet.of("A")); tm.addContainer(n1r1.getNodeID(), - newContainerId(application3), ImmutableSet.of("A")); + newContainerId(application3, 2), ImmutableSet.of("A")); tm.addContainer(n1r1.getNodeID(), - newContainerId(application3), ImmutableSet.of("A")); + newContainerId(application3, 3), ImmutableSet.of("A")); tm.addContainer(n1r1.getNodeID(), - newContainerId(application3), ImmutableSet.of("A")); + newContainerId(application3, 4), ImmutableSet.of("A")); tm.addContainer(n2r2.getNodeID(), - newContainerId(application1), ImmutableSet.of("A")); + newContainerId(application1, 5), ImmutableSet.of("A")); tm.addContainer(n2r2.getNodeID(), - newContainerId(application1), ImmutableSet.of("A")); + newContainerId(application1, 6), ImmutableSet.of("A")); SchedulerNode schedulerNode0 = newSchedulerNode(n0r1.getHostName(), n0r1.getRackName(), n0r1.getNodeID()); @@ -888,9 +889,9 @@ public void testInterAppConstraintsByAppID() * n3: "" */ tm.addContainer(n0r1.getNodeID(), - newContainerId(application1), ImmutableSet.of("hbase-m")); + newContainerId(application1, 0), ImmutableSet.of("hbase-m")); tm.addContainer(n2r2.getNodeID(), - newContainerId(application1), ImmutableSet.of("hbase-m")); + newContainerId(application1, 1), ImmutableSet.of("hbase-m")); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n0r1.getNodeID()) .get("hbase-m").longValue()); Assert.assertEquals(1L, tm.getAllocationTagsWithCount(n2r2.getNodeID()) @@ -958,7 +959,7 @@ application2, createSchedulingRequest(srcTags2), * n3: "" */ tm.addContainer(n0r1.getNodeID(), - newContainerId(application3), ImmutableSet.of("hbase-m")); + newContainerId(application3, 0), ImmutableSet.of("hbase-m")); // Anti-affinity to self/hbase-m Assert.assertFalse(PlacementConstraintsUtil