YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue on nodelabel recovery. (Bibin A Chundatt via wangda)

This commit is contained in:
Wangda Tan 2015-07-30 10:00:31 -07:00
parent 8acb30b016
commit 91b42e7d6e
3 changed files with 59 additions and 6 deletions

View File

@ -713,6 +713,9 @@ Release 2.8.0 - UNRELEASED
YARN-3963. AddNodeLabel on duplicate label addition shows success.
(Bibin A Chundatt via wangda)
YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue
on nodelabel recovery. (Bibin A Chundatt via wangda)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -33,8 +33,8 @@
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel; import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
@ -114,9 +114,15 @@ public void removeFromClusterNodeLabels(Collection<String> labelsToRemove)
throws IOException { throws IOException {
try { try {
writeLock.lock(); writeLock.lock();
if (getServiceState() == Service.STATE.STARTED) {
checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove); // We cannot remove node labels from collection when some queue(s) are
// using any of them.
// This check is only performed once the service has finished starting.
// While the service is starting, edit logs are replayed to recover state,
// and a historical operation may have removed labels that were used by
// some queues at that time but are not used by any current queue.
checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove);
}
// copy before NMs // copy before NMs
Map<String, Host> before = cloneNodeMap(); Map<String, Host> before = cloneNodeMap();

View File

@ -18,7 +18,10 @@
package org.apache.hadoop.yarn.server.resourcemanager.nodelabels; package org.apache.hadoop.yarn.server.resourcemanager.nodelabels;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -31,6 +34,7 @@
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel; import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase; import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
@ -46,7 +50,8 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase {
private final Resource LARGE_NODE = Resource.newInstance(1000, 0); private final Resource LARGE_NODE = Resource.newInstance(1000, 0);
NullRMNodeLabelsManager mgr = null; NullRMNodeLabelsManager mgr = null;
RMNodeLabelsManager lmgr = null;
boolean checkQueueCall = false;
@Before @Before
public void before() { public void before() {
mgr = new NullRMNodeLabelsManager(); mgr = new NullRMNodeLabelsManager();
@ -507,6 +512,45 @@ public void testPullRMNodeLabelsInfo() throws IOException {
checkNodeLabelInfo(infos, "z", 0, 0); checkNodeLabelInfo(infos, "z", 0, 0);
} }
/**
 * Verifies that {@code checkRemoveFromClusterNodeLabelsOfQueue} is not
 * invoked for label removals performed before the RM has started, and that
 * it is invoked for a removal issued once the RM is running.
 *
 * @throws Exception if the temporary store, the RM, or the label manager
 *           fails
 */
@Test(timeout = 60000)
public void testcheckRemoveFromClusterNodeLabelsOfQueue() throws Exception {
  // Label manager whose queue check only records that it was reached.
  class TestRMLabelManger extends RMNodeLabelsManager {
    @Override
    protected void checkRemoveFromClusterNodeLabelsOfQueue(
        Collection<String> labelsToRemove) throws IOException {
      // No real validation; just flag the call for the assertions below.
      checkQueueCall = true;
    }
  }
  lmgr = new TestRMLabelManger();

  // Point the file-system label store at a throw-away directory.
  Configuration conf = new Configuration();
  File tempDir = File.createTempFile("nlb", ".tmp");
  tempDir.delete();
  tempDir.mkdirs();
  tempDir.deleteOnExit();
  conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR,
      tempDir.getAbsolutePath());
  conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);

  MockRM rm = new MockRM(conf) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return lmgr;
    }
  };
  try {
    // Add and remove a label before the RM is started.
    // NOTE(review): presumably these operations are written to the label
    // store and replayed when the RM starts -- confirm against the store.
    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a"));
    lmgr.removeFromClusterNodeLabels(Arrays.asList("a"));
    rm.getRMContext().setNodeLabelManager(lmgr);
    rm.start();

    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a"));
    Assert.assertFalse(
        "Queue check must be skipped for removals made before start",
        checkQueueCall);

    // With the service started, a removal must go through the queue check.
    lmgr.removeFromClusterNodeLabels(Arrays.asList("a"));
    Assert.assertTrue(
        "Queue check must run for removals made after start",
        checkQueueCall);
  } finally {
    // Always shut down, even when an assertion above fails, so that the RM
    // and the label manager do not leak into subsequent tests.
    try {
      lmgr.stop();
      lmgr.close();
    } finally {
      rm.stop();
    }
  }
}
@Test(timeout = 5000) @Test(timeout = 5000)
public void testLabelsToNodesOnNodeActiveDeactive() throws Exception { public void testLabelsToNodesOnNodeActiveDeactive() throws Exception {
// Activate a node without assigning any labels // Activate a node without assigning any labels