From 39f15579b83eae9a1384dc077491f3b9326ca99a Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Sat, 29 Sep 2012 22:39:03 +0000 Subject: [PATCH] YARN-116. Add the ability to change the RM include/exclude file without a restart. (Contributed by xieguiming and Harsh J) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1391912 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../server/resourcemanager/AdminService.java | 5 +- .../resourcemanager/NodesListManager.java | 10 ++- .../TestResourceTrackerService.java | 82 ++++++++++++++++++- 4 files changed, 92 insertions(+), 8 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c55db5a785..1af5763f10 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -40,6 +40,9 @@ Release 2.0.3-alpha - Unreleased YARN-53. Added the missing getGroups API to ResourceManager. (Bo Wang via vinodkv) + YARN-116. Add the ability to change the RM include/exclude file without + a restart. (xieguiming and Harsh J via sseth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 4a36fd28dd..5d05c58904 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -189,15 +189,14 @@ public RefreshNodesResponse refreshNodes(RefreshNodesRequest request) throws YarnRemoteException { UserGroupInformation user = checkAcls("refreshNodes"); try { - this.nodesListManager.refreshNodes(); + this.nodesListManager.refreshNodes(new YarnConfiguration()); RMAuditLogger.logSuccess(user.getShortUserName(), "refreshNodes", "AdminService"); return recordFactory.newRecordInstance(RefreshNodesResponse.class); } catch (IOException ioe) { LOG.info("Exception refreshing nodes ", ioe); RMAuditLogger.logFailure(user.getShortUserName(), "refreshNodes", - adminAcl.toString(), "AdminService", - "Exception refreshing nodes"); + adminAcl.toString(), "AdminService", "Exception refreshing nodes"); throw RPCUtil.getRemoteException(ioe); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java index 33c79f6cad..41b5881da3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java @@ -103,8 +103,16 @@ private void printConfiguredHosts() { } } - public void refreshNodes() throws IOException { + public void refreshNodes(Configuration yarnConf) throws IOException { synchronized (hostsReader) { + if (null == yarnConf) { + yarnConf = new YarnConfiguration(); + } + hostsReader.updateFileNames(yarnConf.get( + YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, + YarnConfiguration.DEFAULT_RM_NODES_INCLUDE_FILE_PATH), yarnConf.get( + YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, + YarnConfiguration.DEFAULT_RM_NODES_EXCLUDE_FILE_PATH)); hostsReader.refresh(); printConfiguredHosts(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index d7852557bd..b97ba015e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -54,7 +54,7 @@ public class TestResourceTrackerService { private MockRM rm; /** - * decommissioning using a include hosts file + * Decommissioning using a pre-configured include hosts file */ @Test public void testDecommissionWithIncludeHosts() throws Exception { @@ -86,7 +86,7 @@ public void testDecommissionWithIncludeHosts() throws Exception { String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host1", ip); - rm.getNodesListManager().refreshNodes(); + rm.getNodesListManager().refreshNodes(conf); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); @@ -106,7 +106,7 @@ public void testDecommissionWithIncludeHosts() throws Exception { } /** - * decommissioning using a exclude hosts file + * Decommissioning using a pre-configured exclude hosts file */ @Test public void testDecommissionWithExcludeHosts() throws Exception { @@ -133,7 +133,7 @@ public void testDecommissionWithExcludeHosts() throws Exception { String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host2", ip); - rm.getNodesListManager().refreshNodes(); + rm.getNodesListManager().refreshNodes(conf); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); @@ -147,7 +147,81 @@ public void testDecommissionWithExcludeHosts() throws Exception { NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); checkDecommissionedNMCount(rm, ++metricCount); } + + /** + * Decommissioning using a post-configured include hosts file + */ + @Test + public void testAddNewIncludePathToConfiguration() throws Exception { + Configuration conf = new Configuration(); + rm = new MockRM(conf); + rm.start(); + MockNM nm1 = rm.registerNode("host1:1234", 5120); + MockNM nm2 = rm.registerNode("host2:5678", 10240); + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + assert(metrics != null); + int initialMetricCount = metrics.getNumDecommisionedNMs(); + HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertEquals( + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertEquals( + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + writeToHostsFile("host1"); + conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile + .getAbsolutePath()); + rm.getNodesListManager().refreshNodes(conf); + nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertEquals( + "Node should not have been decomissioned.", + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertEquals("Node should have been decomissioned but is in state" + + nodeHeartbeat.getNodeAction(), + NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction()); + checkDecommissionedNMCount(rm, ++initialMetricCount); + } + /** + * Decommissioning using a post-configured exclude hosts file + */ + @Test + public void testAddNewExcludePathToConfiguration() throws Exception { + Configuration conf = new Configuration(); + rm = new MockRM(conf); + rm.start(); + MockNM nm1 = rm.registerNode("host1:1234", 5120); + MockNM nm2 = rm.registerNode("host2:5678", 10240); + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + assert(metrics != null); + int initialMetricCount = metrics.getNumDecommisionedNMs(); + HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertEquals( + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertEquals( + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + writeToHostsFile("host2"); + conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile + .getAbsolutePath()); + rm.getNodesListManager().refreshNodes(conf); + nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertEquals( + "Node should not have been decomissioned.", + NodeAction.NORMAL, + nodeHeartbeat.getNodeAction()); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertEquals("Node should have been decomissioned but is in state" + + nodeHeartbeat.getNodeAction(), + NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction()); + checkDecommissionedNMCount(rm, ++initialMetricCount); + } + @Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1");