From 15c3e7ffe3d1c57ad36afd993f09fc47889c93bd Mon Sep 17 00:00:00 2001 From: rohithsharmaks Date: Wed, 9 Dec 2015 10:50:43 +0530 Subject: [PATCH] YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect to RM. (Junping Du via rohithsharmaks) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/NodeStatusUpdaterImpl.java | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c7eae9f49d..3935cde1e9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1108,6 +1108,9 @@ Release 2.8.0 - UNRELEASED YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover. (Naganarasimha G R and Xuan Gong via xgong) + YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect + to RM. (Junping Du via rohithsharmaks) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 34267b373f..ba915c27cb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -134,6 +134,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private Runnable statusUpdaterRunnable; private Thread statusUpdater; + private boolean failedToConnect = false; private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER; private boolean 
registeredWithRM = false; Set pendingContainersToRemove = new HashSet(); @@ -241,7 +242,7 @@ protected void serviceStop() throws Exception { // the isStopped check is for avoiding multiple unregistrations. if (this.registeredWithRM && !this.isStopped && !isNMUnderSupervisionWithRecoveryEnabled() - && !context.getDecommissioned()) { + && !context.getDecommissioned() && !failedToConnect) { unRegisterNM(); } // Interrupt the updater. @@ -823,6 +824,8 @@ public void run() { //catch and throw the exception if tried MAX wait time to connect RM dispatcher.getEventHandler().handle( new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); + // failed to connect to RM. + failedToConnect = true; throw new YarnRuntimeException(e); } catch (Throwable e) {