diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index dfedc8a88f..71fde68115 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -227,6 +227,9 @@ Release 2.8.0 - UNRELEASED
YARN-3493. RM fails to come up with error "Failed to load/recover state"
when mem settings are changed. (Jian He via wangda)
+ YARN-3136. Fixed a synchronization problem of
+ AbstractYarnScheduler#getTransferredContainers. (Sunil G via jianhe)
+
Release 2.7.1 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
index 8aae152f93..ece8548456 100644
--- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
+++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
@@ -469,6 +469,14 @@
+
+
+
+
+
+
+
+
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index 4ad8e879a1..7244b17f02 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -298,32 +298,35 @@ public RegisterApplicationMasterResponse registerApplicationMaster(
// For work-preserving AM restart, retrieve previous attempts' containers
// and corresponding NM tokens.
- List transferredContainers =
- ((AbstractYarnScheduler) rScheduler)
+ if (app.getApplicationSubmissionContext()
+ .getKeepContainersAcrossApplicationAttempts()) {
+ List transferredContainers = ((AbstractYarnScheduler) rScheduler)
.getTransferredContainers(applicationAttemptId);
- if (!transferredContainers.isEmpty()) {
- response.setContainersFromPreviousAttempts(transferredContainers);
- List nmTokens = new ArrayList();
- for (Container container : transferredContainers) {
- try {
- NMToken token = rmContext.getNMTokenSecretManager()
- .createAndGetNMToken(app.getUser(), applicationAttemptId,
- container);
- if (null != token) {
- nmTokens.add(token);
- }
- } catch (IllegalArgumentException e) {
- // if it's a DNS issue, throw UnknowHostException directly and that
- // will be automatically retried by RMProxy in RPC layer.
- if (e.getCause() instanceof UnknownHostException) {
- throw (UnknownHostException) e.getCause();
+ if (!transferredContainers.isEmpty()) {
+ response.setContainersFromPreviousAttempts(transferredContainers);
+ List nmTokens = new ArrayList();
+ for (Container container : transferredContainers) {
+ try {
+ NMToken token = rmContext.getNMTokenSecretManager()
+ .createAndGetNMToken(app.getUser(), applicationAttemptId,
+ container);
+ if (null != token) {
+ nmTokens.add(token);
+ }
+ } catch (IllegalArgumentException e) {
+ // if it's a DNS issue, throw UnknowHostException directly and
+ // that
+ // will be automatically retried by RMProxy in RPC layer.
+ if (e.getCause() instanceof UnknownHostException) {
+ throw (UnknownHostException) e.getCause();
+ }
}
}
+ response.setNMTokensFromPreviousAttempts(nmTokens);
+ LOG.info("Application " + appID + " retrieved "
+ + transferredContainers.size() + " containers from previous"
+ + " attempts and " + nmTokens.size() + " NM tokens.");
}
- response.setNMTokensFromPreviousAttempts(nmTokens);
- LOG.info("Application " + appID + " retrieved "
- + transferredContainers.size() + " containers from previous"
- + " attempts and " + nmTokens.size() + " NM tokens.");
}
response.setSchedulerResourceTypes(rScheduler
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index 1a8c653cc5..ae927f1139 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
@@ -21,12 +21,15 @@
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -67,6 +70,8 @@
@SuppressWarnings("unchecked")
+@Private
+@Unstable
public abstract class AbstractYarnScheduler
extends AbstractService implements ResourceScheduler {
@@ -91,7 +96,12 @@ public abstract class AbstractYarnScheduler
private long configuredMaximumAllocationWaitTime;
protected RMContext rmContext;
- protected Map> applications;
+
+ /*
+ * All schedulers which are inheriting AbstractYarnScheduler should use
+ * concurrent version of 'applications' map.
+ */
+ protected ConcurrentMap> applications;
protected int nmExpireInterval;
protected final static List EMPTY_CONTAINER_LIST =
@@ -123,7 +133,7 @@ public void serviceInit(Configuration conf) throws Exception {
super.serviceInit(conf);
}
- public synchronized List getTransferredContainers(
+ public List getTransferredContainers(
ApplicationAttemptId currentAttempt) {
ApplicationId appId = currentAttempt.getApplicationId();
SchedulerApplication app = applications.get(appId);
@@ -132,6 +142,9 @@ public synchronized List getTransferredContainers(
if (appImpl.getApplicationSubmissionContext().getUnmanagedAM()) {
return containerList;
}
+ if (app == null) {
+ return containerList;
+ }
Collection liveContainers =
app.getCurrentAppAttempt().getLiveContainers();
ContainerId amContainerId =