diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index e7f28d05cb..17fbbc3660 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -893,6 +893,11 @@ public static boolean isAclEnabled(Configuration conf) {
NM_PREFIX + "container-diagnostics-maximum-size";
public static final int DEFAULT_NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE = 10000;
+ /** Minimum container restart interval. */
+ public static final String NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS =
+ NM_PREFIX + "container-retry-minimum-interval-ms";
+ public static final int DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS = 1000;
+
/** Interval at which the delayed token removal thread runs */
public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS =
RM_PREFIX + "delayed.delegation-token.removal-interval-ms";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index c9e761a247..703c03df0f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1588,6 +1588,12 @@
10000
+
+ Minimum container restart interval in milliseconds.
+ yarn.nodemanager.container-retry-minimum-interval-ms
+ 1000
+
+
Max number of threads in NMClientAsync to process container
management events
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index b1ddc2ef95..2ff8e7ccf3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -33,6 +33,7 @@
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -155,6 +156,16 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
this.containerRetryContext = ContainerRetryContext.NEVER_RETRY_CONTEXT;
}
this.remainingRetryAttempts = containerRetryContext.getMaxRetries();
+ int minimumRestartInterval = conf.getInt(
+ YarnConfiguration.NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS,
+ YarnConfiguration.DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS);
+ if (containerRetryContext.getRetryPolicy()
+ != ContainerRetryPolicy.NEVER_RETRY
+ && containerRetryContext.getRetryInterval() < minimumRestartInterval) {
+ LOG.info("Set restart interval to minimum value " + minimumRestartInterval
+ + "ms for container " + containerTokenIdentifier.getContainerID());
+ this.containerRetryContext.setRetryInterval(minimumRestartInterval);
+ }
this.diagnosticsMaxSize = conf.getInt(
YarnConfiguration.NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE,
YarnConfiguration.DEFAULT_NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE);
@@ -1368,4 +1379,9 @@ private static boolean shouldBeUploadedToSharedCache(ContainerImpl container,
LocalResourceRequest resource) {
return container.resourcesUploadPolicies.get(resource);
}
+
+ @VisibleForTesting
+ ContainerRetryContext getContainerRetryContext() {
+ return containerRetryContext;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index 118bc42bcb..766a1f91fe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -725,6 +725,40 @@ private void testContainerRetry(ContainerRetryContext containerRetryContext,
}
}
+ @Test
+ public void testContainerRestartInterval() throws IOException {
+ conf.setInt(YarnConfiguration.NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS, 2000);
+
+ ContainerRetryContext containerRetryContext1 = ContainerRetryContext
+ .newInstance(ContainerRetryPolicy.NEVER_RETRY, null, 3, 0);
+ testContainerRestartInterval(containerRetryContext1, 0);
+
+ ContainerRetryContext containerRetryContext2 = ContainerRetryContext
+ .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, 3, 0);
+ testContainerRestartInterval(containerRetryContext2, 2000);
+
+ ContainerRetryContext containerRetryContext3 = ContainerRetryContext
+ .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, 3, 4000);
+ testContainerRestartInterval(containerRetryContext3, 4000);
+ }
+
+ private void testContainerRestartInterval(
+ ContainerRetryContext containerRetryContext,
+ int expectedRestartInterval) throws IOException {
+ WrappedContainer wc = null;
+ try {
+ wc = new WrappedContainer(25, 314159265358980L, 4345,
+ "yak", containerRetryContext);
+ Assert.assertEquals(
+ ((ContainerImpl)wc.c).getContainerRetryContext().getRetryInterval(),
+ expectedRestartInterval);
+ } finally {
+ if (wc != null) {
+ wc.finished();
+ }
+ }
+ }
+
private void verifyCleanupCall(WrappedContainer wc) throws Exception {
ResourcesReleasedMatcher matchesReq =
new ResourcesReleasedMatcher(wc.localResources, EnumSet.of(