From 47ad98b2e1b4e9ad9304aca4d452da428778e7ee Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 19 Oct 2018 13:49:04 -0400 Subject: [PATCH] YARN-8910. Fixed misleading log statement when container max retries is infinite. Contributed by Chandni Singh --- .../container/ContainerImpl.java | 15 ++++++++--- .../container/SlidingWindowRetryPolicy.java | 4 +++ .../TestSlidingWindowRetryPolicy.java | 26 ++++++++++++++++--- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index f88dfbfb71..6716dbb02e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -1653,10 +1653,17 @@ public class ContainerImpl implements Container { private void doRelaunch(final ContainerImpl container, int remainingRetryAttempts, final int retryInterval) { - LOG.info("Relaunching Container " + container.getContainerId() - + ". Remaining retry attempts(after relaunch) : " - + remainingRetryAttempts + ". Interval between retries is " - + retryInterval + "ms"); + if (remainingRetryAttempts == ContainerRetryContext.RETRY_FOREVER) { + LOG.info("Relaunching Container {}. " + + "retry interval {} ms", container.getContainerId(), + retryInterval); + } else { + LOG.info("Relaunching Container {}. " + + "remaining retry attempts(after relaunch) {}, " + + "retry interval {} ms", container.getContainerId(), + remainingRetryAttempts, retryInterval); + } + container.wasLaunched = false; container.metrics.endRunningContainer(); if (retryInterval == 0) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java index 9360669bb6..957764fe4b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java @@ -153,6 +153,10 @@ public class SlidingWindowRetryPolicy { } int getRemainingRetries() { + if (containerRetryContext.getMaxRetries() == + ContainerRetryContext.RETRY_FOREVER) { + return ContainerRetryContext.RETRY_FOREVER; + } return remainingRetries; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java index bacf3bbf18..2aa4605c54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java @@ -43,8 +43,12 @@ public class TestSlidingWindowRetryPolicy { public void testNeverRetry() { ContainerRetryContext retryContext = ContainerRetryContext.NEVER_RETRY_CONTEXT; - Assert.assertFalse("never retry", retryPolicy.shouldRetry( - new SlidingWindowRetryPolicy.RetryContext(retryContext), 12)); + SlidingWindowRetryPolicy.RetryContext windowContext = new + SlidingWindowRetryPolicy.RetryContext(retryContext); + Assert.assertFalse("never retry", retryPolicy.shouldRetry(windowContext, + 12)); + Assert.assertEquals("remaining retries", 0, + windowContext.getRemainingRetries()); } @Test @@ -52,8 +56,13 @@ public class TestSlidingWindowRetryPolicy { ContainerRetryContext retryContext = ContainerRetryContext.newInstance( ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, -1, 0, 10); - Assert.assertTrue("always retry", retryPolicy.shouldRetry( - new SlidingWindowRetryPolicy.RetryContext(retryContext), 12)); + SlidingWindowRetryPolicy.RetryContext windowContext = new + SlidingWindowRetryPolicy.RetryContext(retryContext); + Assert.assertTrue("always retry", retryPolicy.shouldRetry(windowContext, + 12)); + Assert.assertEquals("remaining retries", + ContainerRetryContext.RETRY_FOREVER, + windowContext.getRemainingRetries()); } @Test @@ -65,19 +74,28 @@ public class TestSlidingWindowRetryPolicy { Assert.assertTrue("retry 1", retryPolicy.shouldRetry(windowRetryContext, 12)); retryPolicy.updateRetryContext(windowRetryContext); + Assert.assertEquals("remaining retries", 1, + windowRetryContext.getRemainingRetries()); clock.setTime(20); Assert.assertTrue("retry 2", retryPolicy.shouldRetry(windowRetryContext, 12)); retryPolicy.updateRetryContext(windowRetryContext); + Assert.assertEquals("remaining retries", 1, + windowRetryContext.getRemainingRetries()); clock.setTime(40); Assert.assertTrue("retry 3", retryPolicy.shouldRetry(windowRetryContext, 12)); retryPolicy.updateRetryContext(windowRetryContext); + Assert.assertEquals("remaining retries", 1, + windowRetryContext.getRemainingRetries()); clock.setTime(45); Assert.assertFalse("retry failed", retryPolicy.shouldRetry(windowRetryContext, 12)); + retryPolicy.updateRetryContext(windowRetryContext); + Assert.assertEquals("remaining retries", 0, + windowRetryContext.getRemainingRetries()); } }