diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index f978ae7537..171d52af7c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -139,7 +139,17 @@ public static final RetryPolicy retryByRemoteException( Map, RetryPolicy> exceptionToPolicyMap) { return new RemoteExceptionDependentRetry(defaultPolicy, exceptionToPolicyMap); } - + + /** + * A retry policy for exceptions other than RemoteException. + */ + public static final RetryPolicy retryOtherThanRemoteException( + RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + return new OtherThanRemoteExceptionDependentRetry(defaultPolicy, + exceptionToPolicyMap); + } + public static final RetryPolicy failoverOnNetworkException(int maxFailovers) { return failoverOnNetworkException(TRY_ONCE_THEN_FAIL, maxFailovers); } @@ -489,7 +499,37 @@ public RetryAction shouldRetry(Exception e, int retries, int failovers, return policy.shouldRetry(e, retries, failovers, isIdempotentOrAtMostOnce); } } - + + static class OtherThanRemoteExceptionDependentRetry implements RetryPolicy { + + private RetryPolicy defaultPolicy; + private Map, RetryPolicy> exceptionToPolicyMap; + + public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy, + Map, + RetryPolicy> exceptionToPolicyMap) { + this.defaultPolicy = defaultPolicy; + this.exceptionToPolicyMap = exceptionToPolicyMap; + } + + @Override + public RetryAction shouldRetry(Exception e, int retries, int failovers, + boolean isIdempotentOrAtMostOnce) throws Exception { + RetryPolicy policy = null; + // ignore Remote Exception + if (e instanceof RemoteException) { + // do nothing + } else { + policy = exceptionToPolicyMap.get(e.getClass()); + } + if (policy == null) { + policy = defaultPolicy; + } + return policy.shouldRetry( + e, retries, failovers, isIdempotentOrAtMostOnce); + } + } + static class ExponentialBackoffRetry extends RetryLimited { public ExponentialBackoffRetry( diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java index 81f3a9b643..35a45b4f73 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.io.retry.RetryPolicies.TRY_ONCE_THEN_FAIL; import static org.apache.hadoop.io.retry.RetryPolicies.retryByException; import static org.apache.hadoop.io.retry.RetryPolicies.retryByRemoteException; +import static org.apache.hadoop.io.retry.RetryPolicies.retryOtherThanRemoteException; import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWithFixedSleep; import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWithProportionalSleep; import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumTimeWithFixedSleep; @@ -29,6 +30,7 @@ import static org.apache.hadoop.io.retry.RetryPolicies.exponentialBackoffRetry; import static org.junit.Assert.*; +import java.io.IOException; import java.util.Collections; import java.util.Map; import java.util.concurrent.Callable; @@ -213,8 +215,29 @@ public void testRetryByRemoteException() { } catch (RemoteException e) { // expected } - } - + } + + @Test + public void testRetryOtherThanRemoteException() throws Throwable { + Map, RetryPolicy> exceptionToPolicyMap = + Collections., RetryPolicy>singletonMap( + IOException.class, RETRY_FOREVER); + + UnreliableInterface unreliable = (UnreliableInterface) + RetryProxy.create(UnreliableInterface.class, unreliableImpl, + retryOtherThanRemoteException(TRY_ONCE_THEN_FAIL, + exceptionToPolicyMap)); + // should retry with local IOException. + unreliable.failsOnceWithIOException(); + try { + // won't get retry on remote exception + unreliable.failsOnceWithRemoteException(); + fail("Should fail"); + } catch (RemoteException e) { + // expected + } + } + @Test public void testRetryInterruptible() throws Throwable { final UnreliableInterface unreliable = (UnreliableInterface) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java index ce9c16ea60..9387772464 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java @@ -26,6 +26,8 @@ class UnreliableImplementation implements UnreliableInterface { private int failsOnceInvocationCount, failsOnceWithValueInvocationCount, + failsOnceIOExceptionInvocationCount, + failsOnceRemoteExceptionInvocationCount, failsTenTimesInvocationCount, succeedsOnceThenFailsCount, succeedsOnceThenFailsIdempotentCount, @@ -89,6 +91,21 @@ public boolean failsOnceThenSucceedsWithReturnValue() throws UnreliableException return true; } + @Override + public void failsOnceWithIOException() throws IOException { + if (failsOnceIOExceptionInvocationCount++ == 0) { + throw new IOException("test exception for failsOnceWithIOException"); + } + } + + @Override + public void failsOnceWithRemoteException() throws RemoteException { + if (failsOnceRemoteExceptionInvocationCount++ == 0) { + throw new RemoteException(IOException.class.getName(), + "test exception for failsOnceWithRemoteException"); + } + } + @Override public void failsTenTimesThenSucceeds() throws UnreliableException { if (failsTenTimesInvocationCount++ < 10) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java index 3fbe11a1c2..6c9c15313f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java @@ -54,6 +54,9 @@ public static class FatalException extends UnreliableException { void alwaysFailsWithFatalException() throws FatalException; void alwaysFailsWithRemoteFatalException() throws RemoteException; + void failsOnceWithIOException() throws IOException; + void failsOnceWithRemoteException() throws RemoteException; + void failsOnceThenSucceeds() throws UnreliableException; boolean failsOnceThenSucceedsWithReturnValue() throws UnreliableException; diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 57cdb15dc1..8dc1fd0e4b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1034,6 +1034,9 @@ Release 2.8.0 - UNRELEASED YARN-4130. Duplicate declaration of ApplicationId in RMAppManager#submitApplication method. (Kai Sasaki via rohithsharmaks) + YARN-4288. Fixed RMProxy to retry on IOException from local host. + (Junping Du via jianhe) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java index be08f2f614..23e1691a8d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java @@ -250,8 +250,10 @@ public static RetryPolicy createRetryPolicy(Configuration conf) { exceptionToPolicyMap.put(ConnectTimeoutException.class, retryPolicy); exceptionToPolicyMap.put(RetriableException.class, retryPolicy); exceptionToPolicyMap.put(SocketException.class, retryPolicy); - - return RetryPolicies.retryByException( + // YARN-4288: local IOException is also possible. + exceptionToPolicyMap.put(IOException.class, retryPolicy); + // Not retry on remote IO exception. + return RetryPolicies.retryOtherThanRemoteException( RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); } }