diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b50f490d19..c3cf9d83db 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -693,6 +693,9 @@ Release 2.7.1 - UNRELEASED YARN-3804. Both RM are on standBy state when kerberos user not in yarn.admin.acl (Varun Saxena via xgong) + YARN-3842. NMProxy should retry on NMNotYetReadyException. + (Robert Kanter via kasha) + Release 2.7.0 - 2015-04-20 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java index 60245604c2..e9bcf8d5d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java @@ -38,6 +38,7 @@ import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.ipc.YarnRPC; import com.google.common.base.Preconditions; @@ -74,6 +75,7 @@ protected static RetryPolicy createRetryPolicy(Configuration conf, exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy); exceptionToPolicyMap.put(RetriableException.class, retryPolicy); exceptionToPolicyMap.put(SocketException.class, retryPolicy); + exceptionToPolicyMap.put(NMNotYetReadyException.class, retryPolicy); return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java index 67f540c930..0b372be7b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java @@ -54,6 +54,7 @@ public TestNMProxy() throws UnsupportedFileSystemException { } int retryCount = 0; + boolean shouldThrowNMNotYetReadyException = false; @Before public void setUp() throws Exception { @@ -72,7 +73,15 @@ public StartContainersResponse startContainers( StartContainersRequest requests) throws YarnException, IOException { if (retryCount < 5) { retryCount++; - throw new java.net.ConnectException("start container exception"); + if (shouldThrowNMNotYetReadyException) { + // This causes super to throw an NMNotYetReadyException + containerManager.setBlockNewContainerRequests(true); + } else { + throw new java.net.ConnectException("start container exception"); + } + } else { + // This stops super from throwing an NMNotYetReadyException + containerManager.setBlockNewContainerRequests(false); } return super.startContainers(requests); } @@ -126,16 +135,25 @@ public void testNMProxyRetry() throws Exception { NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, ugi, YarnRPC.create(conf), address); + retryCount = 0; + shouldThrowNMNotYetReadyException = false; proxy.startContainers(allRequests); Assert.assertEquals(5, retryCount); retryCount = 0; + shouldThrowNMNotYetReadyException = false; proxy.stopContainers(Records.newRecord(StopContainersRequest.class)); Assert.assertEquals(5, retryCount); retryCount = 0; + shouldThrowNMNotYetReadyException = false; proxy.getContainerStatuses(Records .newRecord(GetContainerStatusesRequest.class)); Assert.assertEquals(5, retryCount); + + retryCount = 0; + shouldThrowNMNotYetReadyException = true; + proxy.startContainers(allRequests); + Assert.assertEquals(5, retryCount); } }