YARN-3842. NMProxy should retry on NMNotYetReadyException. (Robert Kanter via kasha)

This commit is contained in:
Karthik Kambatla 2015-06-22 17:45:41 -07:00
parent fac4e04dd3
commit 5ebf2817e5
3 changed files with 24 additions and 1 deletion

View File

@ -693,6 +693,9 @@ Release 2.7.1 - UNRELEASED
YARN-3804. Both RM are on standBy state when kerberos user not in yarn.admin.acl
(Varun Saxena via xgong)
YARN-3842. NMProxy should retry on NMNotYetReadyException.
(Robert Kanter via kasha)
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES

View File

@ -38,6 +38,7 @@
import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.net.ConnectTimeoutException; import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.ipc.YarnRPC;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
@ -74,6 +75,7 @@ protected static RetryPolicy createRetryPolicy(Configuration conf,
exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy); exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy);
exceptionToPolicyMap.put(RetriableException.class, retryPolicy); exceptionToPolicyMap.put(RetriableException.class, retryPolicy);
exceptionToPolicyMap.put(SocketException.class, retryPolicy); exceptionToPolicyMap.put(SocketException.class, retryPolicy);
exceptionToPolicyMap.put(NMNotYetReadyException.class, retryPolicy);
return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL,
exceptionToPolicyMap); exceptionToPolicyMap);

View File

@ -54,6 +54,7 @@ public TestNMProxy() throws UnsupportedFileSystemException {
} }
int retryCount = 0; int retryCount = 0;
boolean shouldThrowNMNotYetReadyException = false;
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
@ -72,7 +73,15 @@ public StartContainersResponse startContainers(
StartContainersRequest requests) throws YarnException, IOException { StartContainersRequest requests) throws YarnException, IOException {
if (retryCount < 5) { if (retryCount < 5) {
retryCount++; retryCount++;
throw new java.net.ConnectException("start container exception"); if (shouldThrowNMNotYetReadyException) {
// This causes super to throw an NMNotYetReadyException
containerManager.setBlockNewContainerRequests(true);
} else {
throw new java.net.ConnectException("start container exception");
}
} else {
// This stops super from throwing an NMNotYetReadyException
containerManager.setBlockNewContainerRequests(false);
} }
return super.startContainers(requests); return super.startContainers(requests);
} }
@ -126,16 +135,25 @@ public void testNMProxyRetry() throws Exception {
NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, ugi, NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, ugi,
YarnRPC.create(conf), address); YarnRPC.create(conf), address);
retryCount = 0;
shouldThrowNMNotYetReadyException = false;
proxy.startContainers(allRequests); proxy.startContainers(allRequests);
Assert.assertEquals(5, retryCount); Assert.assertEquals(5, retryCount);
retryCount = 0; retryCount = 0;
shouldThrowNMNotYetReadyException = false;
proxy.stopContainers(Records.newRecord(StopContainersRequest.class)); proxy.stopContainers(Records.newRecord(StopContainersRequest.class));
Assert.assertEquals(5, retryCount); Assert.assertEquals(5, retryCount);
retryCount = 0; retryCount = 0;
shouldThrowNMNotYetReadyException = false;
proxy.getContainerStatuses(Records proxy.getContainerStatuses(Records
.newRecord(GetContainerStatusesRequest.class)); .newRecord(GetContainerStatusesRequest.class));
Assert.assertEquals(5, retryCount); Assert.assertEquals(5, retryCount);
retryCount = 0;
shouldThrowNMNotYetReadyException = true;
proxy.startContainers(allRequests);
Assert.assertEquals(5, retryCount);
} }
} }