YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1453080 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56c7e7a12e
commit
21066255da
@ -341,6 +341,9 @@ Release 0.23.7 - UNRELEASED
|
||||
YARN-269. Resource Manager not logging the health_check_script result when
|
||||
taking it out (Jason Lowe via kihwal)
|
||||
|
||||
YARN-227. Application expiration difficult to debug for end-users
|
||||
(Jason Lowe via jeagles)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-357. App submission should not be synchronized (daryn)
|
||||
|
@ -147,6 +147,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
private static final ExpiredTransition EXPIRED_TRANSITION =
|
||||
new ExpiredTransition();
|
||||
|
||||
private static final StateMachineFactory<RMAppAttemptImpl,
|
||||
RMAppAttemptState,
|
||||
RMAppAttemptEventType,
|
||||
@ -243,7 +246,7 @@ RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())
|
||||
.addTransition(
|
||||
RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
|
||||
RMAppAttemptEventType.EXPIRE,
|
||||
new FinalTransition(RMAppAttemptState.FAILED))
|
||||
EXPIRED_TRANSITION)
|
||||
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
|
||||
RMAppAttemptEventType.KILL,
|
||||
new FinalTransition(RMAppAttemptState.KILLED))
|
||||
@ -268,7 +271,7 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition())
|
||||
.addTransition(
|
||||
RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
|
||||
RMAppAttemptEventType.EXPIRE,
|
||||
new FinalTransition(RMAppAttemptState.FAILED))
|
||||
EXPIRED_TRANSITION)
|
||||
.addTransition(
|
||||
RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
|
||||
RMAppAttemptEventType.KILL,
|
||||
@ -491,6 +494,13 @@ private String generateProxyUriWithoutScheme(
|
||||
}
|
||||
}
|
||||
|
||||
private void setTrackingUrlToRMAppPage() {
|
||||
origTrackingUrl = pjoin(
|
||||
YarnConfiguration.getRMWebAppHostAndPort(conf),
|
||||
"cluster", "app", getAppAttemptId().getApplicationId());
|
||||
proxiedTrackingUrl = origTrackingUrl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClientToken getClientToken() {
|
||||
return this.clientToken;
|
||||
@ -992,7 +1002,23 @@ public void transition(RMAppAttemptImpl appAttempt,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class ExpiredTransition extends FinalTransition {
|
||||
|
||||
public ExpiredTransition() {
|
||||
super(RMAppAttemptState.FAILED);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
appAttempt.diagnostics.append("ApplicationMaster for attempt " +
|
||||
appAttempt.getAppAttemptId() + " timed out");
|
||||
appAttempt.setTrackingUrlToRMAppPage();
|
||||
super.transition(appAttempt, event);
|
||||
}
|
||||
}
|
||||
|
||||
private static class UnexpectedAMRegisteredTransition extends
|
||||
BaseFinalTransition {
|
||||
|
||||
@ -1110,10 +1136,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
|
||||
// When the AM dies, the trackingUrl is left pointing to the AM's URL,
|
||||
// which shows up in the scheduler UI as a broken link. Direct the
|
||||
// user to the app page on the RM so they can see the status and logs.
|
||||
appAttempt.origTrackingUrl = pjoin(
|
||||
YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf),
|
||||
"cluster", "app", appAttempt.getAppAttemptId().getApplicationId());
|
||||
appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl;
|
||||
appAttempt.setTrackingUrlToRMAppPage();
|
||||
|
||||
new FinalTransition(RMAppAttemptState.FAILED).transition(
|
||||
appAttempt, containerFinishedEvent);
|
||||
|
@ -22,6 +22,7 @@
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
@ -665,6 +666,39 @@ public void testRunningToFailed() {
|
||||
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||
}
|
||||
|
||||
@Test(timeout=10000)
|
||||
public void testLaunchedExpire() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
applicationAttempt.handle(new RMAppAttemptEvent(
|
||||
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
assertTrue("expire diagnostics missing",
|
||||
applicationAttempt.getDiagnostics().contains("timed out"));
|
||||
String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
|
||||
applicationAttempt.getAppAttemptId().getApplicationId());
|
||||
assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
|
||||
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||
}
|
||||
|
||||
@Test(timeout=20000)
|
||||
public void testRunningExpire() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl");
|
||||
applicationAttempt.handle(new RMAppAttemptEvent(
|
||||
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
assertTrue("expire diagnostics missing",
|
||||
applicationAttempt.getDiagnostics().contains("timed out"));
|
||||
String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
|
||||
applicationAttempt.getAppAttemptId().getApplicationId());
|
||||
assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
|
||||
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnregisterToKilledFinishing() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
|
Loading…
Reference in New Issue
Block a user