YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1453080 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
56c7e7a12e
commit
21066255da
@ -341,6 +341,9 @@ Release 0.23.7 - UNRELEASED
|
|||||||
YARN-269. Resource Manager not logging the health_check_script result when
|
YARN-269. Resource Manager not logging the health_check_script result when
|
||||||
taking it out (Jason Lowe via kihwal)
|
taking it out (Jason Lowe via kihwal)
|
||||||
|
|
||||||
|
YARN-227. Application expiration difficult to debug for end-users
|
||||||
|
(Jason Lowe via jeagles)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-357. App submission should not be synchronized (daryn)
|
YARN-357. App submission should not be synchronized (daryn)
|
||||||
|
@ -147,6 +147,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||||||
|
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
|
|
||||||
|
private static final ExpiredTransition EXPIRED_TRANSITION =
|
||||||
|
new ExpiredTransition();
|
||||||
|
|
||||||
private static final StateMachineFactory<RMAppAttemptImpl,
|
private static final StateMachineFactory<RMAppAttemptImpl,
|
||||||
RMAppAttemptState,
|
RMAppAttemptState,
|
||||||
RMAppAttemptEventType,
|
RMAppAttemptEventType,
|
||||||
@ -243,7 +246,7 @@ RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())
|
|||||||
.addTransition(
|
.addTransition(
|
||||||
RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
|
RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
|
||||||
RMAppAttemptEventType.EXPIRE,
|
RMAppAttemptEventType.EXPIRE,
|
||||||
new FinalTransition(RMAppAttemptState.FAILED))
|
EXPIRED_TRANSITION)
|
||||||
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
|
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
|
||||||
RMAppAttemptEventType.KILL,
|
RMAppAttemptEventType.KILL,
|
||||||
new FinalTransition(RMAppAttemptState.KILLED))
|
new FinalTransition(RMAppAttemptState.KILLED))
|
||||||
@ -268,7 +271,7 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition())
|
|||||||
.addTransition(
|
.addTransition(
|
||||||
RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
|
RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
|
||||||
RMAppAttemptEventType.EXPIRE,
|
RMAppAttemptEventType.EXPIRE,
|
||||||
new FinalTransition(RMAppAttemptState.FAILED))
|
EXPIRED_TRANSITION)
|
||||||
.addTransition(
|
.addTransition(
|
||||||
RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
|
RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
|
||||||
RMAppAttemptEventType.KILL,
|
RMAppAttemptEventType.KILL,
|
||||||
@ -491,6 +494,13 @@ private String generateProxyUriWithoutScheme(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void setTrackingUrlToRMAppPage() {
|
||||||
|
origTrackingUrl = pjoin(
|
||||||
|
YarnConfiguration.getRMWebAppHostAndPort(conf),
|
||||||
|
"cluster", "app", getAppAttemptId().getApplicationId());
|
||||||
|
proxiedTrackingUrl = origTrackingUrl;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ClientToken getClientToken() {
|
public ClientToken getClientToken() {
|
||||||
return this.clientToken;
|
return this.clientToken;
|
||||||
@ -992,7 +1002,23 @@ public void transition(RMAppAttemptImpl appAttempt,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class ExpiredTransition extends FinalTransition {
|
||||||
|
|
||||||
|
public ExpiredTransition() {
|
||||||
|
super(RMAppAttemptState.FAILED);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void transition(RMAppAttemptImpl appAttempt,
|
||||||
|
RMAppAttemptEvent event) {
|
||||||
|
appAttempt.diagnostics.append("ApplicationMaster for attempt " +
|
||||||
|
appAttempt.getAppAttemptId() + " timed out");
|
||||||
|
appAttempt.setTrackingUrlToRMAppPage();
|
||||||
|
super.transition(appAttempt, event);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static class UnexpectedAMRegisteredTransition extends
|
private static class UnexpectedAMRegisteredTransition extends
|
||||||
BaseFinalTransition {
|
BaseFinalTransition {
|
||||||
|
|
||||||
@ -1110,10 +1136,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
|
|||||||
// When the AM dies, the trackingUrl is left pointing to the AM's URL,
|
// When the AM dies, the trackingUrl is left pointing to the AM's URL,
|
||||||
// which shows up in the scheduler UI as a broken link. Direct the
|
// which shows up in the scheduler UI as a broken link. Direct the
|
||||||
// user to the app page on the RM so they can see the status and logs.
|
// user to the app page on the RM so they can see the status and logs.
|
||||||
appAttempt.origTrackingUrl = pjoin(
|
appAttempt.setTrackingUrlToRMAppPage();
|
||||||
YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf),
|
|
||||||
"cluster", "app", appAttempt.getAppAttemptId().getApplicationId());
|
|
||||||
appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl;
|
|
||||||
|
|
||||||
new FinalTransition(RMAppAttemptState.FAILED).transition(
|
new FinalTransition(RMAppAttemptState.FAILED).transition(
|
||||||
appAttempt, containerFinishedEvent);
|
appAttempt, containerFinishedEvent);
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertNull;
|
import static org.junit.Assert.assertNull;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.times;
|
import static org.mockito.Mockito.times;
|
||||||
@ -665,6 +666,39 @@ public void testRunningToFailed() {
|
|||||||
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=10000)
|
||||||
|
public void testLaunchedExpire() {
|
||||||
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
launchApplicationAttempt(amContainer);
|
||||||
|
applicationAttempt.handle(new RMAppAttemptEvent(
|
||||||
|
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
assertTrue("expire diagnostics missing",
|
||||||
|
applicationAttempt.getDiagnostics().contains("timed out"));
|
||||||
|
String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
|
||||||
|
applicationAttempt.getAppAttemptId().getApplicationId());
|
||||||
|
assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
|
||||||
|
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=20000)
|
||||||
|
public void testRunningExpire() {
|
||||||
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
launchApplicationAttempt(amContainer);
|
||||||
|
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl");
|
||||||
|
applicationAttempt.handle(new RMAppAttemptEvent(
|
||||||
|
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
assertTrue("expire diagnostics missing",
|
||||||
|
applicationAttempt.getDiagnostics().contains("timed out"));
|
||||||
|
String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
|
||||||
|
applicationAttempt.getAppAttemptId().getApplicationId());
|
||||||
|
assertEquals(rmAppPageUrl, applicationAttempt.getOriginalTrackingUrl());
|
||||||
|
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUnregisterToKilledFinishing() {
|
public void testUnregisterToKilledFinishing() {
|
||||||
Container amContainer = allocateApplicationAttempt();
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
Loading…
Reference in New Issue
Block a user