YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed. Contributed by Jason Lowe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2012-09-13 00:46:39 +00:00
parent 4f312cfa70
commit 7c491887e5
5 changed files with 72 additions and 15 deletions

View File

@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased
YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
vinodkv) vinodkv)
YARN-93. Fixed RM to propagate diagnostics from applications that have
finished but failed (Jason Lowe via vinodkv).
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.api.records.ApplicationId;
/**
 * An {@link RMAppEvent} of type {@code RMAppEventType.ATTEMPT_FINISHED}
 * that additionally carries a diagnostics string supplied by whoever
 * creates the event.
 */
public class RMAppFinishedAttemptEvent extends RMAppEvent {

  /** Diagnostics text attached to this event; stored exactly as given. */
  private final String diagnostics;

  /**
   * Creates an ATTEMPT_FINISHED event for the given application.
   *
   * @param appId id of the application the event is addressed to
   * @param diagnostics diagnostics text to carry along with the event
   */
  public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) {
    super(appId, RMAppEventType.ATTEMPT_FINISHED);
    this.diagnostics = diagnostics;
  }

  /**
   * @return the diagnostics text passed to the constructor
   */
  public String getDiagnostics() {
    return diagnostics;
  }
}

View File

@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp {
@SuppressWarnings("rawtypes") @SuppressWarnings("rawtypes")
private EventHandler handler; private EventHandler handler;
private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
private static final AppFinishedTransition FINISHED_TRANSITION =
new AppFinishedTransition();
private static final StateMachineFactory<RMAppImpl, private static final StateMachineFactory<RMAppImpl,
RMAppState, RMAppState,
@ -150,7 +152,7 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
.addTransition(RMAppState.RUNNING, RMAppState.FINISHING, .addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition()) RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
.addTransition(RMAppState.RUNNING, RMAppState.FINISHED, .addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION) RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
.addTransition(RMAppState.RUNNING, .addTransition(RMAppState.RUNNING,
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED), EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
RMAppEventType.ATTEMPT_FAILED, RMAppEventType.ATTEMPT_FAILED,
@ -160,7 +162,7 @@ RMAppEventType.KILL, new KillAppAndAttemptTransition())
// Transitions from FINISHING state // Transitions from FINISHING state
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED, .addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION) RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED, .addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
RMAppEventType.KILL, new KillAppAndAttemptTransition()) RMAppEventType.KILL, new KillAppAndAttemptTransition())
// ignorable transitions // ignorable transitions
@ -572,6 +574,15 @@ public void transition(RMAppImpl app, RMAppEvent event) {
} }
} }
private static class AppFinishedTransition extends FinalTransition {
public void transition(RMAppImpl app, RMAppEvent event) {
RMAppFinishedAttemptEvent finishedEvent =
(RMAppFinishedAttemptEvent)event;
app.diagnostics.append(finishedEvent.getDiagnostics());
super.transition(app, event);
};
}
private static class AppKilledTransition extends FinalTransition { private static class AppKilledTransition extends FinalTransition {
@Override @Override
public void transition(RMAppImpl app, RMAppEvent event) { public void transition(RMAppImpl app, RMAppEvent event) {

View File

@ -56,6 +56,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
@ -688,8 +689,8 @@ public void transition(RMAppAttemptImpl appAttempt,
switch (finalAttemptState) { switch (finalAttemptState) {
case FINISHED: case FINISHED:
{ {
appEvent = appEvent = new RMAppFinishedAttemptEvent(applicationId,
new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED); appAttempt.getDiagnostics());
} }
break; break;
case KILLED: case KILLED:

View File

@ -321,7 +321,8 @@ protected RMApp testCreateAppFinishing(
} }
protected RMApp testCreateAppFinished( protected RMApp testCreateAppFinished(
ApplicationSubmissionContext submissionContext) throws IOException { ApplicationSubmissionContext submissionContext,
String diagnostics) throws IOException {
// unmanaged AMs don't use the FINISHING state // unmanaged AMs don't use the FINISHING state
RMApp application = null; RMApp application = null;
if (submissionContext != null && submissionContext.getUnmanagedAM()) { if (submissionContext != null && submissionContext.getUnmanagedAM()) {
@ -330,14 +331,15 @@ protected RMApp testCreateAppFinished(
application = testCreateAppFinishing(submissionContext); application = testCreateAppFinishing(submissionContext);
} }
// RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED // RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
RMAppEvent finishedEvent = RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
new RMAppEvent(application.getApplicationId(), application.getApplicationId(), diagnostics);
RMAppEventType.ATTEMPT_FINISHED);
application.handle(finishedEvent); application.handle(finishedEvent);
assertAppState(RMAppState.FINISHED, application); assertAppState(RMAppState.FINISHED, application);
assertTimesAtFinish(application); assertTimesAtFinish(application);
// finished without a proper unregister implies failed // finished without a proper unregister implies failed
assertFinalAppStatus(FinalApplicationStatus.FAILED, application); assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
Assert.assertTrue("Finished app missing diagnostics",
application.getDiagnostics().indexOf(diagnostics) != -1);
return application; return application;
} }
@ -348,11 +350,14 @@ public void testUnmanagedApp() throws IOException {
// test success path // test success path
LOG.info("--- START: testUnmanagedAppSuccessPath ---"); LOG.info("--- START: testUnmanagedAppSuccessPath ---");
testCreateAppFinished(subContext); final String diagMsg = "some diagnostics";
RMApp application = testCreateAppFinished(subContext, diagMsg);
Assert.assertTrue("Finished app missing diagnostics",
application.getDiagnostics().indexOf(diagMsg) != -1);
// test app fails after 1 app attempt failure // test app fails after 1 app attempt failure
LOG.info("--- START: testUnmanagedAppFailPath ---"); LOG.info("--- START: testUnmanagedAppFailPath ---");
RMApp application = testCreateAppRunning(subContext); application = testCreateAppRunning(subContext);
RMAppEvent event = new RMAppFailedAttemptEvent( RMAppEvent event = new RMAppFailedAttemptEvent(
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, ""); application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
application.handle(event); application.handle(event);
@ -366,7 +371,10 @@ public void testUnmanagedApp() throws IOException {
@Test @Test
public void testAppSuccessPath() throws IOException { public void testAppSuccessPath() throws IOException {
LOG.info("--- START: testAppSuccessPath ---"); LOG.info("--- START: testAppSuccessPath ---");
testCreateAppFinished(null); final String diagMsg = "some diagnostics";
RMApp application = testCreateAppFinished(null, diagMsg);
Assert.assertTrue("Finished application missing diagnostics",
application.getDiagnostics().indexOf(diagMsg) != -1);
} }
@Test @Test
@ -551,7 +559,7 @@ public void testAppFinishingKill() throws IOException {
public void testAppFinishedFinished() throws IOException { public void testAppFinishedFinished() throws IOException {
LOG.info("--- START: testAppFinishedFinished ---"); LOG.info("--- START: testAppFinishedFinished ---");
RMApp application = testCreateAppFinished(null); RMApp application = testCreateAppFinished(null, "");
// FINISHED => FINISHED event RMAppEventType.KILL // FINISHED => FINISHED event RMAppEventType.KILL
RMAppEvent event = RMAppEvent event =
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
@ -579,9 +587,8 @@ public void testAppKilledKilled() throws IOException {
assertAppState(RMAppState.KILLED, application); assertAppState(RMAppState.KILLED, application);
// KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED // KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
event = event = new RMAppFinishedAttemptEvent(
new RMAppEvent(application.getApplicationId(), application.getApplicationId(), "");
RMAppEventType.ATTEMPT_FINISHED);
application.handle(event); application.handle(event);
rmDispatcher.await(); rmDispatcher.await();
assertTimesAtFinish(application); assertTimesAtFinish(application);