YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed Contributed by Jason Lowe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2012-09-13 00:46:39 +00:00
parent 4f312cfa70
commit 7c491887e5
5 changed files with 72 additions and 15 deletions

View File

@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased
YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
vinodkv)
YARN-93. Fixed RM to propagate diagnostics from applications that have
finished but failed (Jason Lowe via vinodkv).
OPTIMIZATIONS
BUG FIXES

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.api.records.ApplicationId;
public class RMAppFinishedAttemptEvent extends RMAppEvent {
private final String diagnostics;
public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) {
super(appId, RMAppEventType.ATTEMPT_FINISHED);
this.diagnostics = diagnostics;
}
public String getDiagnostics() {
return this.diagnostics;
}
}

View File

@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp {
@SuppressWarnings("rawtypes")
private EventHandler handler;
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
private static final AppFinishedTransition FINISHED_TRANSITION =
new AppFinishedTransition();
private static final StateMachineFactory<RMAppImpl,
RMAppState,
@ -150,7 +152,7 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
.addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
.addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
.addTransition(RMAppState.RUNNING,
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
RMAppEventType.ATTEMPT_FAILED,
@ -160,7 +162,7 @@ RMAppEventType.KILL, new KillAppAndAttemptTransition())
// Transitions from FINISHING state
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
RMAppEventType.KILL, new KillAppAndAttemptTransition())
// ignorable transitions
@ -572,6 +574,15 @@ public void transition(RMAppImpl app, RMAppEvent event) {
}
}
private static class AppFinishedTransition extends FinalTransition {
public void transition(RMAppImpl app, RMAppEvent event) {
RMAppFinishedAttemptEvent finishedEvent =
(RMAppFinishedAttemptEvent)event;
app.diagnostics.append(finishedEvent.getDiagnostics());
super.transition(app, event);
};
}
private static class AppKilledTransition extends FinalTransition {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {

View File

@ -56,6 +56,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
@ -688,8 +689,8 @@ public void transition(RMAppAttemptImpl appAttempt,
switch (finalAttemptState) {
case FINISHED:
{
appEvent =
new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED);
appEvent = new RMAppFinishedAttemptEvent(applicationId,
appAttempt.getDiagnostics());
}
break;
case KILLED:

View File

@ -321,7 +321,8 @@ protected RMApp testCreateAppFinishing(
}
protected RMApp testCreateAppFinished(
ApplicationSubmissionContext submissionContext) throws IOException {
ApplicationSubmissionContext submissionContext,
String diagnostics) throws IOException {
// unmanaged AMs don't use the FINISHING state
RMApp application = null;
if (submissionContext != null && submissionContext.getUnmanagedAM()) {
@ -330,14 +331,15 @@ protected RMApp testCreateAppFinished(
application = testCreateAppFinishing(submissionContext);
}
// RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
RMAppEvent finishedEvent =
new RMAppEvent(application.getApplicationId(),
RMAppEventType.ATTEMPT_FINISHED);
RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
application.getApplicationId(), diagnostics);
application.handle(finishedEvent);
assertAppState(RMAppState.FINISHED, application);
assertTimesAtFinish(application);
// finished without a proper unregister implies failed
assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
Assert.assertTrue("Finished app missing diagnostics",
application.getDiagnostics().indexOf(diagnostics) != -1);
return application;
}
@ -348,11 +350,14 @@ public void testUnmanagedApp() throws IOException {
// test success path
LOG.info("--- START: testUnmanagedAppSuccessPath ---");
testCreateAppFinished(subContext);
final String diagMsg = "some diagnostics";
RMApp application = testCreateAppFinished(subContext, diagMsg);
Assert.assertTrue("Finished app missing diagnostics",
application.getDiagnostics().indexOf(diagMsg) != -1);
// test app fails after 1 app attempt failure
LOG.info("--- START: testUnmanagedAppFailPath ---");
RMApp application = testCreateAppRunning(subContext);
application = testCreateAppRunning(subContext);
RMAppEvent event = new RMAppFailedAttemptEvent(
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
application.handle(event);
@ -366,7 +371,10 @@ public void testUnmanagedApp() throws IOException {
@Test
public void testAppSuccessPath() throws IOException {
LOG.info("--- START: testAppSuccessPath ---");
testCreateAppFinished(null);
final String diagMsg = "some diagnostics";
RMApp application = testCreateAppFinished(null, diagMsg);
Assert.assertTrue("Finished application missing diagnostics",
application.getDiagnostics().indexOf(diagMsg) != -1);
}
@Test
@ -551,7 +559,7 @@ public void testAppFinishingKill() throws IOException {
public void testAppFinishedFinished() throws IOException {
LOG.info("--- START: testAppFinishedFinished ---");
RMApp application = testCreateAppFinished(null);
RMApp application = testCreateAppFinished(null, "");
// FINISHED => FINISHED event RMAppEventType.KILL
RMAppEvent event =
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
@ -579,9 +587,8 @@ public void testAppKilledKilled() throws IOException {
assertAppState(RMAppState.KILLED, application);
// KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
event =
new RMAppEvent(application.getApplicationId(),
RMAppEventType.ATTEMPT_FINISHED);
event = new RMAppFinishedAttemptEvent(
application.getApplicationId(), "");
application.handle(event);
rmDispatcher.await();
assertTimesAtFinish(application);