YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed. Contributed by Jason Lowe.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f312cfa70
commit
7c491887e5
@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased
|
||||
YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
|
||||
vinodkv)
|
||||
|
||||
YARN-93. Fixed RM to propagate diagnostics from applications that have
|
||||
finished but failed (Jason Lowe via vinodkv).
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
|
||||
public class RMAppFinishedAttemptEvent extends RMAppEvent {
|
||||
|
||||
private final String diagnostics;
|
||||
|
||||
public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) {
|
||||
super(appId, RMAppEventType.ATTEMPT_FINISHED);
|
||||
this.diagnostics = diagnostics;
|
||||
}
|
||||
|
||||
public String getDiagnostics() {
|
||||
return this.diagnostics;
|
||||
}
|
||||
}
|
@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp {
|
||||
@SuppressWarnings("rawtypes")
|
||||
private EventHandler handler;
|
||||
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
|
||||
private static final AppFinishedTransition FINISHED_TRANSITION =
|
||||
new AppFinishedTransition();
|
||||
|
||||
private static final StateMachineFactory<RMAppImpl,
|
||||
RMAppState,
|
||||
@ -150,7 +152,7 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
|
||||
.addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
|
||||
RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
|
||||
.addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
|
||||
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
|
||||
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
||||
.addTransition(RMAppState.RUNNING,
|
||||
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
|
||||
RMAppEventType.ATTEMPT_FAILED,
|
||||
@ -160,7 +162,7 @@ RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
||||
|
||||
// Transitions from FINISHING state
|
||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
||||
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
|
||||
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
||||
RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
||||
// ignorable transitions
|
||||
@ -572,6 +574,15 @@ public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
}
|
||||
}
|
||||
|
||||
private static class AppFinishedTransition extends FinalTransition {
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
RMAppFinishedAttemptEvent finishedEvent =
|
||||
(RMAppFinishedAttemptEvent)event;
|
||||
app.diagnostics.append(finishedEvent.getDiagnostics());
|
||||
super.transition(app, event);
|
||||
};
|
||||
}
|
||||
|
||||
private static class AppKilledTransition extends FinalTransition {
|
||||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
|
@ -56,6 +56,7 @@
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
|
||||
@ -688,8 +689,8 @@ public void transition(RMAppAttemptImpl appAttempt,
|
||||
switch (finalAttemptState) {
|
||||
case FINISHED:
|
||||
{
|
||||
appEvent =
|
||||
new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED);
|
||||
appEvent = new RMAppFinishedAttemptEvent(applicationId,
|
||||
appAttempt.getDiagnostics());
|
||||
}
|
||||
break;
|
||||
case KILLED:
|
||||
|
@ -321,7 +321,8 @@ protected RMApp testCreateAppFinishing(
|
||||
}
|
||||
|
||||
protected RMApp testCreateAppFinished(
|
||||
ApplicationSubmissionContext submissionContext) throws IOException {
|
||||
ApplicationSubmissionContext submissionContext,
|
||||
String diagnostics) throws IOException {
|
||||
// unmanaged AMs don't use the FINISHING state
|
||||
RMApp application = null;
|
||||
if (submissionContext != null && submissionContext.getUnmanagedAM()) {
|
||||
@ -330,14 +331,15 @@ protected RMApp testCreateAppFinished(
|
||||
application = testCreateAppFinishing(submissionContext);
|
||||
}
|
||||
// RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
|
||||
RMAppEvent finishedEvent =
|
||||
new RMAppEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FINISHED);
|
||||
RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
|
||||
application.getApplicationId(), diagnostics);
|
||||
application.handle(finishedEvent);
|
||||
assertAppState(RMAppState.FINISHED, application);
|
||||
assertTimesAtFinish(application);
|
||||
// finished without a proper unregister implies failed
|
||||
assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
|
||||
Assert.assertTrue("Finished app missing diagnostics",
|
||||
application.getDiagnostics().indexOf(diagnostics) != -1);
|
||||
return application;
|
||||
}
|
||||
|
||||
@ -348,11 +350,14 @@ public void testUnmanagedApp() throws IOException {
|
||||
|
||||
// test success path
|
||||
LOG.info("--- START: testUnmanagedAppSuccessPath ---");
|
||||
testCreateAppFinished(subContext);
|
||||
final String diagMsg = "some diagnostics";
|
||||
RMApp application = testCreateAppFinished(subContext, diagMsg);
|
||||
Assert.assertTrue("Finished app missing diagnostics",
|
||||
application.getDiagnostics().indexOf(diagMsg) != -1);
|
||||
|
||||
// test app fails after 1 app attempt failure
|
||||
LOG.info("--- START: testUnmanagedAppFailPath ---");
|
||||
RMApp application = testCreateAppRunning(subContext);
|
||||
application = testCreateAppRunning(subContext);
|
||||
RMAppEvent event = new RMAppFailedAttemptEvent(
|
||||
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
|
||||
application.handle(event);
|
||||
@ -366,7 +371,10 @@ public void testUnmanagedApp() throws IOException {
|
||||
@Test
|
||||
public void testAppSuccessPath() throws IOException {
|
||||
LOG.info("--- START: testAppSuccessPath ---");
|
||||
testCreateAppFinished(null);
|
||||
final String diagMsg = "some diagnostics";
|
||||
RMApp application = testCreateAppFinished(null, diagMsg);
|
||||
Assert.assertTrue("Finished application missing diagnostics",
|
||||
application.getDiagnostics().indexOf(diagMsg) != -1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -551,7 +559,7 @@ public void testAppFinishingKill() throws IOException {
|
||||
public void testAppFinishedFinished() throws IOException {
|
||||
LOG.info("--- START: testAppFinishedFinished ---");
|
||||
|
||||
RMApp application = testCreateAppFinished(null);
|
||||
RMApp application = testCreateAppFinished(null, "");
|
||||
// FINISHED => FINISHED event RMAppEventType.KILL
|
||||
RMAppEvent event =
|
||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
||||
@ -579,9 +587,8 @@ public void testAppKilledKilled() throws IOException {
|
||||
assertAppState(RMAppState.KILLED, application);
|
||||
|
||||
// KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
|
||||
event =
|
||||
new RMAppEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FINISHED);
|
||||
event = new RMAppFinishedAttemptEvent(
|
||||
application.getApplicationId(), "");
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
assertTimesAtFinish(application);
|
||||
|
Loading…
Reference in New Issue
Block a user