YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed. Contributed by Jason Lowe.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f312cfa70
commit
7c491887e5
@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased
|
|||||||
YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
|
YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
|
||||||
vinodkv)
|
vinodkv)
|
||||||
|
|
||||||
|
YARN-93. Fixed RM to propagate diagnostics from applications that have
|
||||||
|
finished but failed (Jason Lowe via vinodkv).
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
@ -0,0 +1,35 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
|
||||||
|
public class RMAppFinishedAttemptEvent extends RMAppEvent {
|
||||||
|
|
||||||
|
private final String diagnostics;
|
||||||
|
|
||||||
|
public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) {
|
||||||
|
super(appId, RMAppEventType.ATTEMPT_FINISHED);
|
||||||
|
this.diagnostics = diagnostics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDiagnostics() {
|
||||||
|
return this.diagnostics;
|
||||||
|
}
|
||||||
|
}
|
@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp {
|
|||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("rawtypes")
|
||||||
private EventHandler handler;
|
private EventHandler handler;
|
||||||
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
|
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
|
||||||
|
private static final AppFinishedTransition FINISHED_TRANSITION =
|
||||||
|
new AppFinishedTransition();
|
||||||
|
|
||||||
private static final StateMachineFactory<RMAppImpl,
|
private static final StateMachineFactory<RMAppImpl,
|
||||||
RMAppState,
|
RMAppState,
|
||||||
@ -150,7 +152,7 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
|
|||||||
.addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
|
.addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
|
||||||
RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
|
RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
|
||||||
.addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
|
.addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
|
||||||
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
|
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
||||||
.addTransition(RMAppState.RUNNING,
|
.addTransition(RMAppState.RUNNING,
|
||||||
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
|
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
|
||||||
RMAppEventType.ATTEMPT_FAILED,
|
RMAppEventType.ATTEMPT_FAILED,
|
||||||
@ -160,7 +162,7 @@ RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
|||||||
|
|
||||||
// Transitions from FINISHING state
|
// Transitions from FINISHING state
|
||||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
||||||
RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
|
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
||||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
||||||
RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
||||||
// ignorable transitions
|
// ignorable transitions
|
||||||
@ -572,6 +574,15 @@ public void transition(RMAppImpl app, RMAppEvent event) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class AppFinishedTransition extends FinalTransition {
|
||||||
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
|
RMAppFinishedAttemptEvent finishedEvent =
|
||||||
|
(RMAppFinishedAttemptEvent)event;
|
||||||
|
app.diagnostics.append(finishedEvent.getDiagnostics());
|
||||||
|
super.transition(app, event);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
private static class AppKilledTransition extends FinalTransition {
|
private static class AppKilledTransition extends FinalTransition {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
|
@ -56,6 +56,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
|
||||||
@ -688,8 +689,8 @@ public void transition(RMAppAttemptImpl appAttempt,
|
|||||||
switch (finalAttemptState) {
|
switch (finalAttemptState) {
|
||||||
case FINISHED:
|
case FINISHED:
|
||||||
{
|
{
|
||||||
appEvent =
|
appEvent = new RMAppFinishedAttemptEvent(applicationId,
|
||||||
new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED);
|
appAttempt.getDiagnostics());
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case KILLED:
|
case KILLED:
|
||||||
|
@ -321,7 +321,8 @@ protected RMApp testCreateAppFinishing(
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected RMApp testCreateAppFinished(
|
protected RMApp testCreateAppFinished(
|
||||||
ApplicationSubmissionContext submissionContext) throws IOException {
|
ApplicationSubmissionContext submissionContext,
|
||||||
|
String diagnostics) throws IOException {
|
||||||
// unmanaged AMs don't use the FINISHING state
|
// unmanaged AMs don't use the FINISHING state
|
||||||
RMApp application = null;
|
RMApp application = null;
|
||||||
if (submissionContext != null && submissionContext.getUnmanagedAM()) {
|
if (submissionContext != null && submissionContext.getUnmanagedAM()) {
|
||||||
@ -330,14 +331,15 @@ protected RMApp testCreateAppFinished(
|
|||||||
application = testCreateAppFinishing(submissionContext);
|
application = testCreateAppFinishing(submissionContext);
|
||||||
}
|
}
|
||||||
// RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
|
// RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
|
||||||
RMAppEvent finishedEvent =
|
RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
|
||||||
new RMAppEvent(application.getApplicationId(),
|
application.getApplicationId(), diagnostics);
|
||||||
RMAppEventType.ATTEMPT_FINISHED);
|
|
||||||
application.handle(finishedEvent);
|
application.handle(finishedEvent);
|
||||||
assertAppState(RMAppState.FINISHED, application);
|
assertAppState(RMAppState.FINISHED, application);
|
||||||
assertTimesAtFinish(application);
|
assertTimesAtFinish(application);
|
||||||
// finished without a proper unregister implies failed
|
// finished without a proper unregister implies failed
|
||||||
assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
|
assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
|
||||||
|
Assert.assertTrue("Finished app missing diagnostics",
|
||||||
|
application.getDiagnostics().indexOf(diagnostics) != -1);
|
||||||
return application;
|
return application;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -348,11 +350,14 @@ public void testUnmanagedApp() throws IOException {
|
|||||||
|
|
||||||
// test success path
|
// test success path
|
||||||
LOG.info("--- START: testUnmanagedAppSuccessPath ---");
|
LOG.info("--- START: testUnmanagedAppSuccessPath ---");
|
||||||
testCreateAppFinished(subContext);
|
final String diagMsg = "some diagnostics";
|
||||||
|
RMApp application = testCreateAppFinished(subContext, diagMsg);
|
||||||
|
Assert.assertTrue("Finished app missing diagnostics",
|
||||||
|
application.getDiagnostics().indexOf(diagMsg) != -1);
|
||||||
|
|
||||||
// test app fails after 1 app attempt failure
|
// test app fails after 1 app attempt failure
|
||||||
LOG.info("--- START: testUnmanagedAppFailPath ---");
|
LOG.info("--- START: testUnmanagedAppFailPath ---");
|
||||||
RMApp application = testCreateAppRunning(subContext);
|
application = testCreateAppRunning(subContext);
|
||||||
RMAppEvent event = new RMAppFailedAttemptEvent(
|
RMAppEvent event = new RMAppFailedAttemptEvent(
|
||||||
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
|
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
|
||||||
application.handle(event);
|
application.handle(event);
|
||||||
@ -366,7 +371,10 @@ public void testUnmanagedApp() throws IOException {
|
|||||||
@Test
|
@Test
|
||||||
public void testAppSuccessPath() throws IOException {
|
public void testAppSuccessPath() throws IOException {
|
||||||
LOG.info("--- START: testAppSuccessPath ---");
|
LOG.info("--- START: testAppSuccessPath ---");
|
||||||
testCreateAppFinished(null);
|
final String diagMsg = "some diagnostics";
|
||||||
|
RMApp application = testCreateAppFinished(null, diagMsg);
|
||||||
|
Assert.assertTrue("Finished application missing diagnostics",
|
||||||
|
application.getDiagnostics().indexOf(diagMsg) != -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -551,7 +559,7 @@ public void testAppFinishingKill() throws IOException {
|
|||||||
public void testAppFinishedFinished() throws IOException {
|
public void testAppFinishedFinished() throws IOException {
|
||||||
LOG.info("--- START: testAppFinishedFinished ---");
|
LOG.info("--- START: testAppFinishedFinished ---");
|
||||||
|
|
||||||
RMApp application = testCreateAppFinished(null);
|
RMApp application = testCreateAppFinished(null, "");
|
||||||
// FINISHED => FINISHED event RMAppEventType.KILL
|
// FINISHED => FINISHED event RMAppEventType.KILL
|
||||||
RMAppEvent event =
|
RMAppEvent event =
|
||||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
||||||
@ -579,9 +587,8 @@ public void testAppKilledKilled() throws IOException {
|
|||||||
assertAppState(RMAppState.KILLED, application);
|
assertAppState(RMAppState.KILLED, application);
|
||||||
|
|
||||||
// KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
|
// KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
|
||||||
event =
|
event = new RMAppFinishedAttemptEvent(
|
||||||
new RMAppEvent(application.getApplicationId(),
|
application.getApplicationId(), "");
|
||||||
RMAppEventType.ATTEMPT_FINISHED);
|
|
||||||
application.handle(event);
|
application.handle(event);
|
||||||
rmDispatcher.await();
|
rmDispatcher.await();
|
||||||
assertTimesAtFinish(application);
|
assertTimesAtFinish(application);
|
||||||
|
Loading…
Reference in New Issue
Block a user