From 7c491887e58bade308cea9e6d4995ea3e8acc4c0 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 13 Sep 2012 00:46:39 +0000 Subject: [PATCH] YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed Contributed by Jason Lowe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../rmapp/RMAppFinishedAttemptEvent.java | 35 +++++++++++++++++++ .../resourcemanager/rmapp/RMAppImpl.java | 15 ++++++-- .../rmapp/attempt/RMAppAttemptImpl.java | 5 +-- .../rmapp/TestRMAppTransitions.java | 29 +++++++++------ 5 files changed, 72 insertions(+), 15 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFinishedAttemptEvent.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 9f2a70c2e8..325d16a696 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via vinodkv) + YARN-93. Fixed RM to propagate diagnostics from applications that have + finished but failed (Jason Lowe via vinodkv). + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFinishedAttemptEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFinishedAttemptEvent.java new file mode 100644 index 0000000000..f1a6340ba8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFinishedAttemptEvent.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp; + +import org.apache.hadoop.yarn.api.records.ApplicationId; + +public class RMAppFinishedAttemptEvent extends RMAppEvent { + + private final String diagnostics; + + public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) { + super(appId, RMAppEventType.ATTEMPT_FINISHED); + this.diagnostics = diagnostics; + } + + public String getDiagnostics() { + return this.diagnostics; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index cedaf9f416..25e5684789 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp { @SuppressWarnings("rawtypes") private EventHandler handler; private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); + private static final AppFinishedTransition FINISHED_TRANSITION = + new AppFinishedTransition(); private static final StateMachineFactory FINISHED event RMAppEventType.ATTEMPT_FINISHED - RMAppEvent finishedEvent = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FINISHED); + RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent( + application.getApplicationId(), diagnostics); application.handle(finishedEvent); assertAppState(RMAppState.FINISHED, application); assertTimesAtFinish(application); // finished without a proper unregister implies failed assertFinalAppStatus(FinalApplicationStatus.FAILED, application); + Assert.assertTrue("Finished app missing diagnostics", + application.getDiagnostics().indexOf(diagnostics) != -1); return application; } @@ -348,11 +350,14 @@ public void testUnmanagedApp() throws IOException { // test success path LOG.info("--- START: testUnmanagedAppSuccessPath ---"); - testCreateAppFinished(subContext); + final String diagMsg = "some diagnostics"; + RMApp application = testCreateAppFinished(subContext, diagMsg); + Assert.assertTrue("Finished app missing diagnostics", + application.getDiagnostics().indexOf(diagMsg) != -1); // test app fails after 1 app attempt failure LOG.info("--- START: testUnmanagedAppFailPath ---"); - RMApp application = testCreateAppRunning(subContext); + application = testCreateAppRunning(subContext); RMAppEvent event = new RMAppFailedAttemptEvent( application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, ""); application.handle(event); @@ -366,7 +371,10 @@ public void testUnmanagedApp() throws IOException { @Test public void testAppSuccessPath() throws IOException { LOG.info("--- START: testAppSuccessPath ---"); - testCreateAppFinished(null); + final String diagMsg = "some diagnostics"; + RMApp application = testCreateAppFinished(null, diagMsg); + Assert.assertTrue("Finished application missing diagnostics", + application.getDiagnostics().indexOf(diagMsg) != -1); } @Test @@ -551,7 +559,7 @@ public void testAppFinishingKill() throws IOException { public void testAppFinishedFinished() throws IOException { LOG.info("--- START: testAppFinishedFinished ---"); - RMApp application = testCreateAppFinished(null); + RMApp application = testCreateAppFinished(null, ""); // FINISHED => FINISHED event RMAppEventType.KILL RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); @@ -579,9 +587,8 @@ public void testAppKilledKilled() throws IOException { assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED - event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FINISHED); + event = new RMAppFinishedAttemptEvent( + application.getApplicationId(), ""); application.handle(event); rmDispatcher.await(); assertTimesAtFinish(application);