diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index eb487b13d0..9daa28dd15 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -724,6 +724,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4283. Display tail of aggregated logs by default (Jason Lowe via bobby) + MAPREDUCE-4448. Fix NM crash during app cleanup if aggregation didn't + init. (Jason Lowe via daryn) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java index d00c61ed50..d29e6a0cf4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java @@ -342,14 +342,14 @@ private void stopContainer(ContainerId containerId, int exitCode) { // A container is complete. Put this containers' logs up for aggregation if // this containers' logs are needed. - if (!this.appLogAggregators.containsKey( - containerId.getApplicationAttemptId().getApplicationId())) { - throw new YarnException("Application is not initialized yet for " - + containerId); + AppLogAggregator aggregator = this.appLogAggregators.get( + containerId.getApplicationAttemptId().getApplicationId()); + if (aggregator == null) { + LOG.warn("Log aggregation is not initialized for " + containerId + + ", did it fail to start?"); + return; } - this.appLogAggregators.get( - containerId.getApplicationAttemptId().getApplicationId()) - .startContainerLogAggregation(containerId, exitCode == 0); + aggregator.startContainerLogAggregation(containerId, exitCode == 0); } private void stopApp(ApplicationId appId) { @@ -357,11 +357,13 @@ private void stopApp(ApplicationId appId) { // App is complete. Finish up any containers' pending log aggregation and // close the application specific logFile. - if (!this.appLogAggregators.containsKey(appId)) { - throw new YarnException("Application is not initialized yet for " - + appId); + AppLogAggregator aggregator = this.appLogAggregators.get(appId); + if (aggregator == null) { + LOG.warn("Log aggregation is not initialized for " + appId + + ", did it fail to start?"); + return; } - this.appLogAggregators.get(appId).finishLogAggregation(); + aggregator.finishLogAggregation(); } @Override diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 2d300310a2..1627804805 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -380,7 +380,7 @@ public void testMultipleAppsLogAggregation() throws Exception { @Test @SuppressWarnings("unchecked") - public void testLogAggregationInitFailsWithoutKillingNM() throws Exception { + public void testLogAggregationFailsWithoutKillingNM() throws Exception { this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, @@ -412,7 +412,16 @@ public void testLogAggregationInitFailsWithoutKillingNM() throws Exception { new ApplicationFinishEvent(appId, "Application failed to init aggregation: KABOOM!") }; checkEvents(appEventHandler, expectedEvents, false, - "getType", "getApplicationID", "getDiagnostic"); + "getType", "getApplicationID", "getDiagnostic"); + + // verify trying to collect logs for containers/apps we don't know about + // doesn't blow up and tear down the NM + logAggregationService.handle(new LogHandlerContainerFinishedEvent( + BuilderUtils.newContainerId(4, 1, 1, 1), 0)); + dispatcher.await(); + logAggregationService.handle(new LogHandlerAppFinishedEvent( + BuilderUtils.newApplicationId(1, 5))); + dispatcher.await(); } private void writeContainerLogs(File appLogDir, ContainerId containerId)