From e395711164bfbf6402fa914dbaa87317ed006794 Mon Sep 17 00:00:00 2001 From: Eric Payne Date: Wed, 7 Jul 2021 20:43:44 +0000 Subject: [PATCH] MAPREDUCE-7353: Mapreduce job fails when NM is stopped. Contributed by Bilwa S T (BilwaST) (cherry picked from commit 7581413156da396db218e36a966c5749589b31a7) --- .../v2/app/job/impl/TaskAttemptImpl.java | 7 ++++- .../v2/app/job/impl/TestTaskAttempt.java | 30 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index 3943a3aa91..6738af33fe 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -492,10 +492,15 @@ TaskAttemptEventType.TA_CONTAINER_CLEANED, new KilledTransition()) TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP, TaskAttemptEventType.TA_DIAGNOSTICS_UPDATE, DIAGNOSTIC_INFORMATION_UPDATE_TRANSITION) + .addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP, + EnumSet.of(TaskAttemptStateInternal.SUCCEEDED, + TaskAttemptStateInternal.KILLED), + TaskAttemptEventType.TA_KILL, + new KilledAfterSuccessTransition()) // Ignore-able events .addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP, TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP, - EnumSet.of(TaskAttemptEventType.TA_KILL, + EnumSet.of( TaskAttemptEventType.TA_FAILMSG, TaskAttemptEventType.TA_FAILMSG_BY_CLIENT, TaskAttemptEventType.TA_TIMED_OUT, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java index 4c03c78bb9..15682eeefc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java @@ -1870,6 +1870,36 @@ public void testReducerCustomResourceTypeWithInvalidUnit() { createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf); } + @Test + public void testKillingTaskWhenContainerCleanup() { + MockEventHandler eventHandler = new MockEventHandler(); + TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); + TaskId maptaskId = MRBuilderUtils.newTaskId(taImpl.getID().getTaskId() + .getJobId(), 1, TaskType.MAP); + TaskAttemptId mapTAId = + MRBuilderUtils.newTaskAttemptId(maptaskId, 0); + + // move in two steps to the desired state (cannot get there directly) + taImpl.handle(new TaskAttemptEvent(taImpl.getID(), + TaskAttemptEventType.TA_DONE)); + assertEquals("Task attempt's internal state is not " + + "SUCCESS_FINISHING_CONTAINER", + TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER, + taImpl.getInternalState()); + + taImpl.handle(new TaskAttemptEvent(taImpl.getID(), + TaskAttemptEventType.TA_TIMED_OUT)); + assertEquals("Task attempt's internal state is not " + + "SUCCESS_CONTAINER_CLEANUP", + TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP, + taImpl.getInternalState()); + + taImpl.handle(new TaskAttemptKillEvent(mapTAId, "", true)); + assertEquals("Task attempt is not in KILLED state", + TaskAttemptState.KILLED, + taImpl.getState()); + } + @Test public void testTooManyFetchFailureWhileContainerCleanup() { MockEventHandler eventHandler = new MockEventHandler();