MAPREDUCE-6485. Create a new task attempt with failed map task priority if in-progress attempts are unassigned. (Xianyin Xin via rohithsharmaks)
This commit is contained in:
parent
fd026f535c
commit
439f43ad3d
@ -588,6 +588,9 @@ Release 2.8.0 - UNRELEASED
|
||||
MAPREDUCE-6494. Permission issue when running archive-logs tool as
|
||||
different users (rkanter)
|
||||
|
||||
MAPREDUCE-6485. Create a new task attempt with failed map task priority
|
||||
if in-progress attempts are unassigned. (Xianyin Xin via rohithsharmaks)
|
||||
|
||||
Release 2.7.2 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -1333,7 +1333,7 @@ public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo,
|
||||
return attemptState;
|
||||
}
|
||||
|
||||
private static TaskAttemptState getExternalState(
|
||||
protected static TaskAttemptState getExternalState(
|
||||
TaskAttemptStateInternal smState) {
|
||||
switch (smState) {
|
||||
case ASSIGNED:
|
||||
@ -1365,6 +1365,11 @@ private static TaskAttemptState getExternalState(
|
||||
}
|
||||
}
|
||||
|
||||
// check whether the attempt is assigned if container is not null
|
||||
boolean isContainerAssigned() {
|
||||
return container != null;
|
||||
}
|
||||
|
||||
//always called in write lock
|
||||
private void setFinishTime() {
|
||||
//set the finish time only if launch time is set
|
||||
|
@ -1057,9 +1057,21 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
|
||||
TaskAttemptCompletionEventStatus.FAILED);
|
||||
// we don't need a new event if we already have a spare
|
||||
task.inProgressAttempts.remove(taskAttemptId);
|
||||
if (task.inProgressAttempts.size() == 0
|
||||
&& task.successfulAttempt == null) {
|
||||
task.addAndScheduleAttempt(Avataar.VIRGIN);
|
||||
if (task.successfulAttempt == null) {
|
||||
boolean shouldAddNewAttempt = true;
|
||||
if (task.inProgressAttempts.size() > 0) {
|
||||
// if not all of the inProgressAttempts are hanging for resource
|
||||
for (TaskAttemptId attemptId : task.inProgressAttempts) {
|
||||
if (((TaskAttemptImpl) task.getAttempt(attemptId))
|
||||
.isContainerAssigned()) {
|
||||
shouldAddNewAttempt = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (shouldAddNewAttempt) {
|
||||
task.addAndScheduleAttempt(Avataar.VIRGIN);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
task.handleTaskAttemptCompletion(
|
||||
@ -1080,7 +1092,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
|
||||
taskFailedEvent));
|
||||
} else {
|
||||
LOG.debug("Not generating HistoryFinish event since start event not" +
|
||||
" generated for task: " + task.getID());
|
||||
" generated for task: " + task.getID());
|
||||
}
|
||||
task.eventHandler.handle(
|
||||
new JobTaskEvent(task.taskId, TaskState.FAILED));
|
||||
|
@ -50,6 +50,8 @@
|
||||
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
|
||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
|
||||
@ -57,6 +59,7 @@
|
||||
import org.apache.hadoop.security.Credentials;
|
||||
import org.apache.hadoop.security.token.Token;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.event.Event;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||
import org.apache.hadoop.yarn.util.Clock;
|
||||
@ -140,6 +143,8 @@ private class MockTaskAttemptImpl extends TaskAttemptImpl {
|
||||
|
||||
private float progress = 0;
|
||||
private TaskAttemptState state = TaskAttemptState.NEW;
|
||||
boolean rescheduled = false;
|
||||
boolean containerAssigned = false;
|
||||
private TaskType taskType;
|
||||
private Counters attemptCounters = TaskAttemptImpl.EMPTY_COUNTERS;
|
||||
|
||||
@ -153,6 +158,15 @@ public MockTaskAttemptImpl(TaskId taskId, int id, EventHandler eventHandler,
|
||||
this.taskType = taskType;
|
||||
}
|
||||
|
||||
public void assignContainer() {
|
||||
containerAssigned = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean isContainerAssigned() {
|
||||
return containerAssigned;
|
||||
}
|
||||
|
||||
public TaskAttemptId getAttemptId() {
|
||||
return getID();
|
||||
}
|
||||
@ -173,11 +187,20 @@ public void setProgress(float progress) {
|
||||
public void setState(TaskAttemptState state) {
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TaskAttemptState getState() {
|
||||
return state;
|
||||
}
|
||||
|
||||
public boolean getRescheduled() {
|
||||
return this.rescheduled;
|
||||
}
|
||||
|
||||
public void setRescheduled(boolean rescheduled) {
|
||||
this.rescheduled = rescheduled;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Counters getCounters() {
|
||||
return attemptCounters;
|
||||
@ -279,7 +302,9 @@ private void killScheduledTaskAttempt(TaskAttemptId attemptId) {
|
||||
private void launchTaskAttempt(TaskAttemptId attemptId) {
|
||||
mockTask.handle(new TaskTAttemptEvent(attemptId,
|
||||
TaskEventType.T_ATTEMPT_LAUNCHED));
|
||||
assertTaskRunningState();
|
||||
((MockTaskAttemptImpl)(mockTask.getAttempt(attemptId)))
|
||||
.assignContainer();
|
||||
assertTaskRunningState();
|
||||
}
|
||||
|
||||
private void commitTaskAttempt(TaskAttemptId attemptId) {
|
||||
@ -708,6 +733,71 @@ protected int getMaxAttempts() {
|
||||
assertEquals(TaskState.FAILED, mockTask.getState());
|
||||
}
|
||||
|
||||
private class PartialAttemptEventHandler implements EventHandler {
|
||||
|
||||
@Override
|
||||
public void handle(Event event) {
|
||||
if (event instanceof TaskAttemptEvent)
|
||||
if (event.getType() == TaskAttemptEventType.TA_RESCHEDULE) {
|
||||
TaskAttempt attempt = mockTask.getAttempt(((TaskAttemptEvent) event).getTaskAttemptID());
|
||||
((MockTaskAttemptImpl)attempt).setRescheduled(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailedTransitionWithHangingSpeculativeMap() {
|
||||
mockTask = new MockTaskImpl(jobId, partition, new PartialAttemptEventHandler(),
|
||||
remoteJobConfFile, conf, taskAttemptListener, jobToken,
|
||||
credentials, clock, startCount, metrics, appContext, TaskType.MAP) {
|
||||
@Override
|
||||
protected int getMaxAttempts() {
|
||||
return 4;
|
||||
}
|
||||
};
|
||||
|
||||
// start a new task, schedule and launch a new attempt
|
||||
TaskId taskId = getNewTaskID();
|
||||
scheduleTaskAttempt(taskId);
|
||||
launchTaskAttempt(getLastAttempt().getAttemptId());
|
||||
|
||||
// add a speculative attempt(#2), but not launch it
|
||||
mockTask.handle(new TaskTAttemptEvent(getLastAttempt().getAttemptId(),
|
||||
TaskEventType.T_ADD_SPEC_ATTEMPT));
|
||||
|
||||
// have the first attempt(#1) fail, verify task still running since the
|
||||
// max attempts is 4
|
||||
MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
|
||||
taskAttempt.setState(TaskAttemptState.FAILED);
|
||||
mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
|
||||
TaskEventType.T_ATTEMPT_FAILED));
|
||||
assertEquals(TaskState.RUNNING, mockTask.getState());
|
||||
|
||||
// verify a new attempt(#3) added because the speculative attempt(#2)
|
||||
// is hanging
|
||||
assertEquals(3, taskAttempts.size());
|
||||
|
||||
// verify the speculative attempt(#2) is not a rescheduled attempt
|
||||
assertEquals(false, taskAttempts.get(1).getRescheduled());
|
||||
|
||||
// verify the third attempt is a rescheduled attempt
|
||||
assertEquals(true, taskAttempts.get(2).getRescheduled());
|
||||
|
||||
// now launch the latest attempt(#3) and set the internal state to running
|
||||
launchTaskAttempt(getLastAttempt().getAttemptId());
|
||||
|
||||
// have the speculative attempt(#2) fail, verify task still since it
|
||||
// hasn't reach the max attempts which is 4
|
||||
MockTaskAttemptImpl taskAttempt1 = taskAttempts.get(1);
|
||||
taskAttempt1.setState(TaskAttemptState.FAILED);
|
||||
mockTask.handle(new TaskTAttemptEvent(taskAttempt1.getAttemptId(),
|
||||
TaskEventType.T_ATTEMPT_FAILED));
|
||||
assertEquals(TaskState.RUNNING, mockTask.getState());
|
||||
|
||||
// verify there's no new attempt added because of the running attempt(#3)
|
||||
assertEquals(3, taskAttempts.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountersWithSpeculation() {
|
||||
mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(),
|
||||
|
Loading…
Reference in New Issue
Block a user