MAPREDUCE-7028. Concurrent task progress updates causing NPE in Application Master. Contributed by Gergo Repas
This commit is contained in:
parent
c9bf813c9a
commit
fe35103591
@ -585,33 +585,38 @@ public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
|
|||||||
private void coalesceStatusUpdate(TaskAttemptId yarnAttemptID,
|
private void coalesceStatusUpdate(TaskAttemptId yarnAttemptID,
|
||||||
TaskAttemptStatus taskAttemptStatus,
|
TaskAttemptStatus taskAttemptStatus,
|
||||||
AtomicReference<TaskAttemptStatus> lastStatusRef) {
|
AtomicReference<TaskAttemptStatus> lastStatusRef) {
|
||||||
boolean asyncUpdatedNeeded = false;
|
List<TaskAttemptId> fetchFailedMaps = taskAttemptStatus.fetchFailedMaps;
|
||||||
TaskAttemptStatus lastStatus = lastStatusRef.get();
|
TaskAttemptStatus lastStatus = null;
|
||||||
|
boolean done = false;
|
||||||
if (lastStatus == null) {
|
while (!done) {
|
||||||
lastStatusRef.set(taskAttemptStatus);
|
lastStatus = lastStatusRef.get();
|
||||||
asyncUpdatedNeeded = true;
|
if (lastStatus != null && lastStatus.fetchFailedMaps != null) {
|
||||||
} else {
|
// merge fetchFailedMaps from the previous update
|
||||||
List<TaskAttemptId> oldFetchFailedMaps =
|
|
||||||
taskAttemptStatus.fetchFailedMaps;
|
|
||||||
|
|
||||||
// merge fetchFailedMaps from the previous update
|
|
||||||
if (lastStatus.fetchFailedMaps != null) {
|
|
||||||
if (taskAttemptStatus.fetchFailedMaps == null) {
|
if (taskAttemptStatus.fetchFailedMaps == null) {
|
||||||
taskAttemptStatus.fetchFailedMaps = lastStatus.fetchFailedMaps;
|
taskAttemptStatus.fetchFailedMaps = lastStatus.fetchFailedMaps;
|
||||||
} else {
|
} else {
|
||||||
taskAttemptStatus.fetchFailedMaps.addAll(lastStatus.fetchFailedMaps);
|
taskAttemptStatus.fetchFailedMaps =
|
||||||
|
new ArrayList<>(lastStatus.fetchFailedMaps.size() +
|
||||||
|
fetchFailedMaps.size());
|
||||||
|
taskAttemptStatus.fetchFailedMaps.addAll(
|
||||||
|
lastStatus.fetchFailedMaps);
|
||||||
|
taskAttemptStatus.fetchFailedMaps.addAll(
|
||||||
|
fetchFailedMaps);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!lastStatusRef.compareAndSet(lastStatus, taskAttemptStatus)) {
|
// lastStatusRef may be changed by either the AsyncDispatcher when
|
||||||
// update failed - async dispatcher has processed it in the meantime
|
// it processes the update, or by another IPC server handler
|
||||||
taskAttemptStatus.fetchFailedMaps = oldFetchFailedMaps;
|
done = lastStatusRef.compareAndSet(lastStatus, taskAttemptStatus);
|
||||||
lastStatusRef.set(taskAttemptStatus);
|
if (!done) {
|
||||||
asyncUpdatedNeeded = true;
|
LOG.info("TaskAttempt " + yarnAttemptID +
|
||||||
|
": lastStatusRef changed by another thread, retrying...");
|
||||||
|
// let's revert taskAttemptStatus.fetchFailedMaps
|
||||||
|
taskAttemptStatus.fetchFailedMaps = fetchFailedMaps;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean asyncUpdatedNeeded = (lastStatus == null);
|
||||||
if (asyncUpdatedNeeded) {
|
if (asyncUpdatedNeeded) {
|
||||||
context.getEventHandler().handle(
|
context.getEventHandler().handle(
|
||||||
new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
|
new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
|
||||||
|
Loading…
Reference in New Issue
Block a user