MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. (Jason Lowe via kasha)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567666 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
41ec3cce76
commit
308f5faf18
@ -159,6 +159,9 @@ Release 2.4.0 - UNRELEASED
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job.
|
||||
(Jason Lowe via kasha)
|
||||
|
||||
Release 2.3.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -353,9 +353,11 @@ private void handleTaskFailedEvent(TaskFailedEvent event) {
|
||||
taskInfo.error = StringInterner.weakIntern(event.getError());
|
||||
taskInfo.failedDueToAttemptId = event.getFailedAttemptID();
|
||||
taskInfo.counters = event.getCounters();
|
||||
if (info.errorInfo.isEmpty()) {
|
||||
info.errorInfo = "Task " + taskInfo.taskId + " failed " +
|
||||
taskInfo.attemptsMap.size() + " times ";
|
||||
}
|
||||
}
|
||||
|
||||
private void handleTaskStartedEvent(TaskStartedEvent event) {
|
||||
TaskInfo taskInfo = new TaskInfo();
|
||||
|
@ -40,6 +40,8 @@
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.Counters;
|
||||
import org.apache.hadoop.mapreduce.JobID;
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
import org.apache.hadoop.mapreduce.TaskID;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
@ -51,7 +53,9 @@
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
@ -69,7 +73,6 @@
|
||||
import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo;
|
||||
import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory;
|
||||
import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo;
|
||||
import org.apache.hadoop.net.DNSToSwitchMapping;
|
||||
@ -730,4 +733,40 @@ public void testPartialJob() throws Exception {
|
||||
assertNull(test.getAMInfos());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleFailedTasks() throws Exception {
|
||||
JobHistoryParser parser =
|
||||
new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
|
||||
EventReader reader = Mockito.mock(EventReader.class);
|
||||
final AtomicInteger numEventsRead = new AtomicInteger(0); // Hack!
|
||||
final org.apache.hadoop.mapreduce.TaskType taskType =
|
||||
org.apache.hadoop.mapreduce.TaskType.MAP;
|
||||
final TaskID[] tids = new TaskID[2];
|
||||
JobID jid = new JobID("1", 1);
|
||||
tids[0] = new TaskID(jid, taskType, 0);
|
||||
tids[1] = new TaskID(jid, taskType, 1);
|
||||
Mockito.when(reader.getNextEvent()).thenAnswer(
|
||||
new Answer<HistoryEvent>() {
|
||||
public HistoryEvent answer(InvocationOnMock invocation)
|
||||
throws IOException {
|
||||
// send two task start and two task fail events for tasks 0 and 1
|
||||
int eventId = numEventsRead.getAndIncrement();
|
||||
TaskID tid = tids[eventId & 0x1];
|
||||
if (eventId < 2) {
|
||||
return new TaskStartedEvent(tid, 0, taskType, "");
|
||||
}
|
||||
if (eventId < 4) {
|
||||
TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType,
|
||||
"failed", "FAILED", null, new Counters());
|
||||
tfe.setDatum(tfe.getDatum());
|
||||
return tfe;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
});
|
||||
JobInfo info = parser.parse(reader);
|
||||
assertTrue("Task 0 not implicated",
|
||||
info.getErrorInfo().contains(tids[0].toString()));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user