MAPREDUCE-4785. TestMRApp occasionally fails (haibochen via rkanter)
This commit is contained in:
parent
0a9f00af5e
commit
ff0ee84d77
@ -332,6 +332,8 @@ Release 2.9.0 - UNRELEASED
|
||||
MAPREDUCE-6620. Jobs that did not start are shown as starting in 1969 in
|
||||
the JHS web UI (haibochen via rkanter)
|
||||
|
||||
MAPREDUCE-4785. TestMRApp occasionally fails (haibochen via rkanter)
|
||||
|
||||
Release 2.8.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -25,7 +25,10 @@
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Assert;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@ -205,10 +208,10 @@ public void testUpdatedNodes() throws Exception {
|
||||
conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 0.5f);
|
||||
// uberization forces full slowstart (1.0), so disable that
|
||||
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
|
||||
Job job = app.submit(conf);
|
||||
app.waitForState(job, JobState.RUNNING);
|
||||
Assert.assertEquals("Num tasks not correct", 4, job.getTasks().size());
|
||||
Iterator<Task> it = job.getTasks().values().iterator();
|
||||
final Job job1 = app.submit(conf);
|
||||
app.waitForState(job1, JobState.RUNNING);
|
||||
Assert.assertEquals("Num tasks not correct", 4, job1.getTasks().size());
|
||||
Iterator<Task> it = job1.getTasks().values().iterator();
|
||||
Task mapTask1 = it.next();
|
||||
Task mapTask2 = it.next();
|
||||
|
||||
@ -240,8 +243,20 @@ public void testUpdatedNodes() throws Exception {
|
||||
app.waitForState(mapTask1, TaskState.SUCCEEDED);
|
||||
app.waitForState(mapTask2, TaskState.SUCCEEDED);
|
||||
|
||||
TaskAttemptCompletionEvent[] events = job.getTaskAttemptCompletionEvents(0,
|
||||
100);
|
||||
final int checkIntervalMillis = 100;
|
||||
final int waitForMillis = 800;
|
||||
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job1
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 2;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
|
||||
TaskAttemptCompletionEvent[] events = job1.getTaskAttemptCompletionEvents
|
||||
(0, 100);
|
||||
Assert.assertEquals("Expecting 2 completion events for success", 2,
|
||||
events.length);
|
||||
|
||||
@ -253,12 +268,21 @@ public void testUpdatedNodes() throws Exception {
|
||||
nr.setNodeState(NodeState.UNHEALTHY);
|
||||
updatedNodes.add(nr);
|
||||
app.getContext().getEventHandler()
|
||||
.handle(new JobUpdatedNodesEvent(job.getID(), updatedNodes));
|
||||
.handle(new JobUpdatedNodesEvent(job1.getID(), updatedNodes));
|
||||
|
||||
app.waitForState(task1Attempt, TaskAttemptState.KILLED);
|
||||
app.waitForState(task2Attempt, TaskAttemptState.KILLED);
|
||||
|
||||
events = job.getTaskAttemptCompletionEvents(0, 100);
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job1
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 4;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
|
||||
events = job1.getTaskAttemptCompletionEvents(0, 100);
|
||||
Assert.assertEquals("Expecting 2 more completion events for killed", 4,
|
||||
events.length);
|
||||
|
||||
@ -281,7 +305,16 @@ public void testUpdatedNodes() throws Exception {
|
||||
app.waitForState(mapTask1, TaskState.SUCCEEDED);
|
||||
app.waitForState(mapTask2, TaskState.RUNNING);
|
||||
|
||||
events = job.getTaskAttemptCompletionEvents(0, 100);
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job1
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 5;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
|
||||
events = job1.getTaskAttemptCompletionEvents(0, 100);
|
||||
Assert.assertEquals("Expecting 1 more completion events for success", 5,
|
||||
events.length);
|
||||
|
||||
@ -295,10 +328,11 @@ public void testUpdatedNodes() throws Exception {
|
||||
conf = new Configuration();
|
||||
conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
|
||||
conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
|
||||
job = app.submit(conf);
|
||||
app.waitForState(job, JobState.RUNNING);
|
||||
Assert.assertEquals("No of tasks not correct", 4, job.getTasks().size());
|
||||
it = job.getTasks().values().iterator();
|
||||
|
||||
final Job job2 = app.submit(conf);
|
||||
app.waitForState(job2, JobState.RUNNING);
|
||||
Assert.assertEquals("No of tasks not correct", 4, job2.getTasks().size());
|
||||
it = job2.getTasks().values().iterator();
|
||||
mapTask1 = it.next();
|
||||
mapTask2 = it.next();
|
||||
Task reduceTask1 = it.next();
|
||||
@ -308,7 +342,16 @@ public void testUpdatedNodes() throws Exception {
|
||||
app.waitForState(mapTask1, TaskState.SUCCEEDED);
|
||||
app.waitForState(mapTask2, TaskState.RUNNING);
|
||||
|
||||
events = job.getTaskAttemptCompletionEvents(0, 100);
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job2
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 2;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
|
||||
events = job2.getTaskAttemptCompletionEvents(0, 100);
|
||||
Assert.assertEquals(
|
||||
"Expecting 2 completion events for killed & success of map1", 2,
|
||||
events.length);
|
||||
@ -321,7 +364,16 @@ public void testUpdatedNodes() throws Exception {
|
||||
TaskAttemptEventType.TA_DONE));
|
||||
app.waitForState(mapTask2, TaskState.SUCCEEDED);
|
||||
|
||||
events = job.getTaskAttemptCompletionEvents(0, 100);
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job2
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 3;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
|
||||
events = job2.getTaskAttemptCompletionEvents(0, 100);
|
||||
Assert.assertEquals("Expecting 1 more completion events for success", 3,
|
||||
events.length);
|
||||
|
||||
@ -350,14 +402,30 @@ public void testUpdatedNodes() throws Exception {
|
||||
.handle(
|
||||
new TaskAttemptEvent(task4Attempt.getID(),
|
||||
TaskAttemptEventType.TA_DONE));
|
||||
app.waitForState(reduceTask2, TaskState.SUCCEEDED);
|
||||
app.waitForState(reduceTask2, TaskState.SUCCEEDED);
|
||||
|
||||
events = job.getTaskAttemptCompletionEvents(0, 100);
|
||||
waitFor(new Supplier<Boolean>() {
|
||||
@Override
|
||||
public Boolean get() {
|
||||
TaskAttemptCompletionEvent[] events = job2
|
||||
.getTaskAttemptCompletionEvents(0, 100);
|
||||
return events.length == 5;
|
||||
}
|
||||
}, checkIntervalMillis, waitForMillis);
|
||||
events = job2.getTaskAttemptCompletionEvents(0, 100);
|
||||
Assert.assertEquals("Expecting 2 more completion events for reduce success",
|
||||
5, events.length);
|
||||
|
||||
// job succeeds
|
||||
app.waitForState(job, JobState.SUCCEEDED);
|
||||
app.waitForState(job2, JobState.SUCCEEDED);
|
||||
}
|
||||
|
||||
private static void waitFor(Supplier<Boolean> predicate, int
|
||||
checkIntervalMillis, int checkTotalMillis) throws InterruptedException {
|
||||
try {
|
||||
GenericTestUtils.waitFor(predicate, checkIntervalMillis, checkTotalMillis);
|
||||
} catch (TimeoutException ex) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user