MAPREDUCE-2450. Fixed a corner case with interrupted communication threads leading to a long timeout in Task. Contributed by Rajesh Balamohan.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1232314 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c53c94ec46
commit
d05e6d2671
@ -503,6 +503,9 @@ Release 0.23.1 - Unreleased
|
||||
MAPREDUCE-3657. State machine visualize build fails. (Jason Lowe
|
||||
via mahadev)
|
||||
|
||||
MAPREDUCE-2450. Fixed a corner case with interrupted communication threads
|
||||
leading to a long timeout in Task. (Rajesh Balamohan via acmurthy)
|
||||
|
||||
Release 0.23.0 - 2011-11-01
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -552,6 +552,8 @@ protected class TaskReporter
|
||||
private InputSplit split = null;
|
||||
private Progress taskProgress;
|
||||
private Thread pingThread = null;
|
||||
private boolean done = true;
|
||||
private Object lock = new Object();
|
||||
|
||||
/**
|
||||
* flag that indicates whether progress update needs to be sent to parent.
|
||||
@ -648,6 +650,9 @@ public void run() {
|
||||
// get current flag value and reset it as well
|
||||
boolean sendProgress = resetProgressFlag();
|
||||
while (!taskDone.get()) {
|
||||
synchronized (lock) {
|
||||
done = false;
|
||||
}
|
||||
try {
|
||||
boolean taskFound = true; // whether TT knows about this task
|
||||
// sleep for a bit
|
||||
@ -680,6 +685,7 @@ public void run() {
|
||||
// came back up), kill ourselves
|
||||
if (!taskFound) {
|
||||
LOG.warn("Parent died. Exiting "+taskId);
|
||||
resetDoneFlag();
|
||||
System.exit(66);
|
||||
}
|
||||
|
||||
@ -692,10 +698,19 @@ public void run() {
|
||||
if (remainingRetries == 0) {
|
||||
ReflectionUtils.logThreadInfo(LOG, "Communication exception", 0);
|
||||
LOG.warn("Last retry, killing "+taskId);
|
||||
resetDoneFlag();
|
||||
System.exit(65);
|
||||
}
|
||||
}
|
||||
}
|
||||
//Notify that we are done with the work
|
||||
resetDoneFlag();
|
||||
}
|
||||
/**
 * Marks the communication thread's work as finished and signals a waiter.
 *
 * Sets {@code done} to {@code true} while holding {@code lock}, then calls
 * {@code lock.notify()} so that a thread blocked in {@code lock.wait()}
 * (see the {@code while (!done)} loop in {@code stopCommunicationThread()})
 * can re-check the flag and proceed. In the visible hunks this is invoked
 * at the end of {@code run()} and immediately before each
 * {@code System.exit(...)} call inside {@code run()}.
 */
void resetDoneFlag() {
|
||||
synchronized (lock) {
|
||||
done = true;
|
||||
lock.notify(); // wakes one thread waiting on lock, if any
|
||||
}
|
||||
}
|
||||
public void startCommunicationThread() {
|
||||
if (pingThread == null) {
|
||||
@ -706,6 +721,11 @@ public void startCommunicationThread() {
|
||||
}
|
||||
public void stopCommunicationThread() throws InterruptedException {
|
||||
if (pingThread != null) {
|
||||
synchronized (lock) {
|
||||
while (!done) {
|
||||
lock.wait();
|
||||
}
|
||||
}
|
||||
pingThread.interrupt();
|
||||
pingThread.join();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user