MAPREDUCE-6797. Job history server scans can become blocked on a single, slow entry. Contributed by Prabhu Joseph

This commit is contained in:
Jason Lowe 2016-11-14 20:20:50 +00:00
parent ff0b99eafe
commit 99c2bbd337

View File

@@ -231,38 +231,36 @@ public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) {
JobId firstMoveFailedKey = null; JobId firstMoveFailedKey = null;
int moveFailedCount = 0; int moveFailedCount = 0;
while(cache.size() > maxSize && keys.hasNext()) { while (cache.size() > maxSize && keys.hasNext()) {
JobId key = keys.next(); JobId key = keys.next();
HistoryFileInfo firstValue = cache.get(key); HistoryFileInfo firstValue = cache.get(key);
if(firstValue != null) { if (firstValue != null) {
synchronized(firstValue) { if (firstValue.isMovePending()) {
if (firstValue.isMovePending()) { if (firstValue.didMoveFail() &&
if(firstValue.didMoveFail() && firstValue.jobIndexInfo.getFinishTime() <= cutoff) {
firstValue.jobIndexInfo.getFinishTime() <= cutoff) { cache.remove(key);
cache.remove(key); // Now lets try to delete it
//Now lets try to delete it try {
try { firstValue.delete();
firstValue.delete(); } catch (IOException e) {
} catch (IOException e) { LOG.error("Error while trying to delete history files" +
LOG.error("Error while trying to delete history files" + " that could not be moved to done.", e);
" that could not be moved to done.", e);
}
} else {
if (firstValue.didMoveFail()) {
if (moveFailedCount == 0) {
firstMoveFailedKey = key;
}
moveFailedCount += 1;
} else {
if (inIntermediateCount == 0) {
firstInIntermediateKey = key;
}
inIntermediateCount += 1;
}
} }
} else { } else {
cache.remove(key); if (firstValue.didMoveFail()) {
if (moveFailedCount == 0) {
firstMoveFailedKey = key;
}
moveFailedCount += 1;
} else {
if (inIntermediateCount == 0) {
firstInIntermediateKey = key;
}
inIntermediateCount += 1;
}
} }
} else {
cache.remove(key);
} }
} }
} }