MAPREDUCE-3490. Fixed MapReduce AM to count failed maps also towards Reduce ramp up. Contributed by Sharad Agarwal and Arun C Murthy.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1227226 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1447274693
commit
55e94dc5ef
@ -389,6 +389,9 @@ Release 0.23.1 - Unreleased
|
|||||||
MAPREDUCE-3608. Fixed compile issue with MAPREDUCE-3522. (mahadev via
|
MAPREDUCE-3608. Fixed compile issue with MAPREDUCE-3522. (mahadev via
|
||||||
acmurthy)
|
acmurthy)
|
||||||
|
|
||||||
|
MAPREDUCE-3490. Fixed MapReduce AM to count failed maps also towards Reduce
|
||||||
|
ramp up. (Sharad Agarwal and Arun C Murthy via vinodkv)
|
||||||
|
|
||||||
Release 0.23.0 - 2011-11-01
|
Release 0.23.0 - 2011-11-01
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.mapreduce.JobCounter;
|
import org.apache.hadoop.mapreduce.JobCounter;
|
||||||
import org.apache.hadoop.mapreduce.JobID;
|
import org.apache.hadoop.mapreduce.JobID;
|
||||||
@ -122,8 +123,6 @@ added to the pending and are ramped up (added to scheduled) based
|
|||||||
private boolean recalculateReduceSchedule = false;
|
private boolean recalculateReduceSchedule = false;
|
||||||
private int mapResourceReqt;//memory
|
private int mapResourceReqt;//memory
|
||||||
private int reduceResourceReqt;//memory
|
private int reduceResourceReqt;//memory
|
||||||
private int completedMaps = 0;
|
|
||||||
private int completedReduces = 0;
|
|
||||||
|
|
||||||
private boolean reduceStarted = false;
|
private boolean reduceStarted = false;
|
||||||
private float maxReduceRampupLimit = 0;
|
private float maxReduceRampupLimit = 0;
|
||||||
@ -169,7 +168,13 @@ protected synchronized void heartbeat() throws Exception {
|
|||||||
|
|
||||||
if (recalculateReduceSchedule) {
|
if (recalculateReduceSchedule) {
|
||||||
preemptReducesIfNeeded();
|
preemptReducesIfNeeded();
|
||||||
scheduleReduces();
|
scheduleReduces(
|
||||||
|
getJob().getTotalMaps(), getJob().getCompletedMaps(),
|
||||||
|
scheduledRequests.maps.size(), scheduledRequests.reduces.size(),
|
||||||
|
assignedRequests.maps.size(), assignedRequests.reduces.size(),
|
||||||
|
mapResourceReqt, reduceResourceReqt,
|
||||||
|
pendingReduces.size(),
|
||||||
|
maxReduceRampupLimit, reduceSlowStart);
|
||||||
recalculateReduceSchedule = false;
|
recalculateReduceSchedule = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -180,6 +185,14 @@ public void stop() {
|
|||||||
LOG.info("Final Stats: " + getStat());
|
LOG.info("Final Stats: " + getStat());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean getIsReduceStarted() {
|
||||||
|
return reduceStarted;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIsReduceStarted(boolean reduceStarted) {
|
||||||
|
this.reduceStarted = reduceStarted;
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public synchronized void handle(ContainerAllocatorEvent event) {
|
public synchronized void handle(ContainerAllocatorEvent event) {
|
||||||
@ -320,9 +333,16 @@ private void preemptReducesIfNeeded() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void scheduleReduces() {
|
@Private
|
||||||
|
public void scheduleReduces(
|
||||||
|
int totalMaps, int completedMaps,
|
||||||
|
int scheduledMaps, int scheduledReduces,
|
||||||
|
int assignedMaps, int assignedReduces,
|
||||||
|
int mapResourceReqt, int reduceResourceReqt,
|
||||||
|
int numPendingReduces,
|
||||||
|
float maxReduceRampupLimit, float reduceSlowStart) {
|
||||||
|
|
||||||
if (pendingReduces.size() == 0) {
|
if (numPendingReduces == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,29 +350,25 @@ private void scheduleReduces() {
|
|||||||
|
|
||||||
//if all maps are assigned, then ramp up all reduces irrespective of the
|
//if all maps are assigned, then ramp up all reduces irrespective of the
|
||||||
//headroom
|
//headroom
|
||||||
if (scheduledRequests.maps.size() == 0 && pendingReduces.size() > 0) {
|
if (scheduledMaps == 0 && numPendingReduces > 0) {
|
||||||
LOG.info("All maps assigned. Ramping up all remaining reduces:" + pendingReduces.size());
|
LOG.info("All maps assigned. " +
|
||||||
for (ContainerRequest req : pendingReduces) {
|
"Ramping up all remaining reduces:" + numPendingReduces);
|
||||||
scheduledRequests.addReduce(req);
|
scheduleAllReduces();
|
||||||
}
|
|
||||||
pendingReduces.clear();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int totalMaps = assignedRequests.maps.size() + completedMaps + scheduledRequests.maps.size();
|
|
||||||
|
|
||||||
//check for slow start
|
//check for slow start
|
||||||
if (!reduceStarted) {//not set yet
|
if (!getIsReduceStarted()) {//not set yet
|
||||||
int completedMapsForReduceSlowstart = (int)Math.ceil(reduceSlowStart *
|
int completedMapsForReduceSlowstart = (int)Math.ceil(reduceSlowStart *
|
||||||
totalMaps);
|
totalMaps);
|
||||||
if(completedMaps < completedMapsForReduceSlowstart) {
|
if(completedMaps < completedMapsForReduceSlowstart) {
|
||||||
LOG.info("Reduce slow start threshold not met. " +
|
LOG.info("Reduce slow start threshold not met. " +
|
||||||
"completedMapsForReduceSlowstart " + completedMapsForReduceSlowstart);
|
"completedMapsForReduceSlowstart " +
|
||||||
|
completedMapsForReduceSlowstart);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
LOG.info("Reduce slow start threshold reached. Scheduling reduces.");
|
LOG.info("Reduce slow start threshold reached. Scheduling reduces.");
|
||||||
reduceStarted = true;
|
setIsReduceStarted(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -363,20 +379,21 @@ private void scheduleReduces() {
|
|||||||
completedMapPercent = 1;
|
completedMapPercent = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int netScheduledMapMem = scheduledRequests.maps.size() * mapResourceReqt
|
int netScheduledMapMem =
|
||||||
+ assignedRequests.maps.size() * mapResourceReqt;
|
(scheduledMaps + assignedMaps) * mapResourceReqt;
|
||||||
|
|
||||||
int netScheduledReduceMem = scheduledRequests.reduces.size()
|
int netScheduledReduceMem =
|
||||||
* reduceResourceReqt + assignedRequests.reduces.size()
|
(scheduledReduces + assignedReduces) * reduceResourceReqt;
|
||||||
* reduceResourceReqt;
|
|
||||||
|
|
||||||
int finalMapMemLimit = 0;
|
int finalMapMemLimit = 0;
|
||||||
int finalReduceMemLimit = 0;
|
int finalReduceMemLimit = 0;
|
||||||
|
|
||||||
// ramp up the reduces based on completed map percentage
|
// ramp up the reduces based on completed map percentage
|
||||||
int totalMemLimit = getMemLimit();
|
int totalMemLimit = getMemLimit();
|
||||||
int idealReduceMemLimit = Math.min((int)(completedMapPercent * totalMemLimit),
|
int idealReduceMemLimit =
|
||||||
(int) (maxReduceRampupLimit * totalMemLimit));
|
Math.min(
|
||||||
|
(int)(completedMapPercent * totalMemLimit),
|
||||||
|
(int) (maxReduceRampupLimit * totalMemLimit));
|
||||||
int idealMapMemLimit = totalMemLimit - idealReduceMemLimit;
|
int idealMapMemLimit = totalMemLimit - idealReduceMemLimit;
|
||||||
|
|
||||||
// check if there aren't enough maps scheduled, give the free map capacity
|
// check if there aren't enough maps scheduled, give the free map capacity
|
||||||
@ -397,26 +414,43 @@ private void scheduleReduces() {
|
|||||||
" netScheduledMapMem:" + netScheduledMapMem +
|
" netScheduledMapMem:" + netScheduledMapMem +
|
||||||
" netScheduledReduceMem:" + netScheduledReduceMem);
|
" netScheduledReduceMem:" + netScheduledReduceMem);
|
||||||
|
|
||||||
int rampUp = (finalReduceMemLimit - netScheduledReduceMem)
|
int rampUp =
|
||||||
/ reduceResourceReqt;
|
(finalReduceMemLimit - netScheduledReduceMem) / reduceResourceReqt;
|
||||||
|
|
||||||
if (rampUp > 0) {
|
if (rampUp > 0) {
|
||||||
rampUp = Math.min(rampUp, pendingReduces.size());
|
rampUp = Math.min(rampUp, numPendingReduces);
|
||||||
LOG.info("Ramping up " + rampUp);
|
LOG.info("Ramping up " + rampUp);
|
||||||
//more reduce to be scheduled
|
rampUpReduces(rampUp);
|
||||||
for (int i = 0; i < rampUp; i++) {
|
|
||||||
ContainerRequest request = pendingReduces.removeFirst();
|
|
||||||
scheduledRequests.addReduce(request);
|
|
||||||
}
|
|
||||||
} else if (rampUp < 0){
|
} else if (rampUp < 0){
|
||||||
int rampDown = -1 * rampUp;
|
int rampDown = -1 * rampUp;
|
||||||
rampDown = Math.min(rampDown, scheduledRequests.reduces.size());
|
rampDown = Math.min(rampDown, scheduledReduces);
|
||||||
LOG.info("Ramping down " + rampDown);
|
LOG.info("Ramping down " + rampDown);
|
||||||
//remove from the scheduled and move back to pending
|
rampDownReduces(rampDown);
|
||||||
for (int i = 0; i < rampDown; i++) {
|
}
|
||||||
ContainerRequest request = scheduledRequests.removeReduce();
|
}
|
||||||
pendingReduces.add(request);
|
|
||||||
}
|
private void scheduleAllReduces() {
|
||||||
|
for (ContainerRequest req : pendingReduces) {
|
||||||
|
scheduledRequests.addReduce(req);
|
||||||
|
}
|
||||||
|
pendingReduces.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
public void rampUpReduces(int rampUp) {
|
||||||
|
//more reduce to be scheduled
|
||||||
|
for (int i = 0; i < rampUp; i++) {
|
||||||
|
ContainerRequest request = pendingReduces.removeFirst();
|
||||||
|
scheduledRequests.addReduce(request);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
public void rampDownReduces(int rampDown) {
|
||||||
|
//remove from the scheduled and move back to pending
|
||||||
|
for (int i = 0; i < rampDown; i++) {
|
||||||
|
ContainerRequest request = scheduledRequests.removeReduce();
|
||||||
|
pendingReduces.add(request);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -429,8 +463,8 @@ private synchronized String getStat() {
|
|||||||
" ScheduledReduces:" + scheduledRequests.reduces.size() +
|
" ScheduledReduces:" + scheduledRequests.reduces.size() +
|
||||||
" AssignedMaps:" + assignedRequests.maps.size() +
|
" AssignedMaps:" + assignedRequests.maps.size() +
|
||||||
" AssignedReduces:" + assignedRequests.reduces.size() +
|
" AssignedReduces:" + assignedRequests.reduces.size() +
|
||||||
" completedMaps:" + completedMaps +
|
" completedMaps:" + getJob().getCompletedMaps() +
|
||||||
" completedReduces:" + completedReduces +
|
" completedReduces:" + getJob().getCompletedReduces() +
|
||||||
" containersAllocated:" + containersAllocated +
|
" containersAllocated:" + containersAllocated +
|
||||||
" containersReleased:" + containersReleased +
|
" containersReleased:" + containersReleased +
|
||||||
" hostLocalAssigned:" + hostLocalAssigned +
|
" hostLocalAssigned:" + hostLocalAssigned +
|
||||||
@ -497,11 +531,7 @@ private List<Container> getResources() throws Exception {
|
|||||||
+ cont.getContainerId());
|
+ cont.getContainerId());
|
||||||
} else {
|
} else {
|
||||||
assignedRequests.remove(attemptID);
|
assignedRequests.remove(attemptID);
|
||||||
if (attemptID.getTaskId().getTaskType().equals(TaskType.MAP)) {
|
|
||||||
completedMaps++;
|
|
||||||
} else {
|
|
||||||
completedReduces++;
|
|
||||||
}
|
|
||||||
// send the container completed event to Task attempt
|
// send the container completed event to Task attempt
|
||||||
eventHandler.handle(new TaskAttemptEvent(attemptID,
|
eventHandler.handle(new TaskAttemptEvent(attemptID,
|
||||||
TaskAttemptEventType.TA_CONTAINER_COMPLETED));
|
TaskAttemptEventType.TA_CONTAINER_COMPLETED));
|
||||||
@ -514,7 +544,8 @@ private List<Container> getResources() throws Exception {
|
|||||||
return newContainers;
|
return newContainers;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getMemLimit() {
|
@Private
|
||||||
|
public int getMemLimit() {
|
||||||
int headRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0;
|
int headRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0;
|
||||||
return headRoom + assignedRequests.maps.size() * mapResourceReqt +
|
return headRoom + assignedRequests.maps.size() * mapResourceReqt +
|
||||||
assignedRequests.reduces.size() * reduceResourceReqt;
|
assignedRequests.reduces.size() * reduceResourceReqt;
|
||||||
|
@ -19,8 +19,7 @@
|
|||||||
package org.apache.hadoop.mapreduce.v2.app;
|
package org.apache.hadoop.mapreduce.v2.app;
|
||||||
|
|
||||||
import static org.mockito.Matchers.isA;
|
import static org.mockito.Matchers.isA;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.*;
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -1218,6 +1217,70 @@ protected void startAllocatorThread() {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReduceScheduling() throws Exception {
|
||||||
|
int totalMaps = 10;
|
||||||
|
int succeededMaps = 1;
|
||||||
|
int scheduledMaps = 10;
|
||||||
|
int scheduledReduces = 0;
|
||||||
|
int assignedMaps = 2;
|
||||||
|
int assignedReduces = 0;
|
||||||
|
int mapResourceReqt = 1024;
|
||||||
|
int reduceResourceReqt = 2*1024;
|
||||||
|
int numPendingReduces = 4;
|
||||||
|
float maxReduceRampupLimit = 0.5f;
|
||||||
|
float reduceSlowStart = 0.2f;
|
||||||
|
|
||||||
|
RMContainerAllocator allocator = mock(RMContainerAllocator.class);
|
||||||
|
doCallRealMethod().when(allocator).
|
||||||
|
scheduleReduces(anyInt(), anyInt(), anyInt(), anyInt(), anyInt(),
|
||||||
|
anyInt(), anyInt(), anyInt(), anyInt(), anyFloat(), anyFloat());
|
||||||
|
|
||||||
|
// Test slow-start
|
||||||
|
allocator.scheduleReduces(
|
||||||
|
totalMaps, succeededMaps,
|
||||||
|
scheduledMaps, scheduledReduces,
|
||||||
|
assignedMaps, assignedReduces,
|
||||||
|
mapResourceReqt, reduceResourceReqt,
|
||||||
|
numPendingReduces,
|
||||||
|
maxReduceRampupLimit, reduceSlowStart);
|
||||||
|
verify(allocator, never()).setIsReduceStarted(true);
|
||||||
|
|
||||||
|
succeededMaps = 3;
|
||||||
|
allocator.scheduleReduces(
|
||||||
|
totalMaps, succeededMaps,
|
||||||
|
scheduledMaps, scheduledReduces,
|
||||||
|
assignedMaps, assignedReduces,
|
||||||
|
mapResourceReqt, reduceResourceReqt,
|
||||||
|
numPendingReduces,
|
||||||
|
maxReduceRampupLimit, reduceSlowStart);
|
||||||
|
verify(allocator, times(1)).setIsReduceStarted(true);
|
||||||
|
|
||||||
|
// Test reduce ramp-up
|
||||||
|
doReturn(100 * 1024).when(allocator).getMemLimit();
|
||||||
|
allocator.scheduleReduces(
|
||||||
|
totalMaps, succeededMaps,
|
||||||
|
scheduledMaps, scheduledReduces,
|
||||||
|
assignedMaps, assignedReduces,
|
||||||
|
mapResourceReqt, reduceResourceReqt,
|
||||||
|
numPendingReduces,
|
||||||
|
maxReduceRampupLimit, reduceSlowStart);
|
||||||
|
verify(allocator).rampUpReduces(anyInt());
|
||||||
|
verify(allocator, never()).rampDownReduces(anyInt());
|
||||||
|
|
||||||
|
// Test reduce ramp-down
|
||||||
|
scheduledReduces = 3;
|
||||||
|
doReturn(10 * 1024).when(allocator).getMemLimit();
|
||||||
|
allocator.scheduleReduces(
|
||||||
|
totalMaps, succeededMaps,
|
||||||
|
scheduledMaps, scheduledReduces,
|
||||||
|
assignedMaps, assignedReduces,
|
||||||
|
mapResourceReqt, reduceResourceReqt,
|
||||||
|
numPendingReduces,
|
||||||
|
maxReduceRampupLimit, reduceSlowStart);
|
||||||
|
verify(allocator).rampDownReduces(anyInt());
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
TestRMContainerAllocator t = new TestRMContainerAllocator();
|
TestRMContainerAllocator t = new TestRMContainerAllocator();
|
||||||
t.testSimple();
|
t.testSimple();
|
||||||
|
Loading…
Reference in New Issue
Block a user