YARN-11100. Fix StackOverflowError in SLS scheduler event handling. Contributed by Szilard Nemeth.
This commit is contained in:
parent
61e809b245
commit
adbaf48082
@ -175,11 +175,12 @@ public void setConf(Configuration conf) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void init(Configuration tempConf) throws ClassNotFoundException {
|
private void init(Configuration tempConf) throws ClassNotFoundException {
|
||||||
|
// runner configuration
|
||||||
|
setConf(tempConf);
|
||||||
|
|
||||||
nmMap = new ConcurrentHashMap<>();
|
nmMap = new ConcurrentHashMap<>();
|
||||||
queueAppNumMap = new HashMap<>();
|
queueAppNumMap = new HashMap<>();
|
||||||
amRunner = new AMRunner(runner, this);
|
amRunner = new AMRunner(runner, this);
|
||||||
// runner configuration
|
|
||||||
setConf(tempConf);
|
|
||||||
|
|
||||||
// runner
|
// runner
|
||||||
poolSize = tempConf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
|
poolSize = tempConf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
public class SLSCapacityScheduler extends CapacityScheduler implements
|
public class SLSCapacityScheduler extends CapacityScheduler implements
|
||||||
SchedulerWrapper,Configurable {
|
SchedulerWrapper, Configurable {
|
||||||
|
|
||||||
private final SLSSchedulerCommons schedulerCommons;
|
private final SLSSchedulerCommons schedulerCommons;
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
@ -65,6 +65,15 @@ public Allocation allocate(ApplicationAttemptId attemptId,
|
|||||||
containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
|
containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Allocation allocatePropagated(ApplicationAttemptId attemptId,
|
||||||
|
List<ResourceRequest> resourceRequests,
|
||||||
|
List<SchedulingRequest> schedulingRequests,
|
||||||
|
List<ContainerId> containerIds, List<String> blacklistAdditions,
|
||||||
|
List<String> blacklistRemovals, ContainerUpdates updateRequests) {
|
||||||
|
return super.allocate(attemptId, resourceRequests, schedulingRequests,
|
||||||
|
containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean tryCommit(Resource cluster, ResourceCommitRequest r,
|
public boolean tryCommit(Resource cluster, ResourceCommitRequest r,
|
||||||
@ -97,6 +106,11 @@ public void handle(SchedulerEvent schedulerEvent) {
|
|||||||
schedulerCommons.handle(schedulerEvent);
|
schedulerCommons.handle(schedulerEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void propagatedHandle(SchedulerEvent schedulerEvent) {
|
||||||
|
super.handle(schedulerEvent);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serviceStop() throws Exception {
|
public void serviceStop() throws Exception {
|
||||||
schedulerCommons.stopMetrics();
|
schedulerCommons.stopMetrics();
|
||||||
|
@ -63,6 +63,21 @@ public void handle(SchedulerEvent schedulerEvent) {
|
|||||||
schedulerCommons.handle(schedulerEvent);
|
schedulerCommons.handle(schedulerEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void propagatedHandle(SchedulerEvent schedulerEvent) {
|
||||||
|
super.handle(schedulerEvent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Allocation allocatePropagated(ApplicationAttemptId attemptId,
|
||||||
|
List<ResourceRequest> resourceRequests,
|
||||||
|
List<SchedulingRequest> schedulingRequests,
|
||||||
|
List<ContainerId> containerIds, List<String> blacklistAdditions,
|
||||||
|
List<String> blacklistRemovals, ContainerUpdates updateRequests) {
|
||||||
|
return super.allocate(attemptId, resourceRequests, schedulingRequests,
|
||||||
|
containerIds, blacklistAdditions, blacklistRemovals, updateRequests);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serviceStop() throws Exception {
|
public void serviceStop() throws Exception {
|
||||||
schedulerCommons.stopMetrics();
|
schedulerCommons.stopMetrics();
|
||||||
|
@ -100,7 +100,8 @@ public Allocation allocate(ApplicationAttemptId attemptId,
|
|||||||
.time();
|
.time();
|
||||||
Allocation allocation = null;
|
Allocation allocation = null;
|
||||||
try {
|
try {
|
||||||
allocation = scheduler.allocate(attemptId, resourceRequests,
|
allocation = ((SchedulerWrapper)scheduler).allocatePropagated(
|
||||||
|
attemptId, resourceRequests,
|
||||||
schedulingRequests, containerIds,
|
schedulingRequests, containerIds,
|
||||||
blacklistAdditions, blacklistRemovals, updateRequests);
|
blacklistAdditions, blacklistRemovals, updateRequests);
|
||||||
return allocation;
|
return allocation;
|
||||||
@ -118,7 +119,8 @@ public Allocation allocate(ApplicationAttemptId attemptId,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return scheduler.allocate(attemptId, resourceRequests, schedulingRequests,
|
return ((SchedulerWrapper)scheduler).allocatePropagated(
|
||||||
|
attemptId, resourceRequests, schedulingRequests,
|
||||||
containerIds,
|
containerIds,
|
||||||
blacklistAdditions, blacklistRemovals, updateRequests);
|
blacklistAdditions, blacklistRemovals, updateRequests);
|
||||||
}
|
}
|
||||||
@ -204,7 +206,7 @@ private void updateQueueWithAllocateRequest(Allocation allocation,
|
|||||||
|
|
||||||
public void handle(SchedulerEvent schedulerEvent) {
|
public void handle(SchedulerEvent schedulerEvent) {
|
||||||
if (!metricsON) {
|
if (!metricsON) {
|
||||||
scheduler.handle(schedulerEvent);
|
((SchedulerWrapper)scheduler).propagatedHandle(schedulerEvent);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -245,7 +247,7 @@ public void handle(SchedulerEvent schedulerEvent) {
|
|||||||
operationTimer = schedulerMetrics.getSchedulerHandleTimer(
|
operationTimer = schedulerMetrics.getSchedulerHandleTimer(
|
||||||
schedulerEvent.getType()).time();
|
schedulerEvent.getType()).time();
|
||||||
|
|
||||||
scheduler.handle(schedulerEvent);
|
((SchedulerWrapper)scheduler).propagatedHandle(schedulerEvent);
|
||||||
} finally {
|
} finally {
|
||||||
if (handlerTimer != null) {
|
if (handlerTimer != null) {
|
||||||
handlerTimer.stop();
|
handlerTimer.stop();
|
||||||
|
@ -19,7 +19,16 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.records.SchedulingRequest;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
@ -29,4 +38,14 @@ public interface SchedulerWrapper {
|
|||||||
Tracker getTracker();
|
Tracker getTracker();
|
||||||
|
|
||||||
String getRealQueueName(String queue) throws YarnException;
|
String getRealQueueName(String queue) throws YarnException;
|
||||||
|
|
||||||
|
void propagatedHandle(SchedulerEvent schedulerEvent);
|
||||||
|
|
||||||
|
Allocation allocatePropagated(ApplicationAttemptId attemptId,
|
||||||
|
List<ResourceRequest> resourceRequests,
|
||||||
|
List<SchedulingRequest> schedulingRequests,
|
||||||
|
List<ContainerId> containerIds,
|
||||||
|
List<String> blacklistAdditions,
|
||||||
|
List<String> blacklistRemovals,
|
||||||
|
ContainerUpdates updateRequests);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user