YARN-3790. usedResource from rootQueue metrics may be stale for the FS scheduler after container recovery (Zhihai Xu via rohithsharmaks)
This commit is contained in:
parent
8d58512d6e
commit
dd4b387d96
@ -705,6 +705,9 @@ Release 2.7.1 - UNRELEASED
|
||||
YARN-3832. Resource Localization fails on a cluster due to existing cache
|
||||
directories (Brahma Reddy Battula via jlowe)
|
||||
|
||||
YARN-3790. usedResource from rootQueue metrics may get stale data for FS
|
||||
scheduler after recovering the container (Zhihai Xu via rohithsharmaks)
|
||||
|
||||
Release 2.7.0 - 2015-04-20
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -45,6 +45,7 @@
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationConstants;
|
||||
@ -841,11 +842,11 @@ protected synchronized void completedContainer(RMContainer rmContainer,
|
||||
+ " with event: " + event);
|
||||
}
|
||||
|
||||
private synchronized void addNode(RMNode node) {
|
||||
private synchronized void addNode(List<NMContainerStatus> containerReports,
|
||||
RMNode node) {
|
||||
FSSchedulerNode schedulerNode = new FSSchedulerNode(node, usePortForNodeName);
|
||||
nodes.put(node.getNodeID(), schedulerNode);
|
||||
Resources.addTo(clusterResource, node.getTotalCapability());
|
||||
updateRootQueueMetrics();
|
||||
updateMaximumAllocation(schedulerNode, true);
|
||||
|
||||
triggerUpdate();
|
||||
@ -854,6 +855,9 @@ private synchronized void addNode(RMNode node) {
|
||||
queueMgr.getRootQueue().recomputeSteadyShares();
|
||||
LOG.info("Added node " + node.getNodeAddress() +
|
||||
" cluster capacity: " + clusterResource);
|
||||
|
||||
recoverContainersOnNode(containerReports, node);
|
||||
updateRootQueueMetrics();
|
||||
}
|
||||
|
||||
private synchronized void removeNode(RMNode rmNode) {
|
||||
@ -1147,8 +1151,7 @@ public void handle(SchedulerEvent event) {
|
||||
throw new RuntimeException("Unexpected event type: " + event);
|
||||
}
|
||||
NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
|
||||
addNode(nodeAddedEvent.getAddedRMNode());
|
||||
recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
|
||||
addNode(nodeAddedEvent.getContainerReports(),
|
||||
nodeAddedEvent.getAddedRMNode());
|
||||
break;
|
||||
case NODE_REMOVED:
|
||||
|
Loading…
Reference in New Issue
Block a user