YARN-11732. Fix potential NPE when calling SchedulerNode#reservedContainer for CapacityScheduler (#7065). Contributed by Tao Yang.

Reviewed-by: Syed Shameerur Rahman <syedthameem1@gmail.com>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
Tao Yang 2024-10-16 21:11:31 +08:00 committed by GitHub
parent 78a08b3b78
commit c63aafd7d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 12 additions and 9 deletions

View File

@@ -170,6 +170,9 @@ private NodeForPreemption getPreemptionCandidatesOnNode(
Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
Resource totalPreemptionAllowed, boolean readOnly) { Resource totalPreemptionAllowed, boolean readOnly) {
RMContainer reservedContainer = node.getReservedContainer(); RMContainer reservedContainer = node.getReservedContainer();
if (reservedContainer == null) {
return null;
}
Resource available = Resources.clone(node.getUnallocatedResource()); Resource available = Resources.clone(node.getUnallocatedResource());
Resource totalSelected = Resources.createResource(0); Resource totalSelected = Resources.createResource(0);
List<RMContainer> sortedRunningContainers = List<RMContainer> sortedRunningContainers =

View File

@@ -876,10 +876,9 @@ private void completeOustandingUpdatesWhichAreReserved(
RMContainer rmContainer, ContainerStatus containerStatus, RMContainer rmContainer, ContainerStatus containerStatus,
RMContainerEventType event) { RMContainerEventType event) {
N schedulerNode = getSchedulerNode(rmContainer.getNodeId()); N schedulerNode = getSchedulerNode(rmContainer.getNodeId());
if (schedulerNode != null && if (schedulerNode != null) {
schedulerNode.getReservedContainer() != null) {
RMContainer resContainer = schedulerNode.getReservedContainer(); RMContainer resContainer = schedulerNode.getReservedContainer();
if (resContainer.getReservedSchedulerKey() != null) { if (resContainer != null && resContainer.getReservedSchedulerKey() != null) {
ContainerId containerToUpdate = resContainer ContainerId containerToUpdate = resContainer
.getReservedSchedulerKey().getContainerToUpdate(); .getReservedSchedulerKey().getContainerToUpdate();
if (containerToUpdate != null && if (containerToUpdate != null &&

View File

@@ -858,12 +858,13 @@ private ContainerAllocation allocate(Resource clusterResource,
FiCaSchedulerNode node = iter.next(); FiCaSchedulerNode node = iter.next();
// Do not schedule if there are any reservations to fulfill on the node // Do not schedule if there are any reservations to fulfill on the node
RMContainer nodeReservedContainer = node.getReservedContainer();
if (iter.hasNext() && if (iter.hasNext() &&
node.getReservedContainer() != null && nodeReservedContainer != null &&
isSkipAllocateOnNodesWithReservedContainer()) { isSkipAllocateOnNodesWithReservedContainer()) {
LOG.debug("Skipping scheduling on node {} since it has already been" LOG.debug("Skipping scheduling on node {} since it has already been"
+ " reserved by {}", node.getNodeID(), + " reserved by {}", node.getNodeID(),
node.getReservedContainer().getContainerId()); nodeReservedContainer.getContainerId());
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey, activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NODE_HAS_BEEN_RESERVED, ActivityLevel.NODE); ActivityDiagnosticConstant.NODE_HAS_BEEN_RESERVED, ActivityLevel.NODE);

View File

@@ -520,13 +520,13 @@ public boolean accept(Resource cluster,
// When reserve a resource (state == NEW is for new container, // When reserve a resource (state == NEW is for new container,
// state == RUNNING is for increase container). // state == RUNNING is for increase container).
// Just check if the node is not already reserved by someone // Just check if the node is not already reserved by someone
if (schedulerContainer.getSchedulerNode().getReservedContainer() RMContainer reservedContainer =
!= null) { schedulerContainer.getSchedulerNode().getReservedContainer();
if (reservedContainer != null) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Try to reserve a container, but the node is " LOG.debug("Try to reserve a container, but the node is "
+ "already reserved by another container=" + "already reserved by another container="
+ schedulerContainer.getSchedulerNode() + reservedContainer.getContainerId());
.getReservedContainer().getContainerId());
} }
return false; return false;
} }