From 890e14c02a612c772cecd5dff2411060efd418a3 Mon Sep 17 00:00:00 2001 From: Arun Suresh Date: Fri, 28 Jul 2017 16:32:43 -0700 Subject: [PATCH] YARN-6870. Fix floating point inaccuracies in resource availability check in AllocationBasedResourceUtilizationTracker. (Brook Zhou via asuresh) --- ...cationBasedResourceUtilizationTracker.java | 31 ++++++- .../scheduler/ContainerScheduler.java | 5 +- ...cationBasedResourceUtilizationTracker.java | 93 +++++++++++++++++++ 3 files changed, 123 insertions(+), 6 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java index 9839aeb6bb..6e2b617472 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java @@ -118,19 +118,40 @@ private boolean hasResourcesAvailable(long pMemBytes, long vMemBytes, return false; } - float vCores = (float) cpuVcores / - getContainersMonitor().getVCoresAllocatedForContainers(); if (LOG.isDebugEnabled()) { LOG.debug("before cpuCheck [asked={} > allowed={}]", - this.containersAllocation.getCPU(), vCores); + this.containersAllocation.getCPU(), + getContainersMonitor().getVCoresAllocatedForContainers()); } - // Check CPU. - if (this.containersAllocation.getCPU() + vCores > 1.0f) { + // Check CPU. Compare using integral values of cores to avoid decimal + // inaccuracies. + if (!hasEnoughCpu(this.containersAllocation.getCPU(), + getContainersMonitor().getVCoresAllocatedForContainers(), cpuVcores)) { return false; } return true; } + /** + * Returns whether there is enough space for coresRequested in totalCores. + * Converts currentAllocation usage to nearest integer count before comparing, + * as floats are inherently imprecise. NOTE: this calculation assumes that + * requested core counts must be integers, and currentAllocation core count + * must also be an integer. + * + * @param currentAllocation The current allocation, a float value from 0 to 1. + * @param totalCores The total cores in the system. + * @param coresRequested The number of cores requested. + * @return True if currentAllocationtotalCores*coresRequested <= + * totalCores. + */ + public boolean hasEnoughCpu(float currentAllocation, long totalCores, + int coresRequested) { + // Must not cast here, as it would truncate the decimal digits. + return Math.round(currentAllocation * totalCores) + + coresRequested <= totalCores; + } + public ContainersMonitor getContainersMonitor() { return this.scheduler.getContainersMonitor(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java index 19243acd66..c119bf28f9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java @@ -392,7 +392,10 @@ private boolean hasSufficientResources( ResourceUtilization resourcesToFreeUp) { return resourcesToFreeUp.getPhysicalMemory() <= 0 && resourcesToFreeUp.getVirtualMemory() <= 0 && - resourcesToFreeUp.getCPU() <= 0.0f; + // Convert the number of cores to nearest integral number, due to + // imprecision of direct float comparison. + Math.round(resourcesToFreeUp.getCPU() + * getContainersMonitor().getVCoresAllocatedForContainers()) <= 0; } private ResourceUtilization resourcesToFreeUp( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java new file mode 100644 index 0000000000..82c21473c9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for the {@link AllocationBasedResourceUtilizationTracker} class. + */ +public class TestAllocationBasedResourceUtilizationTracker { + + private ContainerScheduler mockContainerScheduler; + + @Before + public void setup() { + mockContainerScheduler = mock(ContainerScheduler.class); + ContainersMonitor containersMonitor = + new ContainersMonitorImpl(mock(ContainerExecutor.class), + mock(AsyncDispatcher.class), mock(Context.class)); + YarnConfiguration conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.NM_PMEM_MB, 1024); + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true); + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true); + conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, 2.0f); + conf.setInt(YarnConfiguration.NM_VCORES, 8); + containersMonitor.init(conf); + when(mockContainerScheduler.getContainersMonitor()) + .thenReturn(containersMonitor); + } + + /** + * Node has capacity for 1024 MB and 8 cores. Saturate the node. When full the + * hasResourceAvailable should return false. + */ + @Test + public void testHasResourcesAvailable() { + AllocationBasedResourceUtilizationTracker tracker = + new AllocationBasedResourceUtilizationTracker(mockContainerScheduler); + Container testContainer = mock(Container.class); + when(testContainer.getResource()).thenReturn(Resource.newInstance(512, 4)); + for (int i = 0; i < 2; i++) { + Assert.assertTrue(tracker.hasResourcesAvailable(testContainer)); + tracker.addContainerResources(testContainer); + } + Assert.assertFalse(tracker.hasResourcesAvailable(testContainer)); + } + + /** + * Test the case where the current allocation has been truncated to 0.8888891 + * (8/9 cores used). Request 1 additional core - hasEnoughCpu should return + * true. + */ + @Test + public void testHasEnoughCpu() { + AllocationBasedResourceUtilizationTracker tracker = + new AllocationBasedResourceUtilizationTracker(mockContainerScheduler); + float currentAllocation = 0.8888891f; + long totalCores = 9; + int alreadyUsedCores = 8; + Assert.assertTrue(tracker.hasEnoughCpu(currentAllocation, totalCores, + (int) totalCores - alreadyUsedCores)); + Assert.assertFalse(tracker.hasEnoughCpu(currentAllocation, totalCores, + (int) totalCores - alreadyUsedCores + 1)); + } +}