YARN-6870. Fix floating point inaccuracies in resource availability check in AllocationBasedResourceUtilizationTracker. (Brook Zhou via asuresh)

This commit is contained in:
Arun Suresh 2017-07-28 16:32:43 -07:00
parent bcde66bed1
commit 890e14c02a
3 changed files with 123 additions and 6 deletions

View File

@ -118,19 +118,40 @@ private boolean hasResourcesAvailable(long pMemBytes, long vMemBytes,
return false; return false;
} }
float vCores = (float) cpuVcores /
getContainersMonitor().getVCoresAllocatedForContainers();
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("before cpuCheck [asked={} > allowed={}]", LOG.debug("before cpuCheck [asked={} > allowed={}]",
this.containersAllocation.getCPU(), vCores); this.containersAllocation.getCPU(),
getContainersMonitor().getVCoresAllocatedForContainers());
} }
// Check CPU. // Check CPU. Compare using integral values of cores to avoid decimal
if (this.containersAllocation.getCPU() + vCores > 1.0f) { // inaccuracies.
if (!hasEnoughCpu(this.containersAllocation.getCPU(),
getContainersMonitor().getVCoresAllocatedForContainers(), cpuVcores)) {
return false; return false;
} }
return true; return true;
} }
/**
* Returns whether there is enough space for coresRequested in totalCores.
* Converts currentAllocation usage to nearest integer count before comparing,
* as floats are inherently imprecise. NOTE: this calculation assumes that
* requested core counts must be integers, and currentAllocation core count
* must also be an integer.
*
* @param currentAllocation The current allocation, a float value from 0 to 1.
* @param totalCores The total cores in the system.
* @param coresRequested The number of cores requested.
* @return True if currentAllocationtotalCores*coresRequested <=
* totalCores.
*/
public boolean hasEnoughCpu(float currentAllocation, long totalCores,
int coresRequested) {
// Must not cast here, as it would truncate the decimal digits.
return Math.round(currentAllocation * totalCores)
+ coresRequested <= totalCores;
}
public ContainersMonitor getContainersMonitor() { public ContainersMonitor getContainersMonitor() {
return this.scheduler.getContainersMonitor(); return this.scheduler.getContainersMonitor();
} }

View File

@ -392,7 +392,10 @@ private boolean hasSufficientResources(
ResourceUtilization resourcesToFreeUp) { ResourceUtilization resourcesToFreeUp) {
return resourcesToFreeUp.getPhysicalMemory() <= 0 && return resourcesToFreeUp.getPhysicalMemory() <= 0 &&
resourcesToFreeUp.getVirtualMemory() <= 0 && resourcesToFreeUp.getVirtualMemory() <= 0 &&
resourcesToFreeUp.getCPU() <= 0.0f; // Convert the number of cores to nearest integral number, due to
// imprecision of direct float comparison.
Math.round(resourcesToFreeUp.getCPU()
* getContainersMonitor().getVCoresAllocatedForContainers()) <= 0;
} }
private ResourceUtilization resourcesToFreeUp( private ResourceUtilization resourcesToFreeUp(

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
* Tests for the {@link AllocationBasedResourceUtilizationTracker} class.
*/
public class TestAllocationBasedResourceUtilizationTracker {
private ContainerScheduler mockContainerScheduler;
@Before
public void setup() {
mockContainerScheduler = mock(ContainerScheduler.class);
ContainersMonitor containersMonitor =
new ContainersMonitorImpl(mock(ContainerExecutor.class),
mock(AsyncDispatcher.class), mock(Context.class));
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.NM_PMEM_MB, 1024);
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true);
conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, 2.0f);
conf.setInt(YarnConfiguration.NM_VCORES, 8);
containersMonitor.init(conf);
when(mockContainerScheduler.getContainersMonitor())
.thenReturn(containersMonitor);
}
/**
* Node has capacity for 1024 MB and 8 cores. Saturate the node. When full the
* hasResourceAvailable should return false.
*/
@Test
public void testHasResourcesAvailable() {
AllocationBasedResourceUtilizationTracker tracker =
new AllocationBasedResourceUtilizationTracker(mockContainerScheduler);
Container testContainer = mock(Container.class);
when(testContainer.getResource()).thenReturn(Resource.newInstance(512, 4));
for (int i = 0; i < 2; i++) {
Assert.assertTrue(tracker.hasResourcesAvailable(testContainer));
tracker.addContainerResources(testContainer);
}
Assert.assertFalse(tracker.hasResourcesAvailable(testContainer));
}
/**
* Test the case where the current allocation has been truncated to 0.8888891
* (8/9 cores used). Request 1 additional core - hasEnoughCpu should return
* true.
*/
@Test
public void testHasEnoughCpu() {
AllocationBasedResourceUtilizationTracker tracker =
new AllocationBasedResourceUtilizationTracker(mockContainerScheduler);
float currentAllocation = 0.8888891f;
long totalCores = 9;
int alreadyUsedCores = 8;
Assert.assertTrue(tracker.hasEnoughCpu(currentAllocation, totalCores,
(int) totalCores - alreadyUsedCores));
Assert.assertFalse(tracker.hasEnoughCpu(currentAllocation, totalCores,
(int) totalCores - alreadyUsedCores + 1));
}
}