From 61b0c2bb7c0f18c4a666b96ca1603cbd4d27eb6d Mon Sep 17 00:00:00 2001 From: Szilard Nemeth Date: Fri, 12 Jul 2019 17:28:14 +0200 Subject: [PATCH] YARN-9337. GPU auto-discovery script runs even when the resource is given by hand. Contributed by Adam Antal --- .../resourceplugin/gpu/GpuDiscoverer.java | 58 +++++++++++-------- .../resourceplugin/gpu/TestGpuDiscoverer.java | 19 +++++- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java index 0c55478d59..b52d767c61 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java @@ -69,6 +69,8 @@ public class GpuDiscoverer { private int numOfErrorExecutionSinceLastSucceed = 0; private GpuDeviceInformation lastDiscoveredGpuInformation = null; + private List gpuDevicesFromUser; + private void validateConfOrThrowException() throws YarnException { if (conf == null) { throw new YarnException("Please initialize (call initialize) before use " @@ -141,6 +143,14 @@ public class GpuDiscoverer { } } + private boolean IsAutoDiscoveryEnabled() { + String allowedDevicesStr = conf.get( + YarnConfiguration.NM_GPU_ALLOWED_DEVICES, + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES); + return allowedDevicesStr.equals( + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES); + } + /** * Get list of GPU devices usable by YARN. * @@ -151,15 +161,13 @@ public class GpuDiscoverer { throws YarnException { validateConfOrThrowException(); - String allowedDevicesStr = conf.get( - YarnConfiguration.NM_GPU_ALLOWED_DEVICES, - YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES); - - if (allowedDevicesStr.equals( - YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) { + if (IsAutoDiscoveryEnabled()) { return parseGpuDevicesFromAutoDiscoveredGpuInfo(); } else { - return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr); + if (gpuDevicesFromUser == null) { + gpuDevicesFromUser = parseGpuDevicesFromUserDefinedValues(); + } + return gpuDevicesFromUser; } } @@ -191,16 +199,16 @@ public class GpuDiscoverer { } /** - * @param devices allowed devices coming from the config. - * Individual devices should be separated by commas. - *
The format of individual devices should be: - * <index:><minorNumber> * @return List of GpuDevices * @throws YarnException when a GPU device is defined as a duplicate. * The first duplicate GPU device will be added to the exception message. */ - private List parseGpuDevicesFromUserDefinedValues(String devices) + private List parseGpuDevicesFromUserDefinedValues() throws YarnException { + String devices = conf.get( + YarnConfiguration.NM_GPU_ALLOWED_DEVICES, + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES); + if (devices.trim().isEmpty()) { throw GpuDeviceSpecificationException.createWithEmptyValueSpecified(); } @@ -242,19 +250,21 @@ public class GpuDiscoverer { public synchronized void initialize(Configuration config) throws YarnException { this.conf = config; - numOfErrorExecutionSinceLastSucceed = 0; - lookUpAutoDiscoveryBinary(config); + if (IsAutoDiscoveryEnabled()) { + numOfErrorExecutionSinceLastSucceed = 0; + lookUpAutoDiscoveryBinary(config); - // Try to discover GPU information once and print - try { - LOG.info("Trying to discover GPU information ..."); - GpuDeviceInformation info = getGpuDeviceInformation(); - LOG.info("Discovered GPU information: " + info.toString()); - } catch (YarnException e) { - String msg = - "Failed to discover GPU information from system, exception message:" - + e.getMessage() + " continue..."; - LOG.warn(msg); + // Try to discover GPU information once and print + try { + LOG.info("Trying to discover GPU information ..."); + GpuDeviceInformation info = getGpuDeviceInformation(); + LOG.info("Discovered GPU information: " + info.toString()); + } catch (YarnException e) { + String msg = + "Failed to discover GPU information from system, exception message:" + + e.getMessage() + " continue..."; + LOG.warn(msg); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java index ff64e042e1..a70e668146 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java @@ -40,6 +40,7 @@ import java.util.List; import java.util.function.Consumer; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; +import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_GPU_ALLOWED_DEVICES; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer.DEFAULT_BINARY_NAME; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.not; @@ -49,6 +50,9 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; public class TestGpuDiscoverer { private static final Logger LOG = LoggerFactory.getLogger( @@ -96,7 +100,7 @@ public class TestGpuDiscoverer { private Configuration createConfigWithAllowedDevices(String s) { Configuration conf = new Configuration(false); - conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s); + conf.set(NM_GPU_ALLOWED_DEVICES, s); setupFakeBinary(conf); return conf; } @@ -495,4 +499,17 @@ public class TestGpuDiscoverer { "executable in the default directories:")); } } + + @Test + public void testScriptNotCalled() throws YarnException { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:1,2:3"); + + GpuDiscoverer gpuSpy = spy(GpuDiscoverer.class); + + gpuSpy.initialize(conf); + gpuSpy.getGpusUsableByYarn(); + + verify(gpuSpy, never()).getGpuDeviceInformation(); + } }