From c416284bb7581747beef36d7899d8680fe33abbd Mon Sep 17 00:00:00 2001 From: Szilard Nemeth Date: Fri, 12 Jul 2019 16:51:58 +0200 Subject: [PATCH] YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal --- .../resourceplugin/gpu/GpuResourcePlugin.java | 22 ++++++++ .../gpu/TestGpuResourcePlugin.java | 54 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java index 393d76e1f5..1ac6f83846 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; @@ -33,8 +34,14 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class GpuResourcePlugin implements ResourcePlugin { + + private static final Logger LOG = + LoggerFactory.getLogger(GpuResourcePlugin.class); + private final GpuNodeResourceUpdateHandler resourceDiscoverHandler; private final GpuDiscoverer gpuDiscoverer; private GpuResourceHandlerImpl gpuResourceHandler = null; @@ -84,6 +91,10 @@ public DockerCommandPlugin getDockerCommandPluginInstance() { public synchronized NMResourceInfo getNMResourceInfo() throws YarnException { GpuDeviceInformation gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation(); + + //At this point the gpu plugin is already enabled + checkGpuResourceHandler(); + GpuResourceAllocator gpuResourceAllocator = gpuResourceHandler.getGpuAllocator(); List totalGpus = gpuResourceAllocator.getAllowedGpusCopy(); @@ -94,6 +105,17 @@ public synchronized NMResourceInfo getNMResourceInfo() throws YarnException { assignedGpuDevices); } + private void checkGpuResourceHandler() throws YarnException { + if(gpuResourceHandler == null) { + String errorMsg = + "Linux Container Executor is not configured for the NodeManager. " + + "To fully enable GPU feature on the node also set " + + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly."; + LOG.warn(errorMsg); + throw new YarnException(errorMsg); + } + } + @Override public String toString() { return GpuResourcePlugin.class.getName(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java new file mode 100644 index 0000000000..888f8999d5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu; + +import static org.mockito.Mockito.mock; + +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.Test; + +public class TestGpuResourcePlugin { + + @Test(expected = YarnException.class) + public void testResourceHandlerNotInitialized() throws YarnException { + GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class); + GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler = + mock(GpuNodeResourceUpdateHandler.class); + + GpuResourcePlugin target = + new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer); + + target.getNMResourceInfo(); + } + + @Test + public void testResourceHandlerIsInitialized() throws YarnException { + GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class); + GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler = + mock(GpuNodeResourceUpdateHandler.class); + + GpuResourcePlugin target = + new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer); + + target.createResourceHandler(null, null, null); + + //Not throwing any exception + target.getNMResourceInfo(); + } +}