YARN-9235. If linux container executor is not set for a GPU cluster GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by Antal Balint Steinbach, Adam Antal
This commit is contained in:
parent
190e4349d7
commit
c416284bb7
@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
||||
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||
@ -33,8 +34,14 @@
|
||||
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
|
||||
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class GpuResourcePlugin implements ResourcePlugin {
|
||||
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(GpuResourcePlugin.class);
|
||||
|
||||
private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
|
||||
private final GpuDiscoverer gpuDiscoverer;
|
||||
private GpuResourceHandlerImpl gpuResourceHandler = null;
|
||||
@ -84,6 +91,10 @@ public DockerCommandPlugin getDockerCommandPluginInstance() {
|
||||
public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
|
||||
GpuDeviceInformation gpuDeviceInformation =
|
||||
gpuDiscoverer.getGpuDeviceInformation();
|
||||
|
||||
//At this point the gpu plugin is already enabled
|
||||
checkGpuResourceHandler();
|
||||
|
||||
GpuResourceAllocator gpuResourceAllocator =
|
||||
gpuResourceHandler.getGpuAllocator();
|
||||
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
|
||||
@ -94,6 +105,17 @@ public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
|
||||
assignedGpuDevices);
|
||||
}
|
||||
|
||||
private void checkGpuResourceHandler() throws YarnException {
|
||||
if(gpuResourceHandler == null) {
|
||||
String errorMsg =
|
||||
"Linux Container Executor is not configured for the NodeManager. "
|
||||
+ "To fully enable GPU feature on the node also set "
|
||||
+ YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly.";
|
||||
LOG.warn(errorMsg);
|
||||
throw new YarnException(errorMsg);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return GpuResourcePlugin.class.getName();
|
||||
|
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestGpuResourcePlugin {
|
||||
|
||||
@Test(expected = YarnException.class)
|
||||
public void testResourceHandlerNotInitialized() throws YarnException {
|
||||
GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
|
||||
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||
mock(GpuNodeResourceUpdateHandler.class);
|
||||
|
||||
GpuResourcePlugin target =
|
||||
new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
|
||||
|
||||
target.getNMResourceInfo();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResourceHandlerIsInitialized() throws YarnException {
|
||||
GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
|
||||
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
|
||||
mock(GpuNodeResourceUpdateHandler.class);
|
||||
|
||||
GpuResourcePlugin target =
|
||||
new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
|
||||
|
||||
target.createResourceHandler(null, null, null);
|
||||
|
||||
//Not throwing any exception
|
||||
target.getNMResourceInfo();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user