usableGpus =
+ GpuDiscoverer.getInstance().getGpusUsableByYarn();
if (null == usableGpus || usableGpus.isEmpty()) {
- LOG.info("Didn't find any usable GPUs on the NodeManager.");
+ String message = "GPU is enabled, but couldn't find any usable GPUs on the "
+ + "NodeManager.";
+ LOG.error(message);
// No gpu can be used by YARN.
- return;
+ throw new YarnException(message);
}
long nUsableGpus = usableGpus.size();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index 9576ce7fec..4ff186fe6e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -24,17 +24,22 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
public class GpuResourcePlugin implements ResourcePlugin {
private ResourceHandler gpuResourceHandler = null;
private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null;
+ private DockerCommandPlugin dockerCommandPlugin = null;
@Override
public synchronized void initialize(Context context) throws YarnException {
resourceDiscoverHandler = new GpuNodeResourceUpdateHandler();
GpuDiscoverer.getInstance().initialize(context.getConf());
+ dockerCommandPlugin =
+ GpuDockerCommandPluginFactory.createGpuDockerCommandPlugin(
+ context.getConf());
}
@Override
@@ -58,4 +63,8 @@ public synchronized NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() {
public void cleanup() throws YarnException {
// Do nothing.
}
+
+ public DockerCommandPlugin getDockerCommandPluginInstance() {
+ return dockerCommandPlugin;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV1CommandPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV1CommandPlugin.java
new file mode 100644
index 0000000000..73d70483df
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/NvidiaDockerV1CommandPlugin.java
@@ -0,0 +1,319 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.StringWriter;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand.VOLUME_NAME_PATTERN;
+
+/**
+ * Implementation to use nvidia-docker v1 as GPU docker command plugin.
+ */
+public class NvidiaDockerV1CommandPlugin implements DockerCommandPlugin {
+ final static Log LOG = LogFactory.getLog(NvidiaDockerV1CommandPlugin.class);
+
+ private Configuration conf;
+ private Map> additionalCommands = null;
+ private String volumeDriver = "local";
+
+ // Known option
+ private String DEVICE_OPTION = "--device";
+ private String VOLUME_DRIVER_OPTION = "--volume-driver";
+ private String MOUNT_RO_OPTION = "--volume";
+
+ public NvidiaDockerV1CommandPlugin(Configuration conf) {
+ this.conf = conf;
+ }
+
+ // Get value from key=value
+ // Throw exception if '=' not found
+ private String getValue(String input) throws IllegalArgumentException {
+ int index = input.indexOf('=');
+ if (index < 0) {
+ throw new IllegalArgumentException(
+ "Failed to locate '=' from input=" + input);
+ }
+ return input.substring(index + 1);
+ }
+
+ private void addToCommand(String key, String value) {
+ if (additionalCommands == null) {
+ additionalCommands = new HashMap<>();
+ }
+ if (!additionalCommands.containsKey(key)) {
+ additionalCommands.put(key, new HashSet<>());
+ }
+ additionalCommands.get(key).add(value);
+ }
+
+ private void init() throws ContainerExecutionException {
+ String endpoint = conf.get(
+ YarnConfiguration.NVIDIA_DOCKER_PLUGIN_V1_ENDPOINT,
+ YarnConfiguration.DEFAULT_NVIDIA_DOCKER_PLUGIN_V1_ENDPOINT);
+ if (null == endpoint || endpoint.isEmpty()) {
+ LOG.info(YarnConfiguration.NVIDIA_DOCKER_PLUGIN_V1_ENDPOINT
+ + " set to empty, skip init ..");
+ return;
+ }
+ String cliOptions;
+ try {
+ // Talk to plugin server and get options
+ URL url = new URL(endpoint);
+ URLConnection uc = url.openConnection();
+ uc.setRequestProperty("X-Requested-With", "Curl");
+
+ StringWriter writer = new StringWriter();
+ IOUtils.copy(uc.getInputStream(), writer, "utf-8");
+ cliOptions = writer.toString();
+
+ LOG.info("Additional docker CLI options from plugin to run GPU "
+ + "containers:" + cliOptions);
+
+ // Parse cli options
+ // Examples like:
+ // --device=/dev/nvidiactl --device=/dev/nvidia-uvm --device=/dev/nvidia0
+ // --volume-driver=nvidia-docker
+ // --volume=nvidia_driver_352.68:/usr/local/nvidia:ro
+
+ for (String str : cliOptions.split(" ")) {
+ str = str.trim();
+ if (str.startsWith(DEVICE_OPTION)) {
+ addToCommand(DEVICE_OPTION, getValue(str));
+ } else if (str.startsWith(VOLUME_DRIVER_OPTION)) {
+ volumeDriver = getValue(str);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Found volume-driver:" + volumeDriver);
+ }
+ } else if (str.startsWith(MOUNT_RO_OPTION)) {
+ String mount = getValue(str);
+ if (!mount.endsWith(":ro")) {
+ throw new IllegalArgumentException(
+ "Should not have mount other than ro, command=" + str);
+ }
+ addToCommand(MOUNT_RO_OPTION,
+ mount.substring(0, mount.lastIndexOf(':')));
+ } else{
+ throw new IllegalArgumentException("Unsupported option:" + str);
+ }
+ }
+ } catch (RuntimeException e) {
+ LOG.warn(
+ "RuntimeException of " + this.getClass().getSimpleName() + " init:",
+ e);
+ throw new ContainerExecutionException(e);
+ } catch (IOException e) {
+ LOG.warn("IOException of " + this.getClass().getSimpleName() + " init:",
+ e);
+ throw new ContainerExecutionException(e);
+ }
+ }
+
+ private int getGpuIndexFromDeviceName(String device) {
+ final String NVIDIA = "nvidia";
+ int idx = device.lastIndexOf(NVIDIA);
+ if (idx < 0) {
+ return -1;
+ }
+ // Get last part
+ String str = device.substring(idx + NVIDIA.length());
+ for (int i = 0; i < str.length(); i++) {
+ if (!Character.isDigit(str.charAt(i))) {
+ return -1;
+ }
+ }
+ return Integer.parseInt(str);
+ }
+
+ private Set getAssignedGpus(Container container) {
+ ResourceMappings resourceMappings = container.getResourceMappings();
+
+ // Copy of assigned Resources
+ Set assignedResources = null;
+ if (resourceMappings != null) {
+ assignedResources = new HashSet<>();
+ for (Serializable s : resourceMappings.getAssignedResources(
+ ResourceInformation.GPU_URI)) {
+ assignedResources.add((GpuDevice) s);
+ }
+ }
+
+ if (assignedResources == null || assignedResources.isEmpty()) {
+ // When no GPU resource assigned, don't need to update docker command.
+ return Collections.emptySet();
+ }
+
+ return assignedResources;
+ }
+
+ @VisibleForTesting
+ protected boolean requestsGpu(Container container) {
+ return GpuResourceAllocator.getRequestedGpus(container.getResource()) > 0;
+ }
+
+ /**
+ * Do initialize when GPU requested
+ * @param container nmContainer
+ * @return if #GPU-requested > 0
+ * @throws ContainerExecutionException when any issue happens
+ */
+ private boolean initializeWhenGpuRequested(Container container)
+ throws ContainerExecutionException {
+ if (!requestsGpu(container)) {
+ return false;
+ }
+
+ // Do lazy initialization of gpu-docker plugin
+ if (additionalCommands == null) {
+ init();
+ }
+
+ return true;
+ }
+
+ @Override
+ public synchronized void updateDockerRunCommand(
+ DockerRunCommand dockerRunCommand, Container container)
+ throws ContainerExecutionException {
+ if (!initializeWhenGpuRequested(container)) {
+ return;
+ }
+
+ Set assignedResources = getAssignedGpus(container);
+ if (assignedResources == null || assignedResources.isEmpty()) {
+ return;
+ }
+
+ // Write to dockerRunCommand
+ for (Map.Entry> option : additionalCommands
+ .entrySet()) {
+ String key = option.getKey();
+ Set values = option.getValue();
+ if (key.equals(DEVICE_OPTION)) {
+ int foundGpuDevices = 0;
+ for (String deviceName : values) {
+ // When specified is a GPU card (device name like /dev/nvidia[n]
+ // Get index of the GPU (which is [n]).
+ Integer gpuIdx = getGpuIndexFromDeviceName(deviceName);
+ if (gpuIdx >= 0) {
+ // Use assignedResources to filter --device given by
+ // nvidia-docker-plugin.
+ for (GpuDevice gpuDevice : assignedResources) {
+ if (gpuDevice.getIndex() == gpuIdx) {
+ foundGpuDevices++;
+ dockerRunCommand.addDevice(deviceName, deviceName);
+ }
+ }
+ } else{
+ // When gpuIdx < 0, it is a controller device (such as
+ // /dev/nvidiactl). In this case, add device directly.
+ dockerRunCommand.addDevice(deviceName, deviceName);
+ }
+ }
+
+ // Cannot get all assigned Gpu devices from docker plugin output
+ if (foundGpuDevices < assignedResources.size()) {
+ throw new ContainerExecutionException(
+ "Cannot get all assigned Gpu devices from docker plugin output");
+ }
+ } else if (key.equals(MOUNT_RO_OPTION)) {
+ for (String value : values) {
+ int idx = value.indexOf(':');
+ String source = value.substring(0, idx);
+ String target = value.substring(idx + 1);
+ dockerRunCommand.addReadOnlyMountLocation(source, target, true);
+ }
+ } else{
+ throw new ContainerExecutionException("Unsupported option:" + key);
+ }
+ }
+ }
+
+ @Override
+ public DockerVolumeCommand getCreateDockerVolumeCommand(Container container)
+ throws ContainerExecutionException {
+ if (!initializeWhenGpuRequested(container)) {
+ return null;
+ }
+
+ String newVolumeName = null;
+
+ // Get volume name
+ Set mounts = additionalCommands.get(MOUNT_RO_OPTION);
+ for (String mount : mounts) {
+ int idx = mount.indexOf(':');
+ if (idx >= 0) {
+ String mountSource = mount.substring(0, idx);
+ if (VOLUME_NAME_PATTERN.matcher(mountSource).matches()) {
+ // This is a valid named volume
+ newVolumeName = mountSource;
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Found volume name for GPU:" + newVolumeName);
+ }
+ break;
+ } else{
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed to match " + mountSource
+ + " to named-volume regex pattern");
+ }
+ }
+ }
+ }
+
+ if (newVolumeName != null) {
+ DockerVolumeCommand command = new DockerVolumeCommand(
+ DockerVolumeCommand.VOLUME_CREATE_COMMAND);
+ command.setDriverName(volumeDriver);
+ command.setVolumeName(newVolumeName);
+ return command;
+ }
+
+ return null;
+ }
+
+ @Override
+ public DockerVolumeCommand getCleanupDockerVolumesCommand(Container container)
+ throws ContainerExecutionException {
+ // No cleanup needed.
+ return null;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
index c361d00f78..34558749dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
@@ -18,11 +18,47 @@
package org.apache.hadoop.yarn.server.nodemanager.recovery;
-import static org.fusesource.leveldbjni.JniDBFactory.asString;
-import static org.fusesource.leveldbjni.JniDBFactory.bytes;
-
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Time;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestPBImpl;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
+import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.records.Version;
+import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
+import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.fusesource.leveldbjni.internal.NativeDB;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.DBException;
+import org.iq80.leveldb.Options;
+import org.iq80.leveldb.WriteBatch;
import org.slf4j.LoggerFactory;
import java.io.File;
@@ -35,49 +71,12 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
-import java.util.Set;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.util.Time;
-import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestPBImpl;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
-import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
-import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
-import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
-import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
-import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
-import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
-import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto;
-import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto;
-import org.apache.hadoop.yarn.server.api.records.MasterKey;
-import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
-import org.apache.hadoop.yarn.server.records.Version;
-import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
-import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
-import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.fusesource.leveldbjni.JniDBFactory;
-import org.fusesource.leveldbjni.internal.NativeDB;
-import org.iq80.leveldb.DB;
-import org.iq80.leveldb.DBException;
-import org.iq80.leveldb.Options;
-import org.iq80.leveldb.WriteBatch;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
+import static org.fusesource.leveldbjni.JniDBFactory.asString;
+import static org.fusesource.leveldbjni.JniDBFactory.bytes;
public class NMLeveldbStateStoreService extends NMStateStoreService {
@@ -1173,15 +1172,17 @@ public void removeLogDeleter(ApplicationId appId) throws IOException {
}
@Override
- public void storeAssignedResources(ContainerId containerId,
+ public void storeAssignedResources(Container container,
String resourceType, List assignedResources)
throws IOException {
if (LOG.isDebugEnabled()) {
- LOG.debug("storeAssignedResources: containerId=" + containerId
- + ", assignedResources=" + StringUtils.join(",", assignedResources));
+ LOG.debug(
+ "storeAssignedResources: containerId=" + container.getContainerId()
+ + ", assignedResources=" + StringUtils
+ .join(",", assignedResources));
}
- String keyResChng = CONTAINERS_KEY_PREFIX + containerId.toString()
+ String keyResChng = CONTAINERS_KEY_PREFIX + container.getContainerId().toString()
+ CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX + resourceType;
try {
WriteBatch batch = db.createWriteBatch();
@@ -1199,6 +1200,9 @@ public void storeAssignedResources(ContainerId containerId,
} catch (DBException e) {
throw new IOException(e);
}
+
+ // update container resource mapping.
+ updateContainerResourceMapping(container, resourceType, assignedResources);
}
@SuppressWarnings("deprecation")
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
index ca6d018290..2d522a9b6f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
// The state store to use when state isn't being stored
public class NMNullStateStoreService extends NMStateStoreService {
@@ -268,7 +269,7 @@ public void removeAMRMProxyAppContext(ApplicationAttemptId attempt)
}
@Override
- public void storeAssignedResources(ContainerId containerId,
+ public void storeAssignedResources(Container container,
String resourceType, List assignedResources)
throws IOException {
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
index 5e2b8a5cdc..598ea9e319 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
@@ -44,6 +44,7 @@
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
@Private
@@ -731,12 +732,12 @@ public abstract void removeAMRMProxyAppContext(ApplicationAttemptId attempt)
/**
* Store the assigned resources to a container.
*
- * @param containerId Container Id
+ * @param container NMContainer
* @param resourceType Resource Type
* @param assignedResources Assigned resources
* @throws IOException if fails
*/
- public abstract void storeAssignedResources(ContainerId containerId,
+ public abstract void storeAssignedResources(Container container,
String resourceType, List assignedResources)
throws IOException;
@@ -745,4 +746,14 @@ public abstract void storeAssignedResources(ContainerId containerId,
protected abstract void startStorage() throws IOException;
protected abstract void closeStorage() throws IOException;
+
+ protected void updateContainerResourceMapping(Container container,
+ String resourceType, List assignedResources) {
+ // Update Container#getResourceMapping.
+ ResourceMappings.AssignedResources newAssigned =
+ new ResourceMappings.AssignedResources();
+ newAssigned.updateAssignedResources(assignedResources);
+ container.getResourceMappings().addAssignedResources(resourceType,
+ newAssigned);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
index e8e2b9e9aa..e88eeac35f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
@@ -159,6 +159,11 @@ static int add_docker_config_param(const struct configuration *command_config, c
return add_param_to_command(command_config, "docker-config", "--config=", 1, out, outlen);
}
+static int validate_volume_name(const char *volume_name) {
+ const char *regex_str = "^[a-zA-Z0-9]([a-zA-Z0-9_.-]*)$";
+ return execute_regex_match(regex_str, volume_name);
+}
+
static int validate_container_name(const char *container_name) {
const char *CONTAINER_NAME_PREFIX = "container_";
if (0 == strncmp(container_name, CONTAINER_NAME_PREFIX, strlen(CONTAINER_NAME_PREFIX))) {
@@ -206,6 +211,12 @@ const char *get_docker_error_message(const int error_code) {
return "Mount access error";
case INVALID_DOCKER_DEVICE:
return "Invalid docker device";
+ case INVALID_DOCKER_VOLUME_DRIVER:
+ return "Invalid docker volume-driver";
+ case INVALID_DOCKER_VOLUME_NAME:
+ return "Invalid docker volume name";
+ case INVALID_DOCKER_VOLUME_COMMAND:
+ return "Invalid docker volume command";
default:
return "Unknown error";
}
@@ -252,11 +263,125 @@ int get_docker_command(const char *command_file, const struct configuration *con
return get_docker_run_command(command_file, conf, out, outlen);
} else if (strcmp(DOCKER_STOP_COMMAND, command) == 0) {
return get_docker_stop_command(command_file, conf, out, outlen);
+ } else if (strcmp(DOCKER_VOLUME_COMMAND, command) == 0) {
+ return get_docker_volume_command(command_file, conf, out, outlen);
} else {
return UNKNOWN_DOCKER_COMMAND;
}
}
+// check if a key is permitted in the configuration
+// return 1 if permitted
+static int value_permitted(const struct configuration* executor_cfg,
+ const char* key, const char* value) {
+ char **permitted_values = get_configuration_values_delimiter(key,
+ CONTAINER_EXECUTOR_CFG_DOCKER_SECTION, executor_cfg, ",");
+ if (!permitted_values) {
+ return 0;
+ }
+
+ char** permitted = permitted_values;
+ int found = 0;
+
+ while (*permitted) {
+ if (0 == strncmp(*permitted, value, 1024)) {
+ found = 1;
+ break;
+ }
+ permitted++;
+ }
+
+ free_values(permitted_values);
+
+ return found;
+}
+
+int get_docker_volume_command(const char *command_file, const struct configuration *conf, char *out,
+ const size_t outlen) {
+ int ret = 0;
+ char *driver = NULL, *volume_name = NULL, *sub_command = NULL;
+ struct configuration command_config = {0, NULL};
+ ret = read_and_verify_command_file(command_file, DOCKER_VOLUME_COMMAND, &command_config);
+ if (ret != 0) {
+ return ret;
+ }
+ sub_command = get_configuration_value("sub-command", DOCKER_COMMAND_FILE_SECTION, &command_config);
+ if (sub_command == NULL || 0 != strcmp(sub_command, "create")) {
+ fprintf(ERRORFILE, "\"create\" is the only acceptable sub-command of volume.\n");
+ ret = INVALID_DOCKER_VOLUME_COMMAND;
+ goto cleanup;
+ }
+
+ volume_name = get_configuration_value("volume", DOCKER_COMMAND_FILE_SECTION, &command_config);
+ if (volume_name == NULL || validate_volume_name(volume_name) != 0) {
+ fprintf(ERRORFILE, "%s is not a valid volume name.\n", volume_name);
+ ret = INVALID_DOCKER_VOLUME_NAME;
+ goto cleanup;
+ }
+
+ driver = get_configuration_value("driver", DOCKER_COMMAND_FILE_SECTION, &command_config);
+ if (driver == NULL) {
+ ret = INVALID_DOCKER_VOLUME_DRIVER;
+ goto cleanup;
+ }
+
+ memset(out, 0, outlen);
+
+ ret = add_docker_config_param(&command_config, out, outlen);
+ if (ret != 0) {
+ ret = BUFFER_TOO_SMALL;
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, DOCKER_VOLUME_COMMAND);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, " create");
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, " --name=");
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, volume_name);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ if (!value_permitted(conf, "docker.allowed.volume-drivers", driver)) {
+ fprintf(ERRORFILE, "%s is not permitted docker.allowed.volume-drivers\n",
+ driver);
+ ret = INVALID_DOCKER_VOLUME_DRIVER;
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, " --driver=");
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ ret = add_to_buffer(out, outlen, driver);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+cleanup:
+ free(driver);
+ free(volume_name);
+ free(sub_command);
+
+ // clean up out buffer
+ if (ret != 0) {
+ out[0] = 0;
+ }
+ return ret;
+}
+
int get_docker_inspect_command(const char *command_file, const struct configuration *conf, char *out,
const size_t outlen) {
const char *valid_format_strings[] = { "{{.State.Status}}",
@@ -623,6 +748,11 @@ static char* normalize_mount(const char* mount) {
}
real_mount = realpath(mount, NULL);
if (real_mount == NULL) {
+ // If mount is a valid named volume, just return it and let docker decide
+ if (validate_volume_name(mount) == 0) {
+ return strdup(mount);
+ }
+
fprintf(ERRORFILE, "Could not determine real path of mount '%s'\n", mount);
free(real_mount);
return NULL;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
index 37ec88077c..9c42abe8a6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
@@ -30,6 +30,7 @@
#define DOCKER_RM_COMMAND "rm"
#define DOCKER_RUN_COMMAND "run"
#define DOCKER_STOP_COMMAND "stop"
+#define DOCKER_VOLUME_COMMAND "volume"
enum docker_error_codes {
@@ -49,7 +50,10 @@ enum docker_error_codes {
INVALID_DOCKER_RW_MOUNT,
MOUNT_ACCESS_ERROR,
INVALID_DOCKER_DEVICE,
- INVALID_DOCKER_STOP_COMMAND
+ INVALID_DOCKER_STOP_COMMAND,
+ INVALID_DOCKER_VOLUME_DRIVER,
+ INVALID_DOCKER_VOLUME_NAME,
+ INVALID_DOCKER_VOLUME_COMMAND
};
/**
@@ -130,6 +134,18 @@ int get_docker_run_command(const char* command_file, const struct configuration*
*/
int get_docker_stop_command(const char* command_file, const struct configuration* conf, char *out, const size_t outlen);
+/**
+ * Get the Docker volume command line string. The function will verify that the
+ * params file is meant for the volume command.
+ * @param command_file File containing the params for the Docker volume command
+ * @param conf Configuration struct containing the container-executor.cfg details
+ * @param out Buffer to fill with the volume command
+ * @param outlen Size of the output buffer
+ * @return Return code with 0 indicating success and non-zero codes indicating error
+ */
+int get_docker_volume_command(const char *command_file, const struct configuration *conf, char *out,
+ const size_t outlen);
+
/**
* Give an error message for the supplied error code
* @param error_code the error code
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
index c42cd787ef..80de58d6c6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
@@ -1120,4 +1120,46 @@ namespace ContainerExecutor {
}
}
+ TEST_F(TestDockerUtil, test_docker_volume_command) {
+ std::string container_executor_contents = "[docker]\n docker.allowed.volume-drivers=driver1\n";
+ write_file(container_executor_cfg_file, container_executor_contents);
+ int ret = read_config(container_executor_cfg_file.c_str(), &container_executor_cfg);
+ if (ret != 0) {
+ FAIL();
+ }
+
+ std::vector<std::pair<std::string, std::string> > file_cmd_vec;
+ file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=create\n volume=volume1 \n driver=driver1",
+ "volume create --name=volume1 --driver=driver1"));
+
+ std::vector<std::pair<std::string, int> > bad_file_cmd_vec;
+
+ // Wrong subcommand
+ bad_file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=ls\n volume=volume1 \n driver=driver1",
+ static_cast<int>(INVALID_DOCKER_VOLUME_COMMAND)));
+
+ // Volume not specified
+ bad_file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=create\n driver=driver1",
+ static_cast<int>(INVALID_DOCKER_VOLUME_NAME)));
+
+ // Invalid volume name
+ bad_file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=create\n volume=/a/b/c \n driver=driver1",
+ static_cast<int>(INVALID_DOCKER_VOLUME_NAME)));
+
+ // Driver not specified
+ bad_file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=create\n volume=volume1 \n",
+ static_cast<int>(INVALID_DOCKER_VOLUME_DRIVER)));
+
+ // Invalid driver name
+ bad_file_cmd_vec.push_back(std::make_pair(
+ "[docker-command-execution]\n docker-command=volume\n sub-command=create\n volume=volume1 \n driver=driver2",
+ static_cast<int>(INVALID_DOCKER_VOLUME_DRIVER)));
+
+ run_docker_command_test(file_cmd_vec, bad_file_cmd_vec, get_docker_volume_command);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
index 3dfa625ecd..e1d9c69444 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
@@ -158,7 +158,7 @@ public void setup() throws IOException, ContainerExecutionException {
mockPrivilegedExec);
dirsHandler = new LocalDirsHandlerService();
dirsHandler.init(conf);
- linuxContainerRuntime.initialize(conf);
+ linuxContainerRuntime.initialize(conf, null);
mockExec = new LinuxContainerExecutor(linuxContainerRuntime);
mockExec.setConf(conf);
mockExecMockRuntime = new LinuxContainerExecutor(mockLinuxContainerRuntime);
@@ -315,7 +315,7 @@ public void testContainerLaunchError()
DefaultLinuxContainerRuntime(PrivilegedOperationExecutor.getInstance(
conf));
- linuxContainerRuntime.initialize(conf);
+ linuxContainerRuntime.initialize(conf, null);
exec = new LinuxContainerExecutor(linuxContainerRuntime);
mockExec = spy(exec);
@@ -545,7 +545,7 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception {
any(File.class), any(Map.class), anyBoolean(), anyBoolean());
LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime(
spyPrivilegedExecutor);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
mockExec = new LinuxContainerExecutor(runtime);
mockExec.setConf(conf);
LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index 0e629d4ff1..cad835cfb9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -462,16 +462,18 @@ public void testResourceMappingRecoveryForContainer() throws Exception {
commonLaunchContainer(appId, cid, cm);
+ Container nmContainer = context.getContainers().get(cid);
+
Application app = context.getApplications().get(appId);
assertNotNull(app);
// store resource mapping of the container
List<Serializable> gpuResources = Arrays.asList("1", "2", "3");
- stateStore.storeAssignedResources(cid, "gpu", gpuResources);
+ stateStore.storeAssignedResources(nmContainer, "gpu", gpuResources);
List<Serializable> numaResources = Arrays.asList("numa1");
- stateStore.storeAssignedResources(cid, "numa", numaResources);
+ stateStore.storeAssignedResources(nmContainer, "numa", numaResources);
List<Serializable> fpgaResources = Arrays.asList("fpga1", "fpga2");
- stateStore.storeAssignedResources(cid, "fpga", fpgaResources);
+ stateStore.storeAssignedResources(nmContainer, "fpga", fpgaResources);
cm.stop();
context = createContext(conf, stateStore);
@@ -483,7 +485,6 @@ public void testResourceMappingRecoveryForContainer() throws Exception {
app = context.getApplications().get(appId);
assertNotNull(app);
- Container nmContainer = context.getContainers().get(cid);
Assert.assertNotNull(nmContainer);
ResourceMappings resourceMappings = nmContainer.getResourceMappings();
List<Serializable> assignedResource = resourceMappings
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
index 5c70f7a3e4..1e0eb7b52a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -20,7 +20,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -36,15 +35,17 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
-import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -90,7 +91,7 @@ public void setup() {
@Test
public void testBootStrap() throws Exception {
Configuration conf = new YarnConfiguration();
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0");
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
GpuDiscoverer.getInstance().initialize(conf);
@@ -104,8 +105,8 @@ private static ContainerId getContainerId(int id) {
.newInstance(ApplicationId.newInstance(1234L, 1), 1), id);
}
- private static Container mockContainerWithGpuRequest(int id,
- int numGpuRequest) {
+ private static Container mockContainerWithGpuRequest(int id, int numGpuRequest,
+ boolean dockerContainerEnabled) {
Container c = mock(Container.class);
when(c.getContainerId()).thenReturn(getContainerId(id));
@@ -115,29 +116,46 @@ private static Container mockContainerWithGpuRequest(int id,
res.setResourceValue(ResourceInformation.GPU_URI, numGpuRequest);
when(c.getResource()).thenReturn(res);
when(c.getResourceMappings()).thenReturn(resMapping);
+
+ ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
+ Map<String, String> env = new HashMap<>();
+ if (dockerContainerEnabled) {
+ env.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE, "docker");
+ }
+ when(clc.getEnvironment()).thenReturn(env);
+ when(c.getLaunchContext()).thenReturn(clc);
return c;
}
+ private static Container mockContainerWithGpuRequest(int id,
+ int numGpuRequest) {
+ return mockContainerWithGpuRequest(id, numGpuRequest, false);
+ }
+
private void verifyDeniedDevices(ContainerId containerId,
- List<Integer> deniedDevices)
+ List<GpuDevice> deniedDevices)
throws ResourceHandlerException, PrivilegedOperationException {
verify(mockCGroupsHandler, times(1)).createCGroup(
CGroupsHandler.CGroupController.DEVICES, containerId.toString());
if (null != deniedDevices && !deniedDevices.isEmpty()) {
+ List<Integer> deniedDevicesMinorNumber = new ArrayList<>();
+ for (GpuDevice deniedDevice : deniedDevices) {
+ deniedDevicesMinorNumber.add(deniedDevice.getMinorNumber());
+ }
verify(mockPrivilegedExecutor, times(1)).executePrivilegedOperation(
new PrivilegedOperation(PrivilegedOperation.OperationType.GPU, Arrays
.asList(GpuResourceHandlerImpl.CONTAINER_ID_CLI_OPTION,
containerId.toString(),
GpuResourceHandlerImpl.EXCLUDED_GPUS_CLI_OPTION,
- StringUtils.join(",", deniedDevices))), true);
+ StringUtils.join(",", deniedDevicesMinorNumber))), true);
}
}
- @Test
- public void testAllocation() throws Exception {
+ private void commonTestAllocation(boolean dockerContainerEnabled)
+ throws Exception {
Configuration conf = new YarnConfiguration();
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
GpuDiscoverer.getInstance().initialize(conf);
gpuResourceHandler.bootstrap(conf);
@@ -145,31 +163,52 @@ public void testAllocation() throws Exception {
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
/* Start container 1, asks 3 containers */
- gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 3));
+ gpuResourceHandler.preStart(
+ mockContainerWithGpuRequest(1, 3, dockerContainerEnabled));
// Only device=4 will be blocked.
- verifyDeniedDevices(getContainerId(1), Arrays.asList(4));
+ if (dockerContainerEnabled) {
+ verifyDeniedDevices(getContainerId(1), Collections.emptyList());
+ } else{
+ verifyDeniedDevices(getContainerId(1), Arrays.asList(new GpuDevice(3,4)));
+ }
/* Start container 2, asks 2 containers. Excepted to fail */
boolean failedToAllocate = false;
try {
- gpuResourceHandler.preStart(mockContainerWithGpuRequest(2, 2));
+ gpuResourceHandler.preStart(
+ mockContainerWithGpuRequest(2, 2, dockerContainerEnabled));
} catch (ResourceHandlerException e) {
failedToAllocate = true;
}
Assert.assertTrue(failedToAllocate);
/* Start container 3, ask 1 container, succeeded */
- gpuResourceHandler.preStart(mockContainerWithGpuRequest(3, 1));
+ gpuResourceHandler.preStart(
+ mockContainerWithGpuRequest(3, 1, dockerContainerEnabled));
// devices = 0/1/3 will be blocked
- verifyDeniedDevices(getContainerId(3), Arrays.asList(0, 1, 3));
+ if (dockerContainerEnabled) {
+ verifyDeniedDevices(getContainerId(3), Collections.emptyList());
+ } else {
+ verifyDeniedDevices(getContainerId(3), Arrays
+ .asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
+ new GpuDevice(2, 3)));
+ }
+
/* Start container 4, ask 0 container, succeeded */
- gpuResourceHandler.preStart(mockContainerWithGpuRequest(4, 0));
+ gpuResourceHandler.preStart(
+ mockContainerWithGpuRequest(4, 0, dockerContainerEnabled));
- // All devices will be blocked
- verifyDeniedDevices(getContainerId(4), Arrays.asList(0, 1, 3, 4));
+ if (dockerContainerEnabled) {
+ verifyDeniedDevices(getContainerId(4), Collections.emptyList());
+ } else{
+ // All devices will be blocked
+ verifyDeniedDevices(getContainerId(4), Arrays
+ .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
+ new GpuDevice(3, 4)));
+ }
/* Release container-1, expect cgroups deleted */
gpuResourceHandler.postComplete(getContainerId(1));
@@ -188,12 +227,24 @@ public void testAllocation() throws Exception {
gpuResourceHandler.getGpuAllocator().getAvailableGpus());
}
+ @Test
+ public void testAllocationWhenDockerContainerEnabled() throws Exception {
+ // When docker container is enabled, no devices should be written to
+ // devices.deny.
+ commonTestAllocation(true);
+ }
+
+ @Test
+ public void testAllocation() throws Exception {
+ commonTestAllocation(false);
+ }
+
@SuppressWarnings("unchecked")
@Test
public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
throws Exception {
Configuration conf = new YarnConfiguration();
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
GpuDiscoverer.getInstance().initialize(conf);
gpuResourceHandler.bootstrap(conf);
@@ -202,7 +253,7 @@ public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
doThrow(new IOException("Exception ...")).when(mockNMStateStore)
.storeAssignedResources(
- any(ContainerId.class), anyString(), anyList());
+ any(Container.class), anyString(), anyList());
boolean exception = false;
/* Start container 1, asks 3 containers */
@@ -225,9 +276,12 @@ public void testAllocationWithoutAllowedGpus() throws Exception {
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
GpuDiscoverer.getInstance().initialize(conf);
- gpuResourceHandler.bootstrap(conf);
- Assert.assertEquals(0,
- gpuResourceHandler.getGpuAllocator().getAvailableGpus());
+ try {
+ gpuResourceHandler.bootstrap(conf);
+ Assert.fail("Should fail because no GPU available");
+ } catch (ResourceHandlerException e) {
+ // Expected because of no resource available
+ }
/* Start container 1, asks 0 containers */
gpuResourceHandler.preStart(mockContainerWithGpuRequest(1, 0));
@@ -254,7 +308,7 @@ public void testAllocationWithoutAllowedGpus() throws Exception {
@Test
public void testAllocationStored() throws Exception {
Configuration conf = new YarnConfiguration();
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
GpuDiscoverer.getInstance().initialize(conf);
gpuResourceHandler.bootstrap(conf);
@@ -265,33 +319,33 @@ public void testAllocationStored() throws Exception {
Container container = mockContainerWithGpuRequest(1, 3);
gpuResourceHandler.preStart(container);
- verify(mockNMStateStore).storeAssignedResources(getContainerId(1),
- ResourceInformation.GPU_URI,
- Arrays.asList("0", "1", "3"));
-
- Assert.assertEquals(3, container.getResourceMappings()
- .getAssignedResources(ResourceInformation.GPU_URI).size());
+ verify(mockNMStateStore).storeAssignedResources(container,
+ ResourceInformation.GPU_URI, Arrays
+ .asList(new GpuDevice(0, 0), new GpuDevice(1, 1),
+ new GpuDevice(2, 3)));
// Only device=4 will be blocked.
- verifyDeniedDevices(getContainerId(1), Arrays.asList(4));
+ verifyDeniedDevices(getContainerId(1), Arrays.asList(new GpuDevice(3, 4)));
/* Start container 2, ask 0 container, succeeded */
container = mockContainerWithGpuRequest(2, 0);
gpuResourceHandler.preStart(container);
- verifyDeniedDevices(getContainerId(2), Arrays.asList(0, 1, 3, 4));
+ verifyDeniedDevices(getContainerId(2), Arrays
+ .asList(new GpuDevice(0, 0), new GpuDevice(1, 1), new GpuDevice(2, 3),
+ new GpuDevice(3, 4)));
Assert.assertEquals(0, container.getResourceMappings()
.getAssignedResources(ResourceInformation.GPU_URI).size());
// Store assigned resource will not be invoked.
verify(mockNMStateStore, never()).storeAssignedResources(
- eq(getContainerId(2)), eq(ResourceInformation.GPU_URI), anyList());
+ eq(container), eq(ResourceInformation.GPU_URI), anyList());
}
@Test
public void testRecoverResourceAllocation() throws Exception {
Configuration conf = new YarnConfiguration();
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,3,4");
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
GpuDiscoverer.getInstance().initialize(conf);
gpuResourceHandler.bootstrap(conf);
@@ -302,7 +356,8 @@ public void testRecoverResourceAllocation() throws Exception {
ResourceMappings rmap = new ResourceMappings();
ResourceMappings.AssignedResources ar =
new ResourceMappings.AssignedResources();
- ar.updateAssignedResources(Arrays.asList("1", "3"));
+ ar.updateAssignedResources(
+ Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3)));
rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
when(nmContainer.getResourceMappings()).thenReturn(rmap);
@@ -312,12 +367,15 @@ public void testRecoverResourceAllocation() throws Exception {
// Reacquire container restore state of GPU Resource Allocator.
gpuResourceHandler.reacquireContainer(getContainerId(1));
- Map<Integer, ContainerId> deviceAllocationMapping =
+ Map<GpuDevice, ContainerId> deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
Assert.assertEquals(2, deviceAllocationMapping.size());
Assert.assertTrue(
- deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
- Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+ deviceAllocationMapping.keySet().contains(new GpuDevice(1, 1)));
+ Assert.assertTrue(
+ deviceAllocationMapping.keySet().contains(new GpuDevice(2, 3)));
+ Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+ getContainerId(1));
// TEST CASE
// Try to reacquire a container but requested device is not in allowed list.
@@ -325,7 +383,8 @@ public void testRecoverResourceAllocation() throws Exception {
rmap = new ResourceMappings();
ar = new ResourceMappings.AssignedResources();
// id=5 is not in allowed list.
- ar.updateAssignedResources(Arrays.asList("4", "5"));
+ ar.updateAssignedResources(
+ Arrays.asList(new GpuDevice(3, 4), new GpuDevice(4, 5)));
rmap.addAssignedResources(ResourceInformation.GPU_URI, ar);
when(nmContainer.getResourceMappings()).thenReturn(rmap);
@@ -345,9 +404,10 @@ public void testRecoverResourceAllocation() throws Exception {
deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
Assert.assertEquals(2, deviceAllocationMapping.size());
- Assert.assertTrue(
- deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
- Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+ Assert.assertTrue(deviceAllocationMapping.keySet()
+ .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
+ Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+ getContainerId(1));
// TEST CASE
// Try to reacquire a container but requested device is already assigned.
@@ -355,7 +415,8 @@ public void testRecoverResourceAllocation() throws Exception {
rmap = new ResourceMappings();
ar = new ResourceMappings.AssignedResources();
// id=3 is already assigned
- ar.updateAssignedResources(Arrays.asList("4", "3"));
+ ar.updateAssignedResources(
+ Arrays.asList(new GpuDevice(3, 4), new GpuDevice(2, 3)));
rmap.addAssignedResources("gpu", ar);
when(nmContainer.getResourceMappings()).thenReturn(rmap);
@@ -375,8 +436,9 @@ public void testRecoverResourceAllocation() throws Exception {
deviceAllocationMapping =
gpuResourceHandler.getGpuAllocator().getDeviceAllocationMapping();
Assert.assertEquals(2, deviceAllocationMapping.size());
- Assert.assertTrue(
- deviceAllocationMapping.keySet().containsAll(Arrays.asList(1, 3)));
- Assert.assertEquals(deviceAllocationMapping.get(1), getContainerId(1));
+ Assert.assertTrue(deviceAllocationMapping.keySet()
+ .containsAll(Arrays.asList(new GpuDevice(1, 1), new GpuDevice(2, 3))));
+ Assert.assertEquals(deviceAllocationMapping.get(new GpuDevice(1, 1)),
+ getContainerId(1));
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java
index 7f4bbc4d37..907b1222ed 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java
@@ -50,7 +50,7 @@ public void testIsRuntimeAllowedDefault() throws Exception {
YarnConfiguration.DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES[0]);
System.out.println(conf.get(
YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES));
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
LinuxContainerRuntimeConstants.RuntimeType.DEFAULT));
assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed(
@@ -63,7 +63,7 @@ public void testIsRuntimeAllowedDefault() throws Exception {
public void testIsRuntimeAllowedDocker() throws Exception {
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"docker");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
LinuxContainerRuntimeConstants.RuntimeType.DOCKER));
assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed(
@@ -76,7 +76,7 @@ public void testIsRuntimeAllowedDocker() throws Exception {
public void testIsRuntimeAllowedJavaSandbox() throws Exception {
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"javasandbox");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX));
assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed(
@@ -89,7 +89,7 @@ public void testIsRuntimeAllowedJavaSandbox() throws Exception {
public void testIsRuntimeAllowedMultiple() throws Exception {
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"docker,javasandbox");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
LinuxContainerRuntimeConstants.RuntimeType.DOCKER));
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
@@ -102,7 +102,7 @@ public void testIsRuntimeAllowedMultiple() throws Exception {
public void testIsRuntimeAllowedAll() throws Exception {
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"default,docker,javasandbox");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
LinuxContainerRuntimeConstants.RuntimeType.DEFAULT));
assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed(
@@ -116,7 +116,7 @@ public void testJavaSandboxNotAllowedButPermissive() throws Exception {
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"default,docker");
conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
ContainerRuntime runtime =
delegatingLinuxContainerRuntime.pickContainerRuntime(env);
assertTrue(runtime instanceof DefaultLinuxContainerRuntime);
@@ -129,7 +129,7 @@ public void testJavaSandboxNotAllowedButPermissiveDockerRequested()
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
"default,docker");
conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive");
- delegatingLinuxContainerRuntime.initialize(conf);
+ delegatingLinuxContainerRuntime.initialize(conf, null);
ContainerRuntime runtime =
delegatingLinuxContainerRuntime.pickContainerRuntime(env);
assertTrue(runtime instanceof DockerLinuxContainerRuntime);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
index fbfee545f5..b5a64975e6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
@@ -20,15 +20,18 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
+import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.registry.client.binding.RegistryPathUtils;
import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
@@ -36,6 +39,10 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
@@ -48,22 +55,48 @@
import org.slf4j.LoggerFactory;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
-import java.util.*;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import java.util.Set;
-import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.APPID;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.CONTAINER_ID_STR;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.CONTAINER_LOCAL_DIRS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.CONTAINER_LOG_DIRS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.CONTAINER_WORK_DIR;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.FILECACHE_DIRS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.LOCALIZED_RESOURCES;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.LOCAL_DIRS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.LOG_DIRS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.NM_PRIVATE_CONTAINER_SCRIPT_PATH;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.NM_PRIVATE_TOKENS_PATH;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID_FILE_PATH;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RESOURCES_OPTIONS;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RUN_AS_USER;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.SIGNAL;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.USER;
+import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.USER_LOCAL_DIRS;
import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.*;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.anyBoolean;
+import static org.mockito.Mockito.anyList;
+import static org.mockito.Mockito.anyMap;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
public class TestDockerContainerRuntime {
private static final Logger LOG =
@@ -217,7 +250,7 @@ private PrivilegedOperation capturePrivilegedOperation()
return opCaptor.getValue();
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings("unchecked")
private PrivilegedOperation capturePrivilegedOperationAndVerifyArgs()
throws PrivilegedOperationException {
@@ -288,7 +321,7 @@ public void testDockerContainerLaunch()
IOException {
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
runtime.launchContainer(builder.build());
PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
@@ -343,7 +376,7 @@ public void testContainerLaunchWithUserRemapping()
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
runtime.launchContainer(builder.build());
PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
@@ -425,7 +458,7 @@ public void testAllowedNetworksConfiguration() throws
DockerLinuxContainerRuntime runtime =
new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
//invalid default network configuration - sdn2 is included in allowed
// networks
@@ -441,7 +474,7 @@ public void testAllowedNetworksConfiguration() throws
try {
runtime =
new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
Assert.fail("Invalid default network configuration should did not "
+ "trigger initialization failure.");
} catch (ContainerExecutionException e) {
@@ -457,7 +490,7 @@ public void testAllowedNetworksConfiguration() throws
validDefaultNetwork);
runtime =
new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
}
@Test
@@ -467,7 +500,7 @@ public void testContainerLaunchWithNetworkingDefaults()
PrivilegedOperationException {
DockerLinuxContainerRuntime runtime =
new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
Random randEngine = new Random();
String disallowedNetwork = "sdn" + Integer.toString(randEngine.nextInt());
@@ -557,7 +590,7 @@ public void testContainerLaunchWithCustomNetworks()
customNetwork1);
//this should cause no failures.
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
runtime.launchContainer(builder.build());
PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
List args = op.getArguments();
@@ -661,7 +694,7 @@ public void testLaunchPrivilegedContainersInvalidEnvVar()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(DockerLinuxContainerRuntime
.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "invalid-value");
@@ -690,7 +723,7 @@ public void testLaunchPrivilegedContainersWithDisabledSetting()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(DockerLinuxContainerRuntime
.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true");
@@ -713,7 +746,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL()
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(DockerLinuxContainerRuntime
.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true");
@@ -743,7 +776,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL()
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(DockerLinuxContainerRuntime
.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true");
@@ -770,7 +803,7 @@ public void testLaunchPrivilegedContainersWithEnabledSettingAndDefaultACL()
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(DockerLinuxContainerRuntime
.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER, "true");
@@ -822,7 +855,7 @@ public void testCGroupParent() throws ContainerExecutionException {
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime
(mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
String resourceOptionsNone = "cgroups=none";
DockerRunCommand command = Mockito.mock(DockerRunCommand.class);
@@ -849,7 +882,7 @@ public void testCGroupParent() throws ContainerExecutionException {
runtime = new DockerLinuxContainerRuntime
(mockExecutor, null);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
command);
@@ -866,7 +899,7 @@ public void testMountSourceOnly()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(
DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS,
@@ -886,7 +919,7 @@ public void testMountSourceTarget()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(
DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS,
@@ -935,7 +968,7 @@ public void testMountInvalid()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(
DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS,
@@ -955,7 +988,7 @@ public void testMountMultiple()
IOException{
DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
mockExecutor, mockCGroupsHandler);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
env.put(
DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS,
@@ -1011,7 +1044,7 @@ public void testContainerLivelinessCheck()
.setExecutionAttribute(USER, user)
.setExecutionAttribute(PID, signalPid)
.setExecutionAttribute(SIGNAL, ContainerExecutor.Signal.NULL);
- runtime.initialize(enableMockContainerExecutor(conf));
+ runtime.initialize(enableMockContainerExecutor(conf), null);
runtime.signalContainer(builder.build());
PrivilegedOperation op = capturePrivilegedOperation();
@@ -1071,7 +1104,7 @@ private List getDockerCommandsForSignal(
.setExecutionAttribute(USER, user)
.setExecutionAttribute(PID, signalPid)
.setExecutionAttribute(SIGNAL, signal);
- runtime.initialize(enableMockContainerExecutor(conf));
+ runtime.initialize(enableMockContainerExecutor(conf), null);
runtime.signalContainer(builder.build());
PrivilegedOperation op = capturePrivilegedOperation();
@@ -1148,4 +1181,127 @@ public void testDockerHostnamePattern() throws Exception {
}
}
}
+
+ @SuppressWarnings("unchecked")
+ private void checkVolumeCreateCommand()
+ throws PrivilegedOperationException, IOException {
+ ArgumentCaptor opCaptor = ArgumentCaptor.forClass(
+ PrivilegedOperation.class);
+
+ //single invocation expected
+ //due to type erasure + mocking, this verification requires a suppress
+ // warning annotation on the entire method
+ verify(mockExecutor, times(1))
+ .executePrivilegedOperation(anyList(), opCaptor.capture(), any(
+ File.class), anyMap(), anyBoolean(), anyBoolean());
+
+ //verification completed. we need to isolate specific invocations.
+ // hence, reset mock here
+ Mockito.reset(mockExecutor);
+
+ PrivilegedOperation op = opCaptor.getValue();
+ Assert.assertEquals(PrivilegedOperation.OperationType
+ .RUN_DOCKER_CMD, op.getOperationType());
+
+ File commandFile = new File(StringUtils.join(",", op.getArguments()));
+ FileInputStream fileInputStream = new FileInputStream(commandFile);
+ String fileContent = new String(IOUtils.toByteArray(fileInputStream));
+ Assert.assertEquals("[docker-command-execution]\n"
+ + " docker-command=volume\n" + " sub-command=create\n"
+ + " volume=volume1\n", fileContent);
+ }
+
+ @Test
+ public void testDockerCommandPlugin() throws Exception {
+ DockerLinuxContainerRuntime runtime =
+ new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
+
+ Context nmContext = mock(Context.class);
+ ResourcePluginManager rpm = mock(ResourcePluginManager.class);
+ Map pluginsMap = new HashMap<>();
+ ResourcePlugin plugin1 = mock(ResourcePlugin.class);
+
+ // Create the docker command plugin logic, which will set volume driver
+ DockerCommandPlugin dockerCommandPlugin = new DockerCommandPlugin() {
+ @Override
+ public void updateDockerRunCommand(DockerRunCommand dockerRunCommand,
+ Container container) throws ContainerExecutionException {
+ dockerRunCommand.setVolumeDriver("driver-1");
+ dockerRunCommand.addReadOnlyMountLocation("/source/path",
+ "/destination/path", true);
+ }
+
+ @Override
+ public DockerVolumeCommand getCreateDockerVolumeCommand(Container container)
+ throws ContainerExecutionException {
+ return new DockerVolumeCommand("create").setVolumeName("volume1");
+ }
+
+ @Override
+ public DockerVolumeCommand getCleanupDockerVolumesCommand(Container container)
+ throws ContainerExecutionException {
+ return null;
+ }
+ };
+
+ when(plugin1.getDockerCommandPluginInstance()).thenReturn(
+ dockerCommandPlugin);
+ ResourcePlugin plugin2 = mock(ResourcePlugin.class);
+ pluginsMap.put("plugin1", plugin1);
+ pluginsMap.put("plugin2", plugin2);
+
+ when(rpm.getNameToPlugins()).thenReturn(pluginsMap);
+
+ when(nmContext.getResourcePluginManager()).thenReturn(rpm);
+
+ runtime.initialize(conf, nmContext);
+
+ ContainerRuntimeContext containerRuntimeContext = builder.build();
+
+ runtime.prepareContainer(containerRuntimeContext);
+ checkVolumeCreateCommand();
+
+ runtime.launchContainer(containerRuntimeContext);
+ PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
+ List args = op.getArguments();
+ String dockerCommandFile = args.get(11);
+
+ List dockerCommands = Files.readAllLines(Paths.get
+ (dockerCommandFile), Charset.forName("UTF-8"));
+
+ int expected = 15;
+ int counter = 0;
+ Assert.assertEquals(expected, dockerCommands.size());
+ Assert.assertEquals("[docker-command-execution]",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(" cap-add=SYS_CHROOT,NET_BIND_SERVICE",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(" cap-drop=ALL", dockerCommands.get(counter++));
+ Assert.assertEquals(" detach=true", dockerCommands.get(counter++));
+ Assert.assertEquals(" docker-command=run", dockerCommands.get(counter++));
+ Assert.assertEquals(" hostname=ctr-id", dockerCommands.get(counter++));
+ Assert
+ .assertEquals(" image=busybox:latest", dockerCommands.get(counter++));
+ Assert.assertEquals(
+ " launch-command=bash,/test_container_work_dir/launch_container.sh",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(" name=container_id", dockerCommands.get(counter++));
+ Assert.assertEquals(" net=host", dockerCommands.get(counter++));
+ Assert.assertEquals(" ro-mounts=/source/path:/destination/path",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(
+ " rw-mounts=/test_container_local_dir:/test_container_local_dir,"
+ + "/test_filecache_dir:/test_filecache_dir,"
+ + "/test_container_work_dir:/test_container_work_dir,"
+ + "/test_container_log_dir:/test_container_log_dir,"
+ + "/test_user_local_dir:/test_user_local_dir",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(" user=run_as_user", dockerCommands.get(counter++));
+
+ // Verify volume-driver is set to expected value.
+ Assert.assertEquals(" volume-driver=driver-1",
+ dockerCommands.get(counter++));
+ Assert.assertEquals(" workdir=/test_container_work_dir",
+ dockerCommands.get(counter++));
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
index bdd435eb2a..67252ea651 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestJavaSandboxLinuxContainerRuntime.java
@@ -55,7 +55,6 @@
import java.util.regex.Pattern;
import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.JAVA_HOME;
-import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.LOG;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.MULTI_COMMAND_REGEX;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.CLEAN_CMD_REGEX;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.JavaSandboxLinuxContainerRuntime.NMContainerPolicyUtils.CONTAINS_JAVA_CMD;
@@ -134,7 +133,7 @@ public void setup() throws Exception {
mockExecutor = mock(PrivilegedOperationExecutor.class);
runtime = new JavaSandboxLinuxContainerRuntime(mockExecutor);
- runtime.initialize(conf);
+ runtime.initialize(conf, null);
resources = new HashMap<>();
grantDir = new File(baseTestDirectory, "grantDir");
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
index 05b44b8a93..c362787b5c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java
@@ -85,7 +85,8 @@ public void setUp() throws Exception {
builder.setExecutionAttribute(CONTAINER_ID_STR, MOCK_CONTAINER_ID);
runtime.initialize(
- TestDockerContainerRuntime.enableMockContainerExecutor(configuration));
+ TestDockerContainerRuntime.enableMockContainerExecutor(configuration),
+ null);
}
@Test
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerVolumeCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerVolumeCommand.java
new file mode 100644
index 0000000000..4d07c0a26b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerVolumeCommand.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestDockerVolumeCommand {
+ @Test
+ public void testDockerVolumeCommand() {
+ DockerVolumeCommand dockerVolumeCommand = new DockerVolumeCommand("create");
+ assertEquals("volume", dockerVolumeCommand.getCommandOption());
+ Assert.assertTrue(
+ dockerVolumeCommand.getDockerCommandWithArguments().get("sub-command")
+ .contains("create"));
+
+ dockerVolumeCommand.setDriverName("driver1");
+ dockerVolumeCommand.setVolumeName("volume1");
+
+ Assert.assertTrue(
+ dockerVolumeCommand.getDockerCommandWithArguments().get("driver")
+ .contains("driver1"));
+
+ Assert.assertTrue(
+ dockerVolumeCommand.getDockerCommandWithArguments().get("volume")
+ .contains("volume1"));
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index 83bace2c65..4abb633a69 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -101,23 +101,41 @@ public void testGpuDiscover() throws YarnException {
GpuDeviceInformation info = plugin.getGpuDeviceInformation();
Assert.assertTrue(info.getGpus().size() > 0);
- Assert.assertEquals(plugin.getMinorNumbersOfGpusUsableByYarn().size(),
+ Assert.assertEquals(plugin.getGpusUsableByYarn().size(),
info.getGpus().size());
}
@Test
public void getNumberOfUsableGpusFromConfig() throws YarnException {
Configuration conf = new Configuration(false);
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0,1,2,4");
+
+ // Illegal format
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3");
GpuDiscoverer plugin = new GpuDiscoverer();
+ try {
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ Assert.fail("Illegal format, should fail.");
+ } catch (YarnException e) {
+ // Expected
+ }
+
+ // Valid format
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3:4");
+ plugin = new GpuDiscoverer();
plugin.initialize(conf);
- List minorNumbers = plugin.getMinorNumbersOfGpusUsableByYarn();
- Assert.assertEquals(4, minorNumbers.size());
+ List usableGpuDevices = plugin.getGpusUsableByYarn();
+ Assert.assertEquals(4, usableGpuDevices.size());
- Assert.assertTrue(0 == minorNumbers.get(0));
- Assert.assertTrue(1 == minorNumbers.get(1));
- Assert.assertTrue(2 == minorNumbers.get(2));
- Assert.assertTrue(4 == minorNumbers.get(3));
+ Assert.assertTrue(0 == usableGpuDevices.get(0).getIndex());
+ Assert.assertTrue(1 == usableGpuDevices.get(1).getIndex());
+ Assert.assertTrue(2 == usableGpuDevices.get(2).getIndex());
+ Assert.assertTrue(3 == usableGpuDevices.get(3).getIndex());
+
+ Assert.assertTrue(0 == usableGpuDevices.get(0).getMinorNumber());
+ Assert.assertTrue(1 == usableGpuDevices.get(1).getMinorNumber());
+ Assert.assertTrue(2 == usableGpuDevices.get(2).getMinorNumber());
+ Assert.assertTrue(4 == usableGpuDevices.get(3).getMinorNumber());
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV1CommandPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV1CommandPlugin.java
new file mode 100644
index 0000000000..70578479ef
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestNvidiaDockerV1CommandPlugin.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import com.sun.net.httpserver.HttpServer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ResourceInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TestNvidiaDockerV1CommandPlugin {
+ private Map> copyCommandLine(
+ Map> map) {
+ Map> ret = new HashMap<>();
+ for (Map.Entry> entry : map.entrySet()) {
+ ret.put(entry.getKey(), new ArrayList<>(entry.getValue()));
+ }
+ return ret;
+ }
+
+ private boolean commandlinesEquals(Map> cli1,
+ Map> cli2) {
+ if (!Sets.symmetricDifference(cli1.keySet(), cli2.keySet()).isEmpty()) {
+ return false;
+ }
+
+ for (String key : cli1.keySet()) {
+ List value1 = cli1.get(key);
+ List value2 = cli2.get(key);
+ if (!value1.equals(value2)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ static class MyHandler implements HttpHandler {
+ String response = "This is the response";
+
+ @Override
+ public void handle(HttpExchange t) throws IOException {
+ t.sendResponseHeaders(200, response.length());
+ OutputStream os = t.getResponseBody();
+ os.write(response.getBytes());
+ os.close();
+ }
+ }
+
+ static class MyNvidiaDockerV1CommandPlugin
+ extends NvidiaDockerV1CommandPlugin {
+ private boolean requestsGpu = false;
+
+ public MyNvidiaDockerV1CommandPlugin(Configuration conf) {
+ super(conf);
+ }
+
+ public void setRequestsGpu(boolean r) {
+ requestsGpu = r;
+ }
+
+ @Override
+ protected boolean requestsGpu(Container container) {
+ return requestsGpu;
+ }
+ }
+
+ @Test
+ public void testPlugin() throws Exception {
+ Configuration conf = new Configuration();
+
+ DockerRunCommand runCommand = new DockerRunCommand("container_1", "user",
+ "fakeimage");
+
+ Map> originalCommandline = copyCommandLine(
+ runCommand.getDockerCommandWithArguments());
+
+ MyNvidiaDockerV1CommandPlugin
+ commandPlugin = new MyNvidiaDockerV1CommandPlugin(conf);
+
+ Container nmContainer = mock(Container.class);
+
+ // getResourceMapping is null, so commandline won't be updated
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Assert.assertTrue(commandlinesEquals(originalCommandline,
+ runCommand.getDockerCommandWithArguments()));
+
+ // no GPU resource assigned, so commandline won't be updated
+ ResourceMappings resourceMappings = new ResourceMappings();
+ when(nmContainer.getResourceMappings()).thenReturn(resourceMappings);
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Assert.assertTrue(commandlinesEquals(originalCommandline,
+ runCommand.getDockerCommandWithArguments()));
+
+ // Assign GPU resource, init will be invoked
+ ResourceMappings.AssignedResources assigned =
+ new ResourceMappings.AssignedResources();
+ assigned.updateAssignedResources(
+ ImmutableList.of(new GpuDevice(0, 0), new GpuDevice(1, 1)));
+ resourceMappings.addAssignedResources(ResourceInformation.GPU_URI,
+ assigned);
+
+ commandPlugin.setRequestsGpu(true);
+
+ // Since there's no HTTP server running, we will see an exception
+ boolean caughtException = false;
+ try {
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ } catch (ContainerExecutionException e) {
+ caughtException = true;
+ }
+ Assert.assertTrue(caughtException);
+
+ // Start HTTP server
+ MyHandler handler = new MyHandler();
+ HttpServer server = HttpServer.create(new InetSocketAddress(60111), 0);
+ server.createContext("/test", handler);
+ server.start();
+
+ String hostName = server.getAddress().getHostName();
+ int port = server.getAddress().getPort();
+ String httpUrl = "http://" + hostName + ":" + port + "/test";
+
+ conf.set(YarnConfiguration.NVIDIA_DOCKER_PLUGIN_V1_ENDPOINT, httpUrl);
+
+ commandPlugin = new MyNvidiaDockerV1CommandPlugin(conf);
+
+ // Start using invalid options
+ handler.response = "INVALID_RESPONSE";
+ try {
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ } catch (ContainerExecutionException e) {
+ caughtException = true;
+ }
+ Assert.assertTrue(caughtException);
+
+ // Start using invalid options
+ handler.response = "INVALID_RESPONSE";
+ try {
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ } catch (ContainerExecutionException e) {
+ caughtException = true;
+ }
+ Assert.assertTrue(caughtException);
+
+ /* Test get docker run command */
+ handler.response = "--device=/dev/nvidiactl --device=/dev/nvidia-uvm "
+ + "--device=/dev/nvidia0 --device=/dev/nvidia1 "
+ + "--volume-driver=nvidia-docker "
+ + "--volume=nvidia_driver_352.68:/usr/local/nvidia:ro";
+
+ commandPlugin.setRequestsGpu(true);
+ commandPlugin.updateDockerRunCommand(runCommand, nmContainer);
+ Map> newCommandLine =
+ runCommand.getDockerCommandWithArguments();
+
+ // Command line will be updated
+ Assert.assertFalse(commandlinesEquals(originalCommandline, newCommandLine));
+ // Volume driver should not be included by final commandline
+ Assert.assertFalse(newCommandLine.containsKey("volume-driver"));
+ Assert.assertTrue(newCommandLine.containsKey("devices"));
+ Assert.assertTrue(newCommandLine.containsKey("ro-mounts"));
+
+ /* Test get docker volume command */
+ commandPlugin = new MyNvidiaDockerV1CommandPlugin(conf);
+
+ // When requestsGpu == false, the returned docker volume command is null.
+ Assert.assertNull(commandPlugin.getCreateDockerVolumeCommand(nmContainer));
+
+ // set requests Gpu to true
+ commandPlugin.setRequestsGpu(true);
+
+ DockerVolumeCommand dockerVolumeCommand = commandPlugin.getCreateDockerVolumeCommand(
+ nmContainer);
+ Assert.assertEquals(
+ "volume docker-command=volume " + "driver=nvidia-docker "
+ + "sub-command=create " + "volume=nvidia_driver_352.68",
+ dockerVolumeCommand.toString());
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
index eb222cd002..3dca3676b5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
@@ -42,6 +42,7 @@
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
public class NMMemoryStateStoreService extends NMStateStoreService {
@@ -503,14 +504,17 @@ public synchronized void removeAMRMProxyAppContext(
}
@Override
- public void storeAssignedResources(ContainerId containerId,
+ public void storeAssignedResources(Container container,
String resourceType, List assignedResources)
throws IOException {
ResourceMappings.AssignedResources ar =
new ResourceMappings.AssignedResources();
ar.updateAssignedResources(assignedResources);
- containerStates.get(containerId).getResourceMappings()
+ containerStates.get(container.getContainerId()).getResourceMappings()
.addAssignedResources(resourceType, ar);
+
+ // update container resource mapping.
+ updateContainerResourceMapping(container, resourceType, assignedResources);
}
private static class TrackerState {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
index 1ff2119b2c..3cac5b40bc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
@@ -29,6 +29,7 @@
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
@@ -69,6 +70,8 @@
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyTokenSecretManager;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredAMRMProxyState;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredApplicationsState;
@@ -1124,16 +1127,21 @@ public void testStateStoreForResourceMapping() throws IOException {
ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5);
storeMockContainer(containerId);
+ Container container = mock(Container.class);
+ when(container.getContainerId()).thenReturn(containerId);
+ ResourceMappings resourceMappings = new ResourceMappings();
+ when(container.getResourceMappings()).thenReturn(resourceMappings);
+
// Store ResourceMapping
- stateStore.storeAssignedResources(containerId, "gpu",
+ stateStore.storeAssignedResources(container, "gpu",
Arrays.asList("1", "2", "3"));
// This will overwrite above
List gpuRes1 = Arrays.asList("1", "2", "4");
- stateStore.storeAssignedResources(containerId, "gpu", gpuRes1);
+ stateStore.storeAssignedResources(container, "gpu", gpuRes1);
List fpgaRes = Arrays.asList("3", "4", "5", "6");
- stateStore.storeAssignedResources(containerId, "fpga", fpgaRes);
+ stateStore.storeAssignedResources(container, "fpga", fpgaRes);
List numaRes = Arrays.asList("numa1");
- stateStore.storeAssignedResources(containerId, "numa", numaRes);
+ stateStore.storeAssignedResources(container, "numa", numaRes);
// add a invalid key
restartStateStore();
@@ -1143,12 +1151,18 @@ public void testStateStoreForResourceMapping() throws IOException {
List res = rcs.getResourceMappings()
.getAssignedResources("gpu");
Assert.assertTrue(res.equals(gpuRes1));
+ Assert.assertTrue(
+ resourceMappings.getAssignedResources("gpu").equals(gpuRes1));
res = rcs.getResourceMappings().getAssignedResources("fpga");
Assert.assertTrue(res.equals(fpgaRes));
+ Assert.assertTrue(
+ resourceMappings.getAssignedResources("fpga").equals(fpgaRes));
res = rcs.getResourceMappings().getAssignedResources("numa");
Assert.assertTrue(res.equals(numaRes));
+ Assert.assertTrue(
+ resourceMappings.getAssignedResources("numa").equals(numaRes));
}
private StartContainerRequest storeMockContainer(ContainerId containerId)