YARN-8642. Add support for tmpfs mounts with the Docker runtime. Contributed by Craig Condit

This commit is contained in:
Shane Kumpf 2018-08-29 07:08:37 -06:00
parent a0ebb6b39f
commit 73625168c0
10 changed files with 317 additions and 2 deletions

View File

@ -2012,6 +2012,11 @@ public static boolean isAclEnabled(Configuration conf) {
public static final String NM_DOCKER_DEFAULT_RW_MOUNTS =
DOCKER_CONTAINER_RUNTIME_PREFIX + "default-rw-mounts";
/** The default list of tmpfs mounts to be mounted into all
* Docker containers that use DockerContainerRuntime. The value is a
* comma-separated list of absolute paths inside the container; these
* mounts are added in addition to any requested by the application. */
public static final String NM_DOCKER_DEFAULT_TMPFS_MOUNTS =
DOCKER_CONTAINER_RUNTIME_PREFIX + "default-tmpfs-mounts";
/** The mode in which the Java Container Sandbox should run detailed by
* the JavaSandboxLinuxContainerRuntime. */
public static final String YARN_CONTAINER_SANDBOX =

View File

@ -1827,6 +1827,13 @@
<value></value>
</property>
<property>
<description>The default list of tmpfs mounts to be mounted into all Docker
containers that use DockerContainerRuntime, given as a comma-separated list
of absolute paths within the container.</description>
<name>yarn.nodemanager.runtime.linux.docker.default-tmpfs-mounts</name>
<value></value>
</property>
<property>
<description>The mode in which the Java Container Sandbox should run detailed by
the JavaSandboxLinuxContainerRuntime.</description>

View File

@ -165,6 +165,11 @@
* {@code docker.allowed.ro-mounts} and {@code docker.allowed.rw-mounts}.
* </li>
* <li>
* {@code YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS} allows users to
* specify additional tmpfs mounts for the Docker container. The value of
* the environment variable should be a comma-separated list of absolute
* mount points within the container.
* </li>
* <li>
* {@code YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL} allows a user
* to request delayed deletion of the Docker containers on a per
* container basis. If true, Docker containers will not be removed until
@ -195,6 +200,8 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
private static final Pattern USER_MOUNT_PATTERN = Pattern.compile(
"(?<=^|,)([^:\\x00]+):([^:\\x00]+)" +
"(:(r[ow]|(r[ow][+])?(r?shared|r?slave|r?private)))?(?:,|$)");
// A valid tmpfs mount starts with '/' and is followed by at least one
// character, none of which may be ':' or NUL.
private static final Pattern TMPFS_MOUNT_PATTERN = Pattern.compile(
"^/[^:\\x00]+$");
private static final int HOST_NAME_LENGTH = 64;
private static final String DEFAULT_PROCFS = "/proc";
@ -220,6 +227,9 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
public static final String ENV_DOCKER_CONTAINER_MOUNTS =
"YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS";
/** Name of the container environment variable that carries the
 * comma-separated list of tmpfs mounts requested for the container. */
@InterfaceAudience.Private
public static final String ENV_DOCKER_CONTAINER_TMPFS_MOUNTS =
"YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS";
@InterfaceAudience.Private
public static final String ENV_DOCKER_CONTAINER_DELAYED_REMOVAL =
"YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL";
private Configuration conf;
@ -238,6 +248,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
private int dockerStopGracePeriod;
private Set<String> defaultROMounts = new HashSet<>();
private Set<String> defaultRWMounts = new HashSet<>();
private Set<String> defaultTmpfsMounts = new HashSet<>();
/**
* Return whether the given environment variables indicate that the operation
@ -302,6 +313,7 @@ public void initialize(Configuration conf, Context nmContext)
allowedNetworks.clear();
defaultROMounts.clear();
defaultRWMounts.clear();
defaultTmpfsMounts.clear();
allowedNetworks.addAll(Arrays.asList(
conf.getTrimmedStrings(
YarnConfiguration.NM_DOCKER_ALLOWED_CONTAINER_NETWORKS,
@ -355,6 +367,10 @@ public void initialize(Configuration conf, Context nmContext)
defaultRWMounts.addAll(Arrays.asList(
conf.getTrimmedStrings(
YarnConfiguration.NM_DOCKER_DEFAULT_RW_MOUNTS)));
defaultTmpfsMounts.addAll(Arrays.asList(
conf.getTrimmedStrings(
YarnConfiguration.NM_DOCKER_DEFAULT_TMPFS_MOUNTS)));
}
private Set<String> getDockerCapabilitiesFromConf() throws
@ -907,6 +923,28 @@ public void launchContainer(ContainerRuntimeContext ctx)
}
}
// Tmpfs mounts requested through the container environment: the value is
// split on ',' and every entry must match TMPFS_MOUNT_PATTERN, otherwise the
// launch is rejected with a ContainerExecutionException.
if (environment.containsKey(ENV_DOCKER_CONTAINER_TMPFS_MOUNTS)) {
String[] tmpfsMounts = environment.get(ENV_DOCKER_CONTAINER_TMPFS_MOUNTS)
.split(",");
for (String mount : tmpfsMounts) {
if (!TMPFS_MOUNT_PATTERN.matcher(mount).matches()) {
throw new ContainerExecutionException("Invalid tmpfs mount : " +
mount);
}
runCommand.addTmpfsMount(mount);
}
}
// Default tmpfs mounts are added after the ones requested through the
// environment and are validated against the same pattern.
if (defaultTmpfsMounts != null && !defaultTmpfsMounts.isEmpty()) {
for (String mount : defaultTmpfsMounts) {
if (!TMPFS_MOUNT_PATTERN.matcher(mount).matches()) {
throw new ContainerExecutionException("Invalid tmpfs mount : " +
mount);
}
runCommand.addTmpfsMount(mount);
}
}
if (allowHostPidNamespace(container)) {
runCommand.setPidNamespace("host");
}

View File

@ -104,6 +104,11 @@ public DockerRunCommand addAllReadOnlyMountLocations(List<String> paths) {
return this;
}
/**
 * Adds a tmpfs mount to this run command by passing {@code mount} as a
 * value of the "tmpfs" command argument.
 *
 * @param mount the tmpfs mount value to add
 * @return this command, so that calls can be chained
 */
public DockerRunCommand addTmpfsMount(String mount) {
super.addCommandArguments("tmpfs", mount);
return this;
}
public DockerRunCommand setVolumeDriver(String volumeDriver) {
super.addCommandArguments("volume-driver", volumeDriver);
return this;

View File

@ -155,6 +155,12 @@ static int is_regex(const char *str) {
return (strncmp(str, "regex:", 6) == 0);
}
/**
 * Check whether a tmpfs mount target is acceptable: it must begin with '/'
 * followed by at least one character, and must not contain ':'.
 * Returns 1 when the mount is valid and 0 otherwise.
 */
static int is_valid_tmpfs_mount(const char *mount) {
  const char *tmpfs_mount_regex = "^/[^:]+$";
  // execute_regex_match returns 0 when the input matches the pattern
  int match_status = execute_regex_match(tmpfs_mount_regex, mount);
  return match_status == 0;
}
static int is_volume_name(const char *volume_name) {
const char *regex_str = "^[a-zA-Z0-9]([a-zA-Z0-9_.-]*)$";
// execute_regex_match return 0 is matched success
@ -334,6 +340,8 @@ const char *get_docker_error_message(const int error_code) {
return "Invalid pid namespace";
case INVALID_DOCKER_IMAGE_TRUST:
return "Docker image is not trusted";
case INVALID_DOCKER_TMPFS_MOUNT:
return "Invalid docker tmpfs mount";
default:
return "Unknown error";
}
@ -1129,6 +1137,35 @@ static char* get_mount_type(const char *mount) {
return mount_type;
}
/**
 * Append a "--tmpfs <mount>" argument pair to args for every value of the
 * comma-delimited "tmpfs" key in the docker command file section.
 *
 * Returns 0 on success (including when no tmpfs values are present),
 * INVALID_DOCKER_TMPFS_MOUNT when a value fails validation, or
 * BUFFER_TOO_SMALL when an argument cannot be added. Processing stops at the
 * first failure; arguments added before the failure are left in args.
 */
static int add_tmpfs_mounts(const struct configuration *command_config, args *args) {
  int status = 0;
  char **cursor = NULL;
  char **mounts = get_configuration_values_delimiter("tmpfs", DOCKER_COMMAND_FILE_SECTION, command_config, ",");

  // A NULL list simply skips the loop; the list is still handed to
  // free_values below, exactly once, on every path.
  for (cursor = mounts; cursor != NULL && *cursor != NULL; cursor++) {
    if (!is_valid_tmpfs_mount(*cursor)) {
      fprintf(ERRORFILE, "Invalid docker tmpfs mount '%s'\n", *cursor);
      status = INVALID_DOCKER_TMPFS_MOUNT;
      break;
    }
    // The flag and its value are added as two separate arguments; the value
    // is only attempted when the flag was added successfully.
    if (add_to_args(args, "--tmpfs") != 0 || add_to_args(args, *cursor) != 0) {
      status = BUFFER_TOO_SMALL;
      break;
    }
  }

  free_values(mounts);
  return status;
}
static int add_mounts(const struct configuration *command_config, const struct configuration *conf, args *args) {
const char *tmp_path_buffer[2] = {NULL, NULL};
char *mount_src = NULL;
@ -1469,6 +1506,11 @@ int get_docker_run_command(const char *command_file, const struct configuration
goto free_and_exit;
}
ret = add_tmpfs_mounts(&command_config, args);
if (ret != 0) {
goto free_and_exit;
}
ret = set_cgroup_parent(&command_config, args);
if (ret != 0) {
goto free_and_exit;

View File

@ -66,7 +66,8 @@ enum docker_error_codes {
INVALID_DOCKER_VOLUME_COMMAND,
PID_HOST_DISABLED,
INVALID_PID_NAMESPACE,
INVALID_DOCKER_IMAGE_TRUST
INVALID_DOCKER_IMAGE_TRUST,
INVALID_DOCKER_TMPFS_MOUNT
};
/**

View File

@ -1185,6 +1185,70 @@ namespace ContainerExecutor {
free_configuration(&container_cfg);
}
// Exercises add_tmpfs_mounts: valid "tmpfs" values must be turned into
// "--tmpfs <mount>" argument pairs, and invalid values must be rejected with
// INVALID_DOCKER_TMPFS_MOUNT without adding any arguments.
TEST_F(TestDockerUtil, test_add_tmpfs_mounts) {
struct configuration cmd_cfg;
struct args buff = ARGS_INITIAL_VALUE;
int ret = 0;
std::string container_executor_cfg_contents = "[docker]\n docker.trusted.registries=hadoop\n";
// Each pair is (command file contents, expected flattened arguments).
std::vector<std::pair<std::string, std::string> > file_cmd_vec;
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n tmpfs=/run",
"--tmpfs /run"));
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n tmpfs=/run,/run2",
"--tmpfs /run --tmpfs /run2"));
write_container_executor_cfg(container_executor_cfg_contents);
ret = create_ce_file();
if (ret != 0) {
// Without the ce file the test returns early instead of failing.
std::cerr << "Could not create ce file, skipping test" << std::endl;
return;
}
// Valid inputs: expect success and the exact argument string.
std::vector<std::pair<std::string, std::string> >::const_iterator itr;
for (itr = file_cmd_vec.begin(); itr != file_cmd_vec.end(); ++itr) {
write_command_file(itr->first);
ret = read_config(docker_command_file.c_str(), &cmd_cfg);
if (ret != 0) {
FAIL();
}
ret = add_tmpfs_mounts(&cmd_cfg, &buff);
char *actual = flatten(&buff);
ASSERT_EQ(0, ret);
ASSERT_STREQ(itr->second.c_str(), actual);
// Reset state so that the next iteration starts from empty arguments.
reset_args(&buff);
free(actual);
free_configuration(&cmd_cfg);
}
// Invalid inputs: a relative path, a ':' inside the path, and a trailing ':'.
// Each pair is (command file contents, expected error code).
std::vector<std::pair<std::string, int> > bad_file_cmds_vec;
bad_file_cmds_vec.push_back(std::make_pair<std::string, int>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n tmpfs=run",
static_cast<int>(INVALID_DOCKER_TMPFS_MOUNT)));
bad_file_cmds_vec.push_back(std::make_pair<std::string, int>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n tmpfs=/ru:n",
static_cast<int>(INVALID_DOCKER_TMPFS_MOUNT)));
bad_file_cmds_vec.push_back(std::make_pair<std::string, int>(
"[docker-command-execution]\n docker-command=run\n image=hadoop/image\n tmpfs=/run:",
static_cast<int>(INVALID_DOCKER_TMPFS_MOUNT)));
std::vector<std::pair<std::string, int> >::const_iterator itr2;
for (itr2 = bad_file_cmds_vec.begin(); itr2 != bad_file_cmds_vec.end(); ++itr2) {
write_command_file(itr2->first);
ret = read_config(docker_command_file.c_str(), &cmd_cfg);
if (ret != 0) {
FAIL();
}
ret = add_tmpfs_mounts(&cmd_cfg, &buff);
char *actual = flatten(&buff);
ASSERT_EQ(itr2->second, ret);
// A rejected mount must not leave any arguments behind.
ASSERT_STREQ("", actual);
reset_args(&buff);
free(actual);
free_configuration(&cmd_cfg);
}
}
TEST_F(TestDockerUtil, test_docker_run_privileged) {
std::string container_executor_contents = "[docker]\n docker.allowed.ro-mounts=/var,/etc,/usr/bin/cut\n"

View File

@ -86,6 +86,7 @@
import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_DOCKER_DEFAULT_RO_MOUNTS;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_DOCKER_DEFAULT_RW_MOUNTS;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_DOCKER_DEFAULT_TMPFS_MOUNTS;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_RUN_PRIVILEGED_CONTAINER;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.APPID;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.APPLICATION_LOCAL_DIRS;
@ -1327,6 +1328,154 @@ public void testUserMountModeNulInvalid() throws ContainerExecutionException {
}
}
/**
 * Verifies that a single tmpfs mount requested through the container
 * environment is written to the docker command file.
 */
@Test
public void testTmpfsMount()
    throws ContainerExecutionException, PrivilegedOperationException,
    IOException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS, "/run");
  dockerRuntime.launchContainer(builder.build());

  // The docker command file path is the argument at index 11 of the
  // captured privileged operation.
  String commandFilePath =
      capturePrivilegedOperationAndVerifyArgs().getArguments().get(11);
  List<String> commandFileLines = Files.readAllLines(
      Paths.get(commandFilePath), Charset.forName("UTF-8"));

  Assert.assertTrue(commandFileLines.contains(" tmpfs=/run"));
}
/**
 * Verifies that several tmpfs mounts requested through the container
 * environment are written, in the requested order, to the docker command
 * file.
 */
@Test
public void testTmpfsMountMulti()
    throws ContainerExecutionException, PrivilegedOperationException,
    IOException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS,
      "/run,/tmp");
  dockerRuntime.launchContainer(builder.build());

  // The docker command file path is the argument at index 11 of the
  // captured privileged operation.
  String commandFilePath =
      capturePrivilegedOperationAndVerifyArgs().getArguments().get(11);
  List<String> commandFileLines = Files.readAllLines(
      Paths.get(commandFilePath), Charset.forName("UTF-8"));

  Assert.assertTrue(commandFileLines.contains(" tmpfs=/run,/tmp"));
}
/**
 * Verifies that the configured default tmpfs mounts are added after the
 * mount requested through the container environment.
 *
 * The runtime keeps the default mounts in a HashSet, so the relative order
 * of the defaults in the command file is an implementation detail. The test
 * therefore requires the user-requested mount to come first and compares the
 * remaining entries as a set.
 */
@Test
public void testDefaultTmpfsMounts()
    throws ContainerExecutionException, PrivilegedOperationException,
    IOException {
  conf.setStrings(NM_DOCKER_DEFAULT_TMPFS_MOUNTS, "/run,/var/run");
  DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
      mockExecutor, mockCGroupsHandler);
  runtime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS,
      "/tmpfs");
  runtime.launchContainer(builder.build());

  PrivilegedOperation op = capturePrivilegedOperationAndVerifyArgs();
  List<String> args = op.getArguments();
  String dockerCommandFile = args.get(11);
  List<String> dockerCommands = Files.readAllLines(
      Paths.get(dockerCommandFile), Charset.forName("UTF-8"));

  // Locate the tmpfs entry; leading indentation of the line is ignored.
  String tmpfsValue = null;
  for (String line : dockerCommands) {
    String trimmed = line.trim();
    if (trimmed.startsWith("tmpfs=")) {
      tmpfsValue = trimmed.substring("tmpfs=".length());
      break;
    }
  }
  Assert.assertNotNull("No tmpfs entry found in docker command file",
      tmpfsValue);

  String[] mounts = tmpfsValue.split(",");
  Assert.assertEquals(3, mounts.length);
  // The mount requested through the environment is added before defaults.
  Assert.assertEquals("/tmpfs", mounts[0]);
  // Defaults may appear in any order.
  Assert.assertEquals(
      new java.util.HashSet<String>(
          java.util.Arrays.asList("/run", "/var/run")),
      new java.util.HashSet<String>(
          java.util.Arrays.asList(mounts[1], mounts[2])));
}
/**
 * Verifies that launching a container fails when the configured default
 * tmpfs mounts are not absolute paths.
 */
@Test
public void testDefaultTmpfsMountsInvalid()
    throws ContainerExecutionException {
  conf.setStrings(NM_DOCKER_DEFAULT_TMPFS_MOUNTS, "run,var/run");
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS,
      "/tmpfs");

  try {
    dockerRuntime.launchContainer(builder.build());
  } catch (ContainerExecutionException e) {
    LOG.info("Caught expected exception : " + e);
    return;
  }
  // Reached only when the launch did not throw.
  Assert.fail(
      "Expected a launch container failure due to non-absolute path.");
}
/**
 * Verifies that launching a container fails when the tmpfs mount requested
 * through the environment is a relative path.
 */
@Test
public void testTmpfsRelativeInvalid() throws ContainerExecutionException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS, "run");

  try {
    dockerRuntime.launchContainer(builder.build());
  } catch (ContainerExecutionException e) {
    LOG.info("Caught expected exception : " + e);
    return;
  }
  // Reached only when the launch did not throw.
  Assert.fail(
      "Expected a launch container failure due to non-absolute path.");
}
/**
 * Verifies that launching a container fails when the tmpfs mount requested
 * through the environment contains a ':' character.
 */
@Test
public void testTmpfsColonInvalid() throws ContainerExecutionException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS, "/run:");

  try {
    dockerRuntime.launchContainer(builder.build());
  } catch (ContainerExecutionException e) {
    LOG.info("Caught expected exception : " + e);
    return;
  }
  // Reached only when the launch did not throw.
  Assert.fail(
      "Expected a launch container failure due to invalid character.");
}
/**
 * Verifies that launching a container fails when the tmpfs mount requested
 * through the environment contains a NUL character.
 */
@Test
public void testTmpfsNulInvalid() throws ContainerExecutionException {
  DockerLinuxContainerRuntime dockerRuntime =
      new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler);
  dockerRuntime.initialize(conf, nmContext);

  env.put(
      DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_TMPFS_MOUNTS,
      "/ru\0n");

  try {
    dockerRuntime.launchContainer(builder.build());
  } catch (ContainerExecutionException e) {
    LOG.info("Caught expected exception : " + e);
    return;
  }
  // Reached only when the launch did not throw.
  Assert.fail(
      "Expected a launch container failure due to NUL in tmpfs mount.");
}
@Test
public void testDefaultROMounts()
throws ContainerExecutionException, PrivilegedOperationException,

View File

@ -58,6 +58,7 @@ public void testCommandArguments() {
commands.add("launch_command");
dockerRunCommand.setOverrideCommandWithArgs(commands);
dockerRunCommand.removeContainerOnExit();
dockerRunCommand.addTmpfsMount("/run");
assertEquals("run", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments()
@ -76,7 +77,9 @@ public void testCommandArguments() {
assertEquals("launch_command", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments()
.get("launch-command")));
assertEquals(7, dockerRunCommand.getDockerCommandWithArguments().size());
assertEquals("/run", StringUtils.join(",",
dockerRunCommand.getDockerCommandWithArguments().get("tmpfs")));
assertEquals(8, dockerRunCommand.getDockerCommandWithArguments().size());
}
@Test

View File

@ -350,6 +350,7 @@ environment variables in the application's environment:
| `YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_PID_NAMESPACE` | Controls which PID namespace will be used by the Docker container. By default, each Docker container has its own PID namespace. To share the namespace of the host, the yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed property must be set to true. If the host PID namespace is allowed and this environment variable is set to host, the Docker container will share the host's PID namespace. No other value is allowed. |
| `YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER` | Controls whether the Docker container is a privileged container. In order to use privileged containers, the yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed property must be set to true, and the application owner must appear in the value of the yarn.nodemanager.runtime.linux.docker.privileged-containers.acl property. If this environment variable is set to true, a privileged Docker container will be used if allowed. No other value is allowed, so the environment variable should be left unset rather than setting it to false. |
| `YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS` | Adds additional volume mounts to the Docker container. The value of the environment variable should be a comma-separated list of mounts. All such mounts must be given as `source:dest[:mode]` and the mode must be "ro" (read-only) or "rw" (read-write) to specify the type of access being requested. If neither is specified, read-write will be assumed. The mode may include a bind propagation option. In that case, the mode should either be of the form `[option]`, `rw+[option]`, or `ro+[option]`. Valid bind propagation options are shared, rshared, slave, rslave, private, and rprivate. The requested mounts will be validated by container-executor based on the values set in container-executor.cfg for `docker.allowed.ro-mounts` and `docker.allowed.rw-mounts`. |
| `YARN_CONTAINER_RUNTIME_DOCKER_TMPFS_MOUNTS` | Adds additional tmpfs mounts to the Docker container. The value of the environment variable should be a comma-separated list of absolute mount points within the container. |
| `YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL` | Allows a user to request delayed deletion of the Docker container on a per container basis. If true, Docker containers will not be removed until the duration defined by yarn.nodemanager.delete.debug-delay-sec has elapsed. Administrators can disable this feature through the yarn-site property yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed. This feature is disabled by default. When this feature is disabled or set to false, the container will be removed as soon as it exits. |
The first two are required. The remainder can be set as needed. While