YARN-4267. Add additional logging to container launch implementations in container-executor. Contributed by Sidharta Seethana.
This commit is contained in:
parent e39ae0e676
commit 151eab2a12
@@ -519,6 +519,10 @@ Release 2.8.0 - UNRELEASED
|
||||
YARN-4170. AM need to be notified with priority in AllocateResponse.
|
||||
(Sunil G via jianhe)
|
||||
|
||||
YARN-4267. Add additional logging to container launch implementations in
|
||||
container-executor. (Sidharta Seethana via vvasudev)
|
||||
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||
|
@@ -1225,6 +1225,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
if (docker_binary == NULL) {
|
||||
docker_binary = "docker";
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Creating script paths...\n");
|
||||
exit_code = create_script_paths(
|
||||
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
|
||||
&container_file_source, &cred_file_source);
|
||||
@@ -1235,6 +1237,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
}
|
||||
gid_t user_gid = getegid();
|
||||
|
||||
fprintf(LOGFILE, "Creating local dirs...\n");
|
||||
exit_code = create_local_dirs(user, app_id, container_id,
|
||||
work_dir, script_name, cred_file, local_dirs, log_dirs,
|
||||
1, script_file_dest, cred_file_dest,
|
||||
@@ -1245,6 +1248,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Getting exit code file...\n");
|
||||
exit_code_file = get_exit_code_file(pid_file);
|
||||
if (NULL == exit_code_file) {
|
||||
exit_code = OUT_OF_MEMORY;
|
||||
@@ -1253,6 +1257,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Changing effective user to root...\n");
|
||||
if (change_effective_user(0, user_gid) != 0) {
|
||||
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid);
|
||||
fflush(ERRORFILE);
|
||||
@@ -1261,6 +1266,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
|
||||
snprintf(docker_command_with_binary, EXECUTOR_PATH_MAX, "%s %s", docker_binary, docker_command);
|
||||
|
||||
fprintf(LOGFILE, "Launching docker container...\n");
|
||||
FILE* start_docker = popen(docker_command_with_binary, "r");
|
||||
if (pclose (start_docker) != 0)
|
||||
{
|
||||
@@ -1275,6 +1281,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
"%s inspect --format {{.State.Pid}} %s",
|
||||
docker_binary, container_id);
|
||||
|
||||
fprintf(LOGFILE, "Inspecting docker container...\n");
|
||||
FILE* inspect_docker = popen(docker_inspect_command, "r");
|
||||
int pid = 0;
|
||||
int res = fscanf (inspect_docker, "%d", &pid);
|
||||
@@ -1288,6 +1295,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
}
|
||||
|
||||
if (pid != 0) {
|
||||
fprintf(LOGFILE, "Writing to cgroup task files...\n");
|
||||
// cgroups-based resource enforcement
|
||||
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
|
||||
// write pid to cgroups
|
||||
@@ -1301,7 +1309,9 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write pid to pidfile
|
||||
fprintf(LOGFILE, "Writing pid file...\n");
|
||||
if (pid_file == NULL
|
||||
|| write_pid_to_file_as_nm(pid_file, (pid_t)pid) != 0) {
|
||||
exit_code = WRITE_PIDFILE_FAILED;
|
||||
@@ -1313,6 +1323,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
snprintf(docker_wait_command, EXECUTOR_PATH_MAX,
|
||||
"%s wait %s", docker_binary, container_id);
|
||||
|
||||
fprintf(LOGFILE, "Waiting for docker container to finish...\n");
|
||||
FILE* wait_docker = popen(docker_wait_command, "r");
|
||||
res = fscanf (wait_docker, "%d", &exit_code);
|
||||
if (pclose (wait_docker) != 0 || res <= 0) {
|
||||
@@ -1321,6 +1332,8 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
fflush(ERRORFILE);
|
||||
}
|
||||
if(exit_code != 0) {
|
||||
fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n",
|
||||
exit_code);
|
||||
snprintf(docker_logs_command, EXECUTOR_PATH_MAX, "%s logs --tail=250 %s",
|
||||
docker_binary, container_id);
|
||||
FILE* logs = popen(docker_logs_command, "r");
|
||||
@@ -1350,6 +1363,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Removing docker container post-exit...\n");
|
||||
snprintf(docker_rm_command, EXECUTOR_PATH_MAX,
|
||||
"%s rm %s", docker_binary, container_id);
|
||||
FILE* rm_docker = popen(docker_rm_command, "w");
|
||||
@@ -1399,7 +1413,7 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
char *cred_file_dest = NULL;
|
||||
char *exit_code_file = NULL;
|
||||
|
||||
|
||||
fprintf(LOGFILE, "Getting exit code file...\n");
|
||||
exit_code_file = get_exit_code_file(pid_file);
|
||||
if (NULL == exit_code_file) {
|
||||
exit_code = OUT_OF_MEMORY;
|
||||
@@ -1408,6 +1422,8 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
|
||||
int container_file_source =-1;
|
||||
int cred_file_source = -1;
|
||||
|
||||
fprintf(LOGFILE, "Creating script paths...\n");
|
||||
exit_code = create_script_paths(
|
||||
work_dir, script_name, cred_file, &script_file_dest, &cred_file_dest,
|
||||
&container_file_source, &cred_file_source);
|
||||
@@ -1431,6 +1447,7 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Writing pid file...\n");
|
||||
// write pid to pidfile
|
||||
if (pid_file == NULL
|
||||
|| write_pid_to_file_as_nm(pid_file, pid) != 0) {
|
||||
@@ -1438,6 +1455,7 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Writing to cgroup task files...\n");
|
||||
// cgroups-based resource enforcement
|
||||
if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) {
|
||||
// write pid to cgroups
|
||||
@@ -1452,6 +1470,7 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Creating local dirs...\n");
|
||||
exit_code = create_local_dirs(user, app_id, container_id,
|
||||
work_dir, script_name, cred_file, local_dirs, log_dirs,
|
||||
0, script_file_dest, cred_file_dest,
|
||||
@@ -1462,6 +1481,8 @@ int launch_container_as_user(const char *user, const char *app_id,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fprintf(LOGFILE, "Launching container...\n");
|
||||
|
||||
#if HAVE_FCLOSEALL
|
||||
fcloseall();
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user