YARN-11689. Update the cgroup v2 init error handling (#6810)

This commit is contained in:
Benjamin Teke 2024-05-13 12:56:26 +02:00 committed by GitHub
parent b5a90d9500
commit ce7d01fac8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 26 deletions

View File

@ -358,14 +358,14 @@ private void initializePreMountedCGroupController(CGroupController controller)
} else {
// Unexpected: we just checked that it was missing
throw new ResourceHandlerException(getErrorWithDetails(
"Unexpected: Cannot create yarn cgroup",
"Unexpected: Cannot create yarn cgroup hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
));
}
} catch (SecurityException e) {
throw new ResourceHandlerException(getErrorWithDetails(
"No permissions to create yarn cgroup",
"No permissions to create yarn cgroup hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
), e);
@ -378,15 +378,7 @@ private void initializePreMountedCGroupController(CGroupController controller)
));
}
try {
updateEnabledControllersInHierarchy(yarnHierarchy, controller);
} catch (ResourceHandlerException e) {
throw new ResourceHandlerException(getErrorWithDetails(
"Failed to update cgroup.subtree_control in yarn hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
));
}
updateEnabledControllersInHierarchy(yarnHierarchy, controller);
}
protected abstract void updateEnabledControllersInHierarchy(
@ -401,7 +393,7 @@ protected abstract void updateEnabledControllersInHierarchy(
* @param yarnCgroupPath cgroup path that failed
* @return the error message string with the failure details included
*/
private String getErrorWithDetails(
protected String getErrorWithDetails(
String errorMessage,
String subsystemName,
String yarnCgroupPath) {

View File

@ -97,10 +97,8 @@ protected List<CGroupController> getCGroupControllers() {
@Override
protected Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException {
Map<String, Set<String>> controllerMappings = new HashMap<>();
String controllerPath = this.cGroupsMountConfig.getMountPath() +
Path.SEPARATOR + this.cGroupPrefix;
controllerMappings.put(this.cGroupsMountConfig.getMountPath(),
readControllersFile(controllerPath));
readControllersFile(this.cGroupsMountConfig.getMountPath()));
return controllerMappings;
}
@ -171,19 +169,32 @@ protected void updateEnabledControllersInHierarchy(
try {
Set<String> enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath());
if (!enabledControllers.contains(controller.getName())) {
throw new ResourceHandlerException(String.format(
String errorMsg = String.format(
"The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " +
"in the %s/cgroup.subtree_control file.",
controller.getName(), yarnHierarchy.getAbsolutePath(),
yarnHierarchy.getParentFile().getAbsolutePath()));
yarnHierarchy.getParentFile().getAbsolutePath());
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath()
+ Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE);
if (!subtreeControlFile.exists()) {
throw new ResourceHandlerException(
"No subtree control file found in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath());
String errorMsg = "No subtree control file found in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
if (!subtreeControlFile.canWrite()) {
String errorMsg = "Cannot write the cgroup.subtree_control file in the " +
"cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
Writer w = new OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(),
@ -194,16 +205,20 @@ protected void updateEnabledControllersInHierarchy(
yarnHierarchy.getAbsolutePath());
pw.write("+" + controller.getName());
if (pw.checkError()) {
throw new ResourceHandlerException("Failed to add the controller to the " +
String errorMsg = "Failed to add the controller to the " +
"cgroup.subtree_control file in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath());
yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
}
} catch (IOException e) {
throw new ResourceHandlerException(
"Failed to update the cgroup.subtree_control file in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath(), e);
String errorMsg = "Failed to update the cgroup.subtree_control file in the " +
"cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
}
}

View File

@ -217,11 +217,13 @@ public void testManualCgroupSetting() throws Exception {
conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
"/hadoop-yarn");
File baseCgroup = new File(tmpPath);
File subCgroup = new File(tmpPath, "/hadoop-yarn");
Assert.assertTrue("temp dir should be created", subCgroup.mkdirs());
subCgroup.deleteOnExit();
String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n";
createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);
createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);
File subtreeControlFile = new File(subCgroup.getAbsolutePath(),