YARN-11689. Update the cgroup v2 init error handling (#6810)

This commit is contained in:
Benjamin Teke 2024-05-13 12:56:26 +02:00 committed by GitHub
parent b5a90d9500
commit ce7d01fac8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 26 deletions

View File

@ -358,14 +358,14 @@ private void initializePreMountedCGroupController(CGroupController controller)
} else { } else {
// Unexpected: we just checked that it was missing // Unexpected: we just checked that it was missing
throw new ResourceHandlerException(getErrorWithDetails( throw new ResourceHandlerException(getErrorWithDetails(
"Unexpected: Cannot create yarn cgroup", "Unexpected: Cannot create yarn cgroup hierarchy",
subsystemName, subsystemName,
yarnHierarchy.getAbsolutePath() yarnHierarchy.getAbsolutePath()
)); ));
} }
} catch (SecurityException e) { } catch (SecurityException e) {
throw new ResourceHandlerException(getErrorWithDetails( throw new ResourceHandlerException(getErrorWithDetails(
"No permissions to create yarn cgroup", "No permissions to create yarn cgroup hierarchy",
subsystemName, subsystemName,
yarnHierarchy.getAbsolutePath() yarnHierarchy.getAbsolutePath()
), e); ), e);
@ -378,15 +378,7 @@ private void initializePreMountedCGroupController(CGroupController controller)
)); ));
} }
try { updateEnabledControllersInHierarchy(yarnHierarchy, controller);
updateEnabledControllersInHierarchy(yarnHierarchy, controller);
} catch (ResourceHandlerException e) {
throw new ResourceHandlerException(getErrorWithDetails(
"Failed to update cgroup.subtree_control in yarn hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
));
}
} }
protected abstract void updateEnabledControllersInHierarchy( protected abstract void updateEnabledControllersInHierarchy(
@ -401,7 +393,7 @@ protected abstract void updateEnabledControllersInHierarchy(
* @param yarnCgroupPath cgroup path that failed * @param yarnCgroupPath cgroup path that failed
* @return a string builder that can be appended by the caller * @return a string builder that can be appended by the caller
*/ */
private String getErrorWithDetails( protected String getErrorWithDetails(
String errorMessage, String errorMessage,
String subsystemName, String subsystemName,
String yarnCgroupPath) { String yarnCgroupPath) {

View File

@ -97,10 +97,8 @@ protected List<CGroupController> getCGroupControllers() {
@Override @Override
protected Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException { protected Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException {
Map<String, Set<String>> controllerMappings = new HashMap<>(); Map<String, Set<String>> controllerMappings = new HashMap<>();
String controllerPath = this.cGroupsMountConfig.getMountPath() +
Path.SEPARATOR + this.cGroupPrefix;
controllerMappings.put(this.cGroupsMountConfig.getMountPath(), controllerMappings.put(this.cGroupsMountConfig.getMountPath(),
readControllersFile(controllerPath)); readControllersFile(this.cGroupsMountConfig.getMountPath()));
return controllerMappings; return controllerMappings;
} }
@ -171,19 +169,32 @@ protected void updateEnabledControllersInHierarchy(
try { try {
Set<String> enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath()); Set<String> enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath());
if (!enabledControllers.contains(controller.getName())) { if (!enabledControllers.contains(controller.getName())) {
throw new ResourceHandlerException(String.format( String errorMsg = String.format(
"The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " + "The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " +
"in the %s/cgroup.subtree_control file.", "in the %s/cgroup.subtree_control file.",
controller.getName(), yarnHierarchy.getAbsolutePath(), controller.getName(), yarnHierarchy.getAbsolutePath(),
yarnHierarchy.getParentFile().getAbsolutePath())); yarnHierarchy.getParentFile().getAbsolutePath());
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
} }
File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath() File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath()
+ Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE); + Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE);
if (!subtreeControlFile.exists()) { if (!subtreeControlFile.exists()) {
throw new ResourceHandlerException( String errorMsg = "No subtree control file found in the cgroup hierarchy: " +
"No subtree control file found in the cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
yarnHierarchy.getAbsolutePath()); throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
if (!subtreeControlFile.canWrite()) {
String errorMsg = "Cannot write the cgroup.subtree_control file in the " +
"cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
} }
Writer w = new OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(), Writer w = new OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(),
@ -194,16 +205,20 @@ protected void updateEnabledControllersInHierarchy(
yarnHierarchy.getAbsolutePath()); yarnHierarchy.getAbsolutePath());
pw.write("+" + controller.getName()); pw.write("+" + controller.getName());
if (pw.checkError()) { if (pw.checkError()) {
throw new ResourceHandlerException("Failed to add the controller to the " + String errorMsg = "Failed to add the controller to the " +
"cgroup.subtree_control file in the cgroup hierarchy: " + "cgroup.subtree_control file in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath()); yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
} }
} }
} catch (IOException e) { } catch (IOException e) {
throw new ResourceHandlerException( String errorMsg = "Failed to update the cgroup.subtree_control file in the " +
"Failed to update the cgroup.subtree_control file in the cgroup hierarchy: " + "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
yarnHierarchy.getAbsolutePath(), e); throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
} }
} }
} }

View File

@ -217,11 +217,13 @@ public void testManualCgroupSetting() throws Exception {
conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
"/hadoop-yarn"); "/hadoop-yarn");
File baseCgroup = new File(tmpPath);
File subCgroup = new File(tmpPath, "/hadoop-yarn"); File subCgroup = new File(tmpPath, "/hadoop-yarn");
Assert.assertTrue("temp dir should be created", subCgroup.mkdirs()); Assert.assertTrue("temp dir should be created", subCgroup.mkdirs());
subCgroup.deleteOnExit(); subCgroup.deleteOnExit();
String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n"; String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n";
createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);
createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers); createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);
File subtreeControlFile = new File(subCgroup.getAbsolutePath(), File subtreeControlFile = new File(subCgroup.getAbsolutePath(),