Skip to content

Commit

Permalink
YARN-11689. Update the cgroup v2 init error handling (apache#6810)
Browse files Browse the repository at this point in the history
  • Loading branch information
brumi1024 authored May 13, 2024
1 parent b5a90d9 commit ce7d01f
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,14 @@ private void initializePreMountedCGroupController(CGroupController controller)
} else {
// Unexpected: we just checked that it was missing
throw new ResourceHandlerException(getErrorWithDetails(
"Unexpected: Cannot create yarn cgroup",
"Unexpected: Cannot create yarn cgroup hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
));
}
} catch (SecurityException e) {
throw new ResourceHandlerException(getErrorWithDetails(
"No permissions to create yarn cgroup",
"No permissions to create yarn cgroup hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
), e);
Expand All @@ -378,15 +378,7 @@ private void initializePreMountedCGroupController(CGroupController controller)
));
}

try {
updateEnabledControllersInHierarchy(yarnHierarchy, controller);
} catch (ResourceHandlerException e) {
throw new ResourceHandlerException(getErrorWithDetails(
"Failed to update cgroup.subtree_control in yarn hierarchy",
subsystemName,
yarnHierarchy.getAbsolutePath()
));
}
updateEnabledControllersInHierarchy(yarnHierarchy, controller);
}

protected abstract void updateEnabledControllersInHierarchy(
Expand All @@ -401,7 +393,7 @@ protected abstract void updateEnabledControllersInHierarchy(
* @param yarnCgroupPath cgroup path that failed
* @return the error message with details, as a String
*/
private String getErrorWithDetails(
protected String getErrorWithDetails(
String errorMessage,
String subsystemName,
String yarnCgroupPath) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,8 @@ protected List<CGroupController> getCGroupControllers() {
@Override
protected Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException {
Map<String, Set<String>> controllerMappings = new HashMap<>();
String controllerPath = this.cGroupsMountConfig.getMountPath() +
Path.SEPARATOR + this.cGroupPrefix;
controllerMappings.put(this.cGroupsMountConfig.getMountPath(),
readControllersFile(controllerPath));
readControllersFile(this.cGroupsMountConfig.getMountPath()));
return controllerMappings;
}

Expand Down Expand Up @@ -171,19 +169,32 @@ protected void updateEnabledControllersInHierarchy(
try {
Set<String> enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath());
if (!enabledControllers.contains(controller.getName())) {
throw new ResourceHandlerException(String.format(
String errorMsg = String.format(
"The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " +
"in the %s/cgroup.subtree_control file.",
controller.getName(), yarnHierarchy.getAbsolutePath(),
yarnHierarchy.getParentFile().getAbsolutePath()));
yarnHierarchy.getParentFile().getAbsolutePath());

throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}

File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath()
+ Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE);
if (!subtreeControlFile.exists()) {
throw new ResourceHandlerException(
"No subtree control file found in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath());
String errorMsg = "No subtree control file found in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
if (!subtreeControlFile.canWrite()) {
String errorMsg = "Cannot write the cgroup.subtree_control file in the " +
"cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}

Writer w = new OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(),
Expand All @@ -194,16 +205,20 @@ protected void updateEnabledControllersInHierarchy(
yarnHierarchy.getAbsolutePath());
pw.write("+" + controller.getName());
if (pw.checkError()) {
throw new ResourceHandlerException("Failed to add the controller to the " +
String errorMsg = "Failed to add the controller to the " +
"cgroup.subtree_control file in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath());
yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
}
} catch (IOException e) {
throw new ResourceHandlerException(
"Failed to update the cgroup.subtree_control file in the cgroup hierarchy: " +
yarnHierarchy.getAbsolutePath(), e);
String errorMsg = "Failed to update the cgroup.subtree_control file in the " +
"cgroup hierarchy: " + yarnHierarchy.getAbsolutePath();
throw new ResourceHandlerException(getErrorWithDetails(
errorMsg, controller.getName(),
yarnHierarchy.getAbsolutePath()));
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,13 @@ public void testManualCgroupSetting() throws Exception {
conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
"/hadoop-yarn");

File baseCgroup = new File(tmpPath);
File subCgroup = new File(tmpPath, "/hadoop-yarn");
Assert.assertTrue("temp dir should be created", subCgroup.mkdirs());
subCgroup.deleteOnExit();

String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n";
createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);
createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers);

File subtreeControlFile = new File(subCgroup.getAbsolutePath(),
Expand Down

0 comments on commit ce7d01f

Please sign in to comment.