From ce7d01fac84e736aa0bce6f775d63fbd36c9459e Mon Sep 17 00:00:00 2001 From: Benjamin Teke Date: Mon, 13 May 2024 12:56:26 +0200 Subject: [PATCH] YARN-11689. Update the cgroup v2 init error handling (#6810) --- .../resources/AbstractCGroupsHandler.java | 16 ++----- .../linux/resources/CGroupsV2HandlerImpl.java | 43 +++++++++++++------ .../resources/TestCGroupsV2HandlerImpl.java | 2 + 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java index a8f528a209113..becb68e22f0ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java @@ -358,14 +358,14 @@ private void initializePreMountedCGroupController(CGroupController controller) } else { // Unexpected: we just checked that it was missing throw new ResourceHandlerException(getErrorWithDetails( - "Unexpected: Cannot create yarn cgroup", + "Unexpected: Cannot create yarn cgroup hierarchy", subsystemName, yarnHierarchy.getAbsolutePath() )); } } catch (SecurityException e) { throw new ResourceHandlerException(getErrorWithDetails( - "No permissions to create yarn cgroup", + "No permissions to create yarn cgroup hierarchy", subsystemName, yarnHierarchy.getAbsolutePath() ), e); @@ -378,15 +378,7 @@ private void initializePreMountedCGroupController(CGroupController 
controller) )); } - try { - updateEnabledControllersInHierarchy(yarnHierarchy, controller); - } catch (ResourceHandlerException e) { - throw new ResourceHandlerException(getErrorWithDetails( - "Failed to update cgroup.subtree_control in yarn hierarchy", - subsystemName, - yarnHierarchy.getAbsolutePath() - )); - } + updateEnabledControllersInHierarchy(yarnHierarchy, controller); } protected abstract void updateEnabledControllersInHierarchy( @@ -401,7 +393,7 @@ protected abstract void updateEnabledControllersInHierarchy( * @param yarnCgroupPath cgroup path that failed * @return a string builder that can be appended by the caller */ - private String getErrorWithDetails( + protected String getErrorWithDetails( String errorMessage, String subsystemName, String yarnCgroupPath) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java index 312627f89ba39..cd362ab9a548c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java @@ -97,10 +97,8 @@ protected List getCGroupControllers() { @Override protected Map> parsePreConfiguredMountPath() throws IOException { Map> controllerMappings = new HashMap<>(); - String controllerPath = this.cGroupsMountConfig.getMountPath() + - Path.SEPARATOR + this.cGroupPrefix; controllerMappings.put(this.cGroupsMountConfig.getMountPath(), - 
readControllersFile(controllerPath)); + readControllersFile(this.cGroupsMountConfig.getMountPath())); return controllerMappings; } @@ -171,19 +169,32 @@ protected void updateEnabledControllersInHierarchy( try { Set enabledControllers = readControllersFile(yarnHierarchy.getAbsolutePath()); if (!enabledControllers.contains(controller.getName())) { - throw new ResourceHandlerException(String.format( + String errorMsg = String.format( "The controller %s is not enabled in the cgroup hierarchy: %s. Please enable it in " + "in the %s/cgroup.subtree_control file.", controller.getName(), yarnHierarchy.getAbsolutePath(), - yarnHierarchy.getParentFile().getAbsolutePath())); + yarnHierarchy.getParentFile().getAbsolutePath()); + + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } File subtreeControlFile = new File(yarnHierarchy.getAbsolutePath() + Path.SEPARATOR + CGROUP_SUBTREE_CONTROL_FILE); if (!subtreeControlFile.exists()) { - throw new ResourceHandlerException( - "No subtree control file found in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath()); + String errorMsg = "No subtree control file found in the cgroup hierarchy: " + + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); + } + if (!subtreeControlFile.canWrite()) { + String errorMsg = "Cannot write the cgroup.subtree_control file in the " + + "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } Writer w = new OutputStreamWriter(Files.newOutputStream(subtreeControlFile.toPath(), @@ -194,16 +205,20 @@ protected void updateEnabledControllersInHierarchy( yarnHierarchy.getAbsolutePath()); pw.write("+" + controller.getName()); if (pw.checkError()) { - throw new ResourceHandlerException("Failed 
to add the controller to the " + + String errorMsg = "Failed to add the controller to the " + "cgroup.subtree_control file in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath()); + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath())); } } } catch (IOException e) { - throw new ResourceHandlerException( - "Failed to update the cgroup.subtree_control file in the cgroup hierarchy: " + - yarnHierarchy.getAbsolutePath(), e); + String errorMsg = "Failed to update the cgroup.subtree_control file in the " + + "cgroup hierarchy: " + yarnHierarchy.getAbsolutePath(); + throw new ResourceHandlerException(getErrorWithDetails( + errorMsg, controller.getName(), + yarnHierarchy.getAbsolutePath()), e); } } - } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java index b8d1fb238d1f7..1198cda7ab050 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2HandlerImpl.java @@ -217,11 +217,13 @@ public void testManualCgroupSetting() throws Exception { conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn"); + File baseCgroup = new File(tmpPath); File subCgroup = new File(tmpPath, "/hadoop-yarn"); Assert.assertTrue("temp dir 
should be created", subCgroup.mkdirs()); subCgroup.deleteOnExit(); String enabledControllers = "cpuset cpu io memory hugetlb pids rdma misc\n"; + createFileWithContent(baseCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers); createFileWithContent(subCgroup, CGroupsHandler.CGROUP_CONTROLLERS_FILE, enabledControllers); File subtreeControlFile = new File(subCgroup.getAbsolutePath(),