Skip to content

Commit

Permalink
8322420: [Linux] cgroup v2: Limits in parent nested control groups ar…
Browse files Browse the repository at this point in the history
…e not detected

Reviewed-by: stuefe, asmehra
  • Loading branch information
jerboaa authored and pull[bot] committed Oct 2, 2024
1 parent 403be0a commit 8318454
Show file tree
Hide file tree
Showing 9 changed files with 362 additions and 111 deletions.
4 changes: 2 additions & 2 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
bool CgroupController::read_string(const char* filename, char* buf, size_t buf_size) {
assert(buf != nullptr, "buffer must not be null");
assert(filename != nullptr, "filename must be given");
char* s_path = subsystem_path();
const char* s_path = subsystem_path();
if (s_path == nullptr) {
log_debug(os, container)("read_string: subsystem path is null");
return false;
Expand Down Expand Up @@ -679,7 +679,7 @@ bool CgroupController::read_numerical_key_value(const char* filename, const char
assert(key != nullptr, "key must be given");
assert(result != nullptr, "result pointer must not be null");
assert(filename != nullptr, "file to search in must be given");
char* s_path = subsystem_path();
const char* s_path = subsystem_path();
if (s_path == nullptr) {
log_debug(os, container)("read_numerical_key_value: subsystem path is null");
return false;
Expand Down
18 changes: 17 additions & 1 deletion src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,15 @@
}

class CgroupController: public CHeapObj<mtInternal> {
protected:
char* _cgroup_path;
char* _mount_point;
public:
virtual char* subsystem_path() = 0;
virtual const char* subsystem_path() = 0;
virtual bool is_read_only() = 0;
const char* cgroup_path() { return _cgroup_path; }
const char* mount_point() { return _mount_point; }
virtual bool needs_hierarchy_adjustment() { return false; }

/* Read a numerical value as unsigned long
*
Expand Down Expand Up @@ -202,7 +208,12 @@ class CgroupCpuController: public CHeapObj<mtInternal> {
virtual int cpu_quota() = 0;
virtual int cpu_period() = 0;
virtual int cpu_shares() = 0;
virtual bool needs_hierarchy_adjustment() = 0;
virtual bool is_read_only() = 0;
virtual const char* subsystem_path() = 0;
virtual void set_subsystem_path(const char* cgroup_path) = 0;
virtual const char* mount_point() = 0;
virtual const char* cgroup_path() = 0;
};

// Pure virtual class representing version agnostic memory controllers
Expand All @@ -217,7 +228,12 @@ class CgroupMemoryController: public CHeapObj<mtInternal> {
virtual jlong rss_usage_in_bytes() = 0;
virtual jlong cache_usage_in_bytes() = 0;
virtual void print_version_specific_info(outputStream* st, julong host_mem) = 0;
virtual bool needs_hierarchy_adjustment() = 0;
virtual bool is_read_only() = 0;
virtual const char* subsystem_path() = 0;
virtual void set_subsystem_path(const char* cgroup_path) = 0;
virtual const char* mount_point() = 0;
virtual const char* cgroup_path() = 0;
};

class CgroupSubsystem: public CHeapObj<mtInternal> {
Expand Down
111 changes: 111 additions & 0 deletions src/hotspot/os/linux/cgroupUtil_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*
*/

#include "os_linux.hpp"
#include "cgroupUtil_linux.hpp"

int CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int host_cpus) {
Expand All @@ -46,3 +47,113 @@ int CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int host_cpus) {
log_trace(os, container)("OSContainer::active_processor_count: %d", result);
return result;
}

/*
 * Adjust the memory controller's path to the cgroup in its hierarchy that
 * imposes the lowest memory limit.
 *
 * Does nothing when the controller reports that no hierarchy adjustment is
 * needed. Otherwise the controller's cgroup path is shortened one component
 * at a time, up to and including "/", and the memory limit is read at every
 * level. If an ancestor imposes a lower limit than the original cgroup, the
 * controller is left pointing at that ancestor; otherwise the original cgroup
 * path is restored.
 *
 * mem: the memory controller to adjust; its cgroup_path() must be non-null
 *      and start with '/'.
 */
void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
  if (!mem->needs_hierarchy_adjustment()) {
    // nothing to do
    return;
  }
  log_trace(os, container)("Adjusting controller path for memory: %s", mem->subsystem_path());
  assert(mem->cgroup_path() != nullptr, "invariant");
  // set_subsystem_path() is called repeatedly below, so work on private
  // copies rather than on the controller's own string.
  char* orig = os::strdup(mem->cgroup_path());
  char* cg_path = os::strdup(orig);
  char* last_slash;
  assert(cg_path[0] == '/', "cgroup path must start with '/'");
  julong phys_mem = os::Linux::physical_memory();
  // Path of the ancestor with the lowest limit. Stays nullptr as long as no
  // ancestor is more restrictive than the original cgroup.
  char* limit_cg_path = nullptr;
  // Limit at the path the controller currently uses (presumably its own
  // cgroup path - confirm against the caller). Seed the search with it so
  // that a less restrictive ancestor limit cannot replace a tighter limit
  // set at the leaf.
  jlong limit = mem->read_memory_limit_in_bytes(phys_mem);
  jlong lowest_limit = (limit >= 0 && static_cast<julong>(limit) < phys_mem)
                         ? limit
                         : static_cast<jlong>(phys_mem);
  // Walk up the hierarchy. The loop ends once only the first path component
  // is left, i.e. the last '/' found is the leading one.
  while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
    *last_slash = '\0'; // strip path
    // update to shortened path and try again
    mem->set_subsystem_path(cg_path);
    limit = mem->read_memory_limit_in_bytes(phys_mem);
    // negative values are not usable limits and are skipped
    if (limit >= 0 && limit < lowest_limit) {
      lowest_limit = limit;
      os::free(limit_cg_path); // handles nullptr
      limit_cg_path = os::strdup(cg_path);
    }
  }
  // need to check limit at mount point
  mem->set_subsystem_path("/");
  limit = mem->read_memory_limit_in_bytes(phys_mem);
  if (limit >= 0 && limit < lowest_limit) {
    lowest_limit = limit;
    os::free(limit_cg_path); // handles nullptr
    limit_cg_path = os::strdup("/");
  }
  assert(lowest_limit >= 0, "limit must be positive");
  if (limit_cg_path != nullptr) {
    // we've found a lower limit higher up in the hierarchy,
    // set the path to the limit path
    mem->set_subsystem_path(limit_cg_path);
    log_trace(os, container)("Adjusted controller path for memory to: %s. "
                             "Lowest limit was: " JLONG_FORMAT,
                             mem->subsystem_path(),
                             lowest_limit);
  } else {
    // No ancestor is more restrictive than the original cgroup: undo the
    // temporary path changes made while probing.
    log_trace(os, container)("No lower limit found for memory in hierarchy %s, "
                             "adjusting to original path %s",
                             mem->mount_point(), orig);
    mem->set_subsystem_path(orig);
  }
  os::free(cg_path);
  os::free(orig);
  os::free(limit_cg_path);
}

/*
 * Adjust the cpu controller's path to the cgroup in its hierarchy that
 * imposes the lowest cpu limit.
 *
 * Does nothing when the controller reports that no hierarchy adjustment is
 * needed. Otherwise the controller's cgroup path is shortened one component
 * at a time, up to and including "/", and the processor count is computed at
 * every level. If an ancestor yields a lower count than the original cgroup,
 * the controller is left pointing at that ancestor; otherwise the original
 * cgroup path is restored.
 *
 * cpu: the cpu controller to adjust; its cgroup_path() must be non-null and
 *      start with '/'.
 */
void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
  if (!cpu->needs_hierarchy_adjustment()) {
    // nothing to do
    return;
  }
  log_trace(os, container)("Adjusting controller path for cpu: %s", cpu->subsystem_path());
  assert(cpu->cgroup_path() != nullptr, "invariant");
  // set_subsystem_path() is called repeatedly below, so work on private
  // copies rather than on the controller's own string.
  char* orig = os::strdup(cpu->cgroup_path());
  char* cg_path = os::strdup(orig);
  char* last_slash;
  assert(cg_path[0] == '/', "cgroup path must start with '/'");
  int host_cpus = os::Linux::active_processor_count();
  // Processor count at the path the controller currently uses (presumably
  // its own cgroup path - confirm against the caller). Seed the search with
  // it so that a less restrictive ancestor limit cannot replace a tighter
  // limit set at the leaf.
  int cpus = CgroupUtil::processor_count(cpu, host_cpus);
  int lowest_limit = cpus < host_cpus ? cpus : host_cpus;
  // Path of the ancestor with the lowest limit. Stays nullptr as long as no
  // ancestor is more restrictive than the original cgroup.
  char* limit_cg_path = nullptr;
  // Walk up the hierarchy. The loop ends once only the first path component
  // is left, i.e. the last '/' found is the leading one.
  while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
    *last_slash = '\0'; // strip path
    // update to shortened path and try again
    cpu->set_subsystem_path(cg_path);
    cpus = CgroupUtil::processor_count(cpu, host_cpus);
    if (cpus != host_cpus && cpus < lowest_limit) {
      lowest_limit = cpus;
      os::free(limit_cg_path); // handles nullptr
      limit_cg_path = os::strdup(cg_path);
    }
  }
  // need to check limit at mount point
  cpu->set_subsystem_path("/");
  cpus = CgroupUtil::processor_count(cpu, host_cpus);
  if (cpus != host_cpus && cpus < lowest_limit) {
    lowest_limit = cpus;
    os::free(limit_cg_path); // handles nullptr
    // The limit was measured at "/". cg_path still holds the first path
    // component at this point, so it must not be recorded here.
    limit_cg_path = os::strdup("/");
  }
  assert(lowest_limit >= 0, "limit must be positive");
  if (limit_cg_path != nullptr) {
    // we've found a lower limit higher up in the hierarchy,
    // set the path to the limit path
    cpu->set_subsystem_path(limit_cg_path);
    log_trace(os, container)("Adjusted controller path for cpu to: %s. "
                             "Lowest limit was: %d",
                             cpu->subsystem_path(),
                             lowest_limit);
  } else {
    // No ancestor is more restrictive than the original cgroup: undo the
    // temporary path changes made while probing.
    log_trace(os, container)("No lower limit found for cpu in hierarchy %s, "
                             "adjusting to original path %s",
                             cpu->mount_point(), orig);
    cpu->set_subsystem_path(orig);
  }
  os::free(cg_path);
  os::free(orig);
  os::free(limit_cg_path);
}
6 changes: 6 additions & 0 deletions src/hotspot/os/linux/cgroupUtil_linux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ class CgroupUtil: AllStatic {

public:
static int processor_count(CgroupCpuController* cpu, int host_cpus);
// Given a memory controller, adjust its path to the point in the hierarchy
// that imposes the lowest memory limit.
static void adjust_controller(CgroupMemoryController* m);
// Given a cpu controller, adjust its path to the point in the hierarchy
// that imposes the lowest cpu limit.
static void adjust_controller(CgroupCpuController* c);
};

#endif // CGROUP_UTIL_LINUX_HPP
87 changes: 34 additions & 53 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,15 @@
* Set directory to subsystem specific files based
* on the contents of the mountinfo and cgroup files.
*/
void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
void CgroupV1Controller::set_subsystem_path(const char* cgroup_path) {
if (_cgroup_path != nullptr) {
os::free(_cgroup_path);
}
if (_path != nullptr) {
os::free(_path);
_path = nullptr;
}
_cgroup_path = os::strdup(cgroup_path);
stringStream ss;
if (_root != nullptr && cgroup_path != nullptr) {
if (strcmp(_root, "/") == 0) {
Expand All @@ -52,7 +60,7 @@ void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
ss.print_raw(_mount_point);
_path = os::strdup(ss.base());
} else {
char *p = strstr(cgroup_path, _root);
char *p = strstr((char*)cgroup_path, _root);
if (p != nullptr && p == _root) {
if (strlen(cgroup_path) > strlen(_root)) {
ss.print_raw(_mount_point);
Expand All @@ -66,27 +74,15 @@ void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
}
}

/* uses_mem_hierarchy
*
* Return whether or not hierarchical cgroup accounting is being
* done.
*
* return:
* A number > 0 if true, or
* OSCONTAINER_ERROR for not supported
/*
* In the common case (containers) we have _root == _cgroup_path, and thus the
* controller path is set to the _mount_point. This is where the limits are
* exposed in the cgroup pseudo filesystem (at the leaf), so no adjustment of
* the path is needed.
*/
jlong CgroupV1MemoryController::uses_mem_hierarchy() {
julong use_hierarchy;
CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.use_hierarchy", "Use Hierarchy", use_hierarchy);
return (jlong)use_hierarchy;
}

void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) {
reader()->set_subsystem_path(cgroup_path);
jlong hierarchy = uses_mem_hierarchy();
if (hierarchy > 0) {
set_hierarchical(true);
}
// Reports whether the controller path may have to be moved within the cgroup
// hierarchy: true whenever the controller's cgroup path is not identical to
// its root.
bool CgroupV1Controller::needs_hierarchy_adjustment() {
  assert(_cgroup_path != nullptr, "sanity");
  const bool root_equals_cgroup_path = (strcmp(_root, _cgroup_path) == 0);
  return !root_equals_cgroup_path;
}

static inline
Expand Down Expand Up @@ -115,20 +111,6 @@ jlong CgroupV1MemoryController::read_memory_limit_in_bytes(julong phys_mem) {
julong memlimit;
CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.limit_in_bytes", "Memory Limit", memlimit);
if (memlimit >= phys_mem) {
log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
if (is_hierarchical()) {
julong hier_memlimit;
bool is_ok = reader()->read_numerical_key_value("/memory.stat", "hierarchical_memory_limit", &hier_memlimit);
if (!is_ok) {
return OSCONTAINER_ERROR;
}
log_trace(os, container)("Hierarchical Memory Limit is: " JULONG_FORMAT, hier_memlimit);
if (hier_memlimit < phys_mem) {
verbose_log(hier_memlimit, phys_mem);
return (jlong)hier_memlimit;
}
log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
}
verbose_log(memlimit, phys_mem);
return (jlong)-1;
} else {
Expand All @@ -150,26 +132,10 @@ jlong CgroupV1MemoryController::read_memory_limit_in_bytes(julong phys_mem) {
* upper bound)
*/
jlong CgroupV1MemoryController::read_mem_swap(julong host_total_memsw) {
julong hier_memswlimit;
julong memswlimit;
CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.memsw.limit_in_bytes", "Memory and Swap Limit", memswlimit);
if (memswlimit >= host_total_memsw) {
log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
if (is_hierarchical()) {
const char* matchline = "hierarchical_memsw_limit";
bool is_ok = reader()->read_numerical_key_value("/memory.stat",
matchline,
&hier_memswlimit);
if (!is_ok) {
return OSCONTAINER_ERROR;
}
log_trace(os, container)("Hierarchical Memory and Swap Limit is: " JULONG_FORMAT, hier_memswlimit);
if (hier_memswlimit >= host_total_memsw) {
log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
} else {
return (jlong)hier_memswlimit;
}
}
log_trace(os, container)("Memory and Swap Limit is: Unlimited");
return (jlong)-1;
} else {
return (jlong)memswlimit;
Expand Down Expand Up @@ -233,6 +199,21 @@ jlong CgroupV1MemoryController::memory_soft_limit_in_bytes(julong phys_mem) {
}
}

// Constructor
//
// Builds the cgroup v1 subsystem from its individual controllers. The cpuset,
// cpuacct and pids controllers are stored as passed in. The memory and cpu
// controllers first get their paths adjusted via CgroupUtil::adjust_controller
// and are then wrapped in CachingCgroupController instances.
CgroupV1Subsystem::CgroupV1Subsystem(CgroupV1Controller* cpuset,
CgroupV1CpuController* cpu,
CgroupV1Controller* cpuacct,
CgroupV1Controller* pids,
CgroupV1MemoryController* memory) :
_cpuset(cpuset),
_cpuacct(cpuacct),
_pids(pids) {
// Adjust the paths before wrapping, so the caching wrappers are created
// around controllers that already have their final path.
CgroupUtil::adjust_controller(memory);
CgroupUtil::adjust_controller(cpu);
_memory = new CachingCgroupController<CgroupMemoryController>(memory);
_cpu = new CachingCgroupController<CgroupCpuController>(cpu);
}

bool CgroupV1Subsystem::is_containerized() {
// containerized iff all required controllers are mounted
// read-only. See OSContainer::is_containerized() for
Expand Down
Loading

0 comments on commit 8318454

Please sign in to comment.