From 166cd537d5a84041c3c1f3290b5f938ba150bce8 Mon Sep 17 00:00:00 2001 From: Clark Zinzow Date: Fri, 15 Apr 2022 15:38:01 -0700 Subject: [PATCH] [Core] Fix in-container memory limit fetching for cgroups v2 (#23922) --- python/ray/_private/utils.py | 18 ++++++---- python/ray/tests/test_advanced_8.py | 52 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/python/ray/_private/utils.py b/python/ray/_private/utils.py index d43470c30bc1..e4e8a95e9b25 100644 --- a/python/ray/_private/utils.py +++ b/python/ray/_private/utils.py @@ -370,7 +370,12 @@ def open_log(path, unbuffered=False, **kwargs): return stream -def get_system_memory(): +def get_system_memory( + # For cgroups v1: + memory_limit_filename="/sys/fs/cgroup/memory/memory.limit_in_bytes", + # For cgroups v2: + memory_limit_filename_v2="/sys/fs/cgroup/memory.max", +): """Return the total amount of system memory in bytes. Returns: @@ -380,16 +385,17 @@ def get_system_memory(): # container. Note that this file is not specific to Docker and its value is # often much larger than the actual amount of memory. docker_limit = None - # For cgroups v1: - memory_limit_filename = "/sys/fs/cgroup/memory/memory.limit_in_bytes" - # For cgroups v2: - memory_limit_filename_v2 = "/sys/fs/cgroup/memory.max" if os.path.exists(memory_limit_filename): with open(memory_limit_filename, "r") as f: docker_limit = int(f.read()) elif os.path.exists(memory_limit_filename_v2): with open(memory_limit_filename_v2, "r") as f: - docker_limit = int(f.read()) + max_file = f.read() + if max_file.isnumeric(): + docker_limit = int(max_file) + else: + # max_file is "max", i.e. is unset. + docker_limit = None # Use psutil if it is available. psutil_memory_in_bytes = psutil.virtual_memory().total diff --git a/python/ray/tests/test_advanced_8.py b/python/ray/tests/test_advanced_8.py index 1995fd7cc089..cce9296a43dc 100644 --- a/python/ray/tests/test_advanced_8.py +++ b/python/ray/tests/test_advanced_8.py @@ -9,6 +9,7 @@ import numpy as np import pytest +import psutil import ray from ray.dashboard import k8s_utils @@ -273,6 +274,57 @@ def initialized(self): wait_for_condition(lambda: ray.available_resources()[resource_name] < quantity) +def test_get_system_memory(): + # cgroups v1, set + with tempfile.NamedTemporaryFile("w") as memory_limit_file: + memory_limit_file.write("100") + memory_limit_file.flush() + assert ( + ray._private.utils.get_system_memory( + memory_limit_filename=memory_limit_file.name, + memory_limit_filename_v2="__does_not_exist__", + ) + == 100 + ) + + # cgroups v1, high + with tempfile.NamedTemporaryFile("w") as memory_limit_file: + memory_limit_file.write(str(2 ** 64)) + memory_limit_file.flush() + psutil_memory_in_bytes = psutil.virtual_memory().total + assert ( + ray._private.utils.get_system_memory( + memory_limit_filename=memory_limit_file.name, + memory_limit_filename_v2="__does_not_exist__", + ) + == psutil_memory_in_bytes + ) + # cgroups v2, set + with tempfile.NamedTemporaryFile("w") as memory_max_file: + memory_max_file.write("100") + memory_max_file.flush() + assert ( + ray._private.utils.get_system_memory( + memory_limit_filename="__does_not_exist__", + memory_limit_filename_v2=memory_max_file.name, + ) + == 100 + ) + + # cgroups v2, not set + with tempfile.NamedTemporaryFile("w") as memory_max_file: + memory_max_file.write("max") + memory_max_file.flush() + psutil_memory_in_bytes = psutil.virtual_memory().total + assert ( + ray._private.utils.get_system_memory( + memory_limit_filename="__does_not_exist__", + memory_limit_filename_v2=memory_max_file.name, + ) + == psutil_memory_in_bytes + ) + + @pytest.mark.skipif(sys.platform == "win32", reason="not relevant for windows") def test_detect_docker_cpus(): # No limits set