Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/raydepsets/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ py_test(
srcs = ["tests/test_workspace.py"],
data = [
"tests/test_data/test.depsets.yaml",
"tests/test_data/test2.depsets.yaml",
],
tags = [
"ci_unit",
Expand Down
5 changes: 5 additions & 0 deletions ci/raydepsets/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ def test_check_if_subset_exists(self):
output="requirements_compiled_general.txt",
append_flags=[],
override_flags=[],
config_name="test.depsets.yaml",
)
with self.assertRaises(RuntimeError):
manager.check_subset_exists(
Expand Down Expand Up @@ -437,6 +438,7 @@ def test_build_graph_bad_operation(self):
operation="invalid_op",
requirements=["requirements_test.txt"],
output="requirements_compiled_invalid_op.txt",
config_name="test.depsets.yaml",
)
_overwrite_config_file(tmpdir, depset)
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -608,6 +610,7 @@ def test_copy_lock_files_to_temp_dir(self):
constraints=["requirement_constraints_test.txt"],
requirements=["requirements_test.txt"],
output="requirements_compiled_test.txt",
config_name="test.depsets.yaml",
)
_overwrite_config_file(tmpdir, depset)
manager = _create_test_manager(tmpdir, check=True)
Expand All @@ -632,6 +635,7 @@ def test_diff_lock_files_out_of_date(self):
constraints=["requirement_constraints_test.txt"],
requirements=["requirements_test.txt"],
output="requirements_compiled_test.txt",
config_name="test.depsets.yaml",
)
_overwrite_config_file(tmpdir, depset)
manager = _create_test_manager(tmpdir, check=True)
Expand Down Expand Up @@ -666,6 +670,7 @@ def test_diff_lock_files_up_to_date(self):
constraints=["requirement_constraints_test.txt"],
requirements=["requirements_test.txt"],
output="requirements_compiled_test.txt",
config_name="test.depsets.yaml",
)
_overwrite_config_file(tmpdir, depset)
manager = _create_test_manager(tmpdir, check=True)
Expand Down
28 changes: 28 additions & 0 deletions ci/raydepsets/tests/test_data/test2.depsets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
build_arg_sets:
py311_cpu:
CUDA_VERSION: cpu
PYTHON_VERSION: py311

depsets:
- name: other_config_depset
operation: expand
depsets:
- expand_general_depset__${PYTHON_VERSION}_${CUDA_VERSION}
- expanded_depset__${PYTHON_VERSION}_${CUDA_VERSION}
output: requirements_compiled_other_config.txt
build_arg_sets:
- py311_cpu
- name: pre_hook_test_depset
operation: compile
requirements:
- requirements_test.txt
output: requirements_compiled_pre_hook.txt
pre_hooks:
- pre-hook-test.sh
- name: pre_hook_invalid_test_depset
operation: compile
requirements:
- requirements_test.txt
output: requirements_compiled_pre_hook_invalid.txt
pre_hooks:
- pre-hook-error-test.sh
82 changes: 71 additions & 11 deletions ci/raydepsets/tests/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ def test_parse_build_arg_sets():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_config(path=Path(tmpdir) / "test.depsets.yaml")
assert config.build_arg_sets["py311_cpu"].build_args == {
"CUDA_VERSION": "cpu",
"PYTHON_VERSION": "py311",
}
assert config.build_arg_sets["py311_cuda128"].build_args == {
"CUDA_VERSION": 128,
"PYTHON_VERSION": "py311",
}
config = workspace.load_config(config_path=Path(tmpdir) / "test.depsets.yaml")
assert "general_depset__py311_cpu" in [depset.name for depset in config.depsets]
assert "build_args_test_depset__py311_cpu" in [
depset.name for depset in config.depsets
]
assert "expanded_depset__py311_cpu" in [
depset.name for depset in config.depsets
]
Comment on lines +22 to +28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This test implementation can be made more efficient and readable.

  1. The list comprehension [depset.name for depset in config.depsets] is created three times. It's better to compute it once.
  2. Using a set for membership testing (in) is more efficient than a list.

Also, the test name test_parse_build_arg_sets is now misleading since the test's focus has changed from testing build_arg_sets parsing to verifying the outcome of load_config. Consider renaming it to something like test_load_config_with_build_args for clarity.

Suggested change
assert "general_depset__py311_cpu" in [depset.name for depset in config.depsets]
assert "build_args_test_depset__py311_cpu" in [
depset.name for depset in config.depsets
]
assert "expanded_depset__py311_cpu" in [
depset.name for depset in config.depsets
]
depset_names = {depset.name for depset in config.depsets}
assert "general_depset__py311_cpu" in depset_names
assert "build_args_test_depset__py311_cpu" in depset_names
assert "expanded_depset__py311_cpu" in depset_names



def test_substitute_build_args():
Expand Down Expand Up @@ -65,17 +64,78 @@ def test_invalid_build_arg_set():
)
with pytest.raises(KeyError):
workspace = Workspace(dir=tmpdir)
workspace.load_config(path=Path(tmpdir) / "test.depsets.yaml")
workspace.load_config(config_path=Path(tmpdir) / "test.depsets.yaml")


def test_parse_pre_hooks():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_config(path=Path(tmpdir) / "test.depsets.yaml")
config = workspace.load_config(config_path=Path(tmpdir) / "test2.depsets.yaml")
pre_hook_depset = get_depset_by_name(config.depsets, "pre_hook_test_depset")
assert pre_hook_depset.pre_hooks == ["pre-hook-test.sh"]


def test_load_first_config():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_config(config_path=Path(tmpdir) / "test.depsets.yaml")
assert config.depsets is not None
assert len(config.depsets) == 8


def test_load_second_config():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_config(config_path=Path(tmpdir) / "test2.depsets.yaml")
assert config.depsets is not None
assert len(config.depsets) == 3


# load all configs should always load all depsets
def test_load_all_configs_first_config():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The tests test_load_all_configs_first_config and test_load_all_configs_second_config are nearly identical. To reduce code duplication and improve maintainability, you could use pytest.mark.parametrize to run the same test logic with different config files.

Example:

import pytest

@pytest.mark.parametrize("config_file", ["test.depsets.yaml", "test2.depsets.yaml"])
def test_load_all_configs(config_file):
    with tempfile.TemporaryDirectory() as tmpdir:
        copy_data_to_tmpdir(tmpdir)
        workspace = Workspace(dir=tmpdir)
        config = workspace.load_configs(config_path=Path(tmpdir) / config_file)
        assert config.depsets is not None
        assert len(config.depsets) == 11

with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_configs(config_path=Path(tmpdir) / "test.depsets.yaml")
assert config.depsets is not None
assert len(config.depsets) == 11


# load all configs should always load all depsets
def test_load_all_configs_second_config():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_configs(config_path=Path(tmpdir) / "test2.depsets.yaml")
assert config.depsets is not None
assert len(config.depsets) == 11


def test_merge_configs():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
config = workspace.load_config(config_path=Path(tmpdir) / "test.depsets.yaml")
config2 = workspace.load_config(config_path=Path(tmpdir) / "test2.depsets.yaml")
merged_config = workspace.merge_configs([config, config2])
assert merged_config.depsets is not None
assert len(merged_config.depsets) == 11


def test_get_configs_dir():
with tempfile.TemporaryDirectory() as tmpdir:
copy_data_to_tmpdir(tmpdir)
workspace = Workspace(dir=tmpdir)
configs_dir = workspace.get_configs_dir(
configs_path=Path(tmpdir) / "test.depsets.yaml"
)
assert len(configs_dir) == 2
assert f"{tmpdir}/test.depsets.yaml" in configs_dir
assert f"{tmpdir}/test2.depsets.yaml" in configs_dir
Comment on lines +136 to +137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a couple of issues with these assertions:

  1. Hardcoded path separator: Using an f-string with / as a path separator is not portable across operating systems (e.g., Windows uses \). It's better to use os.path.join to construct paths.
  2. Unordered results: os.listdir (used inside get_configs_dir) does not guarantee the order of files. Asserting membership in a list can be flaky if the order changes. It's more robust to convert the result to a set and compare it with an expected set of paths.
Suggested change
assert f"{tmpdir}/test.depsets.yaml" in configs_dir
assert f"{tmpdir}/test2.depsets.yaml" in configs_dir
assert set(configs_dir) == {
f"{tmpdir}/test.depsets.yaml",
f"{tmpdir}/test2.depsets.yaml",
}



if __name__ == "__main__":
sys.exit(pytest.main(["-v", __file__]))
41 changes: 32 additions & 9 deletions ci/raydepsets/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Depset:
name: str
operation: str
output: str
config_name: str
constraints: Optional[List[str]] = None
override_flags: Optional[List[str]] = None
append_flags: Optional[List[str]] = None
Expand All @@ -40,7 +41,7 @@ def _substitute_build_args(obj: Any, build_arg_set: BuildArgSet):
return obj


def _dict_to_depset(depset: dict) -> Depset:
def _dict_to_depset(depset: dict, config_name: str) -> Depset:
return Depset(
name=depset.get("name"),
requirements=depset.get("requirements", []),
Expand All @@ -53,16 +54,16 @@ def _dict_to_depset(depset: dict) -> Depset:
append_flags=depset.get("append_flags", []),
pre_hooks=depset.get("pre_hooks", []),
packages=depset.get("packages", []),
config_name=config_name,
)


@dataclass
class Config:
depsets: List[Depset] = field(default_factory=list)
build_arg_sets: Dict[str, BuildArgSet] = field(default_factory=dict)

@classmethod
def from_dict(cls, data: dict) -> "Config":
def from_dict(cls, data: dict, config_name: str) -> "Config":
build_arg_sets = cls.parse_build_arg_sets(data.get("build_arg_sets", {}))
raw_depsets = data.get("depsets", [])
depsets = []
Expand All @@ -75,10 +76,10 @@ def from_dict(cls, data: dict) -> "Config":
if build_arg_set is None:
raise KeyError(f"Build arg set {build_arg_set_key} not found")
depset_yaml = _substitute_build_args(depset, build_arg_set)
depsets.append(_dict_to_depset(depset_yaml))
depsets.append(_dict_to_depset(depset_yaml, config_name))
else:
depsets.append(_dict_to_depset(depset))
return Config(depsets=depsets, build_arg_sets=build_arg_sets)
depsets.append(_dict_to_depset(depset, config_name))
return Config(depsets=depsets)

@staticmethod
def parse_build_arg_sets(build_arg_sets: Dict[str, dict]) -> Dict[str, BuildArgSet]:
Expand All @@ -98,7 +99,29 @@ def __init__(self, dir: str = None):
if self.dir is None:
raise RuntimeError("BUILD_WORKSPACE_DIRECTORY is not set")

def load_config(self, path: str) -> Config:
with open(os.path.join(self.dir, path), "r") as f:
def load_configs(self, config_path: str = None) -> Config:
merged_configs = self.merge_configs(self.get_all_configs(config_path))
return merged_configs

def get_all_configs(self, config_path: str = None) -> List[Config]:
return [self.load_config(path) for path in self.get_configs_dir(config_path)]

def get_configs_dir(self, configs_path: str) -> List[str]:
configs_dir = os.path.dirname(os.path.join(self.dir, configs_path))
return [
os.path.join(self.dir, configs_dir, path)
for path in os.listdir(os.path.join(self.dir, configs_dir))
if path.endswith(".depsets.yaml")
]
Comment on lines +109 to +115
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The implementation of get_configs_dir is a bit confusing because it repeatedly uses os.path.join(self.dir, ...) where the second argument is often an absolute path. While this works due to os.path.join's behavior, it makes the code harder to reason about.

A clearer approach would be to resolve the directory path once and then use it to find and construct the full paths of the config files. This would also simplify the load_config method, which currently has a redundant os.path.join.

    def get_configs_dir(self, configs_path: str) -> List[str]:
        if not os.path.isabs(configs_path):
            configs_path = os.path.join(self.dir, configs_path)

        configs_dir = os.path.dirname(configs_path)

        return [
            os.path.join(configs_dir, path)
            for path in os.listdir(configs_dir)
            if path.endswith(".depsets.yaml")
        ]


def load_config(self, config_path: str) -> Config:
with open(os.path.join(self.dir, config_path), "r") as f:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Since get_configs_dir returns absolute paths (especially with the suggested refactoring), the os.path.join(self.dir, config_path) is redundant here. os.path.join will simply return config_path because it's an absolute path. You can simplify this by opening config_path directly.

        with open(config_path, "r") as f:

data = yaml.safe_load(f)
return Config.from_dict(data)
config_name = os.path.basename(config_path)
config = Config.from_dict(data, config_name)
return config
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Path Construction Error Causes File Load Failure

The get_configs_dir method incorrectly constructs file paths by joining self.dir multiple times, resulting in duplicated directory prefixes. This creates malformed paths for configuration files. load_config then attempts to prepend self.dir again to these already-full paths, which can prevent files from being loaded.

Fix in Cursor Fix in Web


def merge_configs(self, configs: List[Config]) -> Config:
return Config(
depsets=[depset for config in configs for depset in config.depsets]
)
Comment on lines +124 to +127
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation of merge_configs simply concatenates lists of depsets. If multiple configuration files define a depset with the same name, this will result in duplicates in the final list. This can lead to unpredictable behavior, as functions like _get_depset will only return the first match they find, which depends on the non-guaranteed order of files from os.listdir.

To prevent this, you should check for and disallow duplicate depset names across all configuration files. This will make the behavior deterministic and prevent subtle bugs.

    def merge_configs(self, configs: List[Config]) -> Config:
        all_depsets = [depset for config in configs for depset in config.depsets]
        depset_names = [depset.name for depset in all_depsets]
        if len(depset_names) != len(set(depset_names)):
            from collections import Counter

            duplicates = [
                name for name, count in Counter(depset_names).items() if count > 1
            ]
            raise ValueError(f"Duplicate depset names found: {duplicates}")
        return Config(depsets=all_depsets)