Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 33 additions & 18 deletions .buildkite/ml.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ steps:
PYTHON: "{{matrix}}"
RAYCI_IS_GPU_BUILD: "false"
matrix:
- "3.10"
- "3.12"
tags: cibase

Expand All @@ -62,6 +63,7 @@ steps:
PYTHON: "{{matrix}}"
RAYCI_IS_GPU_BUILD: "true"
matrix:
- "3.10"
- "3.12"
tags: cibase

Expand All @@ -74,7 +76,8 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--except-tags gpu,minimal,tune,doctest,needs_credentials,train_v2,train_v2_gpu
depends_on: [ "mlbuild", "forge" ]
--python-version 3.10 --build-name mlbuild-py3.10
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":bullettrain_front: ml: train v2 tests"
tags: train
Expand All @@ -83,9 +86,10 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--python-version 3.10 --build-name mlbuild-py3.10
--only-tags train_v2
--except-tags needs_credentials
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":bullettrain_front: ml: train v2 gpu tests"
tags:
Expand All @@ -95,9 +99,9 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2
--build-name mlgpubuild
--build-name mlgpubuild-py3.10 --python-version 3.10
--only-tags train_v2_gpu
depends_on: [ "mlgpubuild", "forge" ]
depends_on: [ "mlgpubuild-multipy", "forge" ]

- label: ":train: ml: {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
Expand Down Expand Up @@ -129,9 +133,9 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... //python/ray/air/... //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2
--build-name mlgpubuild
--build-name mlgpubuild-py3.10 --python-version 3.10
--only-tags gpu
depends_on: [ "mlgpubuild", "forge" ]
depends_on: [ "mlgpubuild-multipy", "forge" ]

- label: ":train: ml: train gpu {{matrix.python}} tests ({{matrix.worker_id}})"
if: build.pull_request.labels includes "continuous-build" || pipeline.id == "0189e759-8c96-4302-b6b5-b4274406bf89" || pipeline.id == "018f4f1e-1b73-4906-9802-92422e3badaa"
Expand Down Expand Up @@ -163,18 +167,20 @@ steps:
- $(python ci/env/setup_credentials.py)
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--parallelism-per-worker 3
--python-version 3.10 --build-name mlbuild-py3.10
--only-tags needs_credentials
--test-env=WANDB_API_KEY --test-env=COMET_API_KEY
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: tune tests"
tags: tune
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--parallelism-per-worker 3
--python-version 3.10 --build-name mlbuild-py3.10
--except-tags doctest,soft_imports,rllib
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: tune soft import tests"
tags: tune
Expand All @@ -191,22 +197,25 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/air/... ml
--parallelism-per-worker 3
--python-version 3.10 --build-name mlbuild-py3.10
--except-tags gpu,doctest
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... ml
--python-version 3.10 --build-name mlbuild-py3.10
--parallelism-per-worker 3
--only-tags ray_air
--skip-ray-installation
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: train+tune tests"
tags: train
instance_type: medium
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--parallelism-per-worker 3
--python-version 3.10 --build-name mlbuild-py3.10
--only-tags tune
--except-tags ray_air,gpu,doctest,needs_credentials
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: rllib+tune tests"
tags:
Expand All @@ -215,10 +224,11 @@ steps:
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/tune/... ml
--python-version 3.10 --build-name mlbuild-py3.10
--parallelism-per-worker 3
--only-tags rllib
--except-tags gpu
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: release tests"
tags:
Expand All @@ -228,8 +238,9 @@ steps:
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //release/... ml
--python-version 3.10 --build-name mlbuild-py3.10
--parallelism-per-worker 3
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: train minimal"
tags: train
Expand All @@ -238,7 +249,7 @@ steps:
- python ./ci/env/check_minimal_install.py
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... ml
--parallelism-per-worker 2
--build-name minbuild-ml-py3.9
--build-name minbuild-ml-py3.10 --python-version 3.10
--only-tags minimal
depends_on: [ "minbuild-ml", "forge" ]

Expand All @@ -253,16 +264,18 @@ steps:
# doc tests
- bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--python-version 3.10 --build-name mlbuild-py3.10
--only-tags doctest
--except-tags gpu
--parallelism-per-worker 3
# doc examples
- bazel run //ci/ray_ci:test_in_docker -- //doc/... ml
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--python-version 3.10 --build-name mlbuild-py3.10
--except-tags gpu,post_wheel_build,doctest,highly_parallel
--parallelism-per-worker 3
--skip-ray-installation
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]

- label: ":train: ml: train gpu lightning 2.0 tests"
tags:
Expand All @@ -284,9 +297,10 @@ steps:
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... ml --run-flaky-tests
--python-version 3.10 --build-name mlbuild-py3.10
--parallelism-per-worker 2
--except-tags gpu,needs_credentials,train_v2_gpu
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]
soft_fail: true

- label: ":train: ml: flaky authentication tests"
Expand All @@ -299,10 +313,11 @@ steps:
commands:
- $(python ci/env/setup_credentials.py)
- bazel run //ci/ray_ci:test_in_docker -- //... ml --run-flaky-tests
--python-version 3.10 --build-name mlbuild-py3.10
--parallelism-per-worker 3
--only-tags needs_credentials
--test-env=WANDB_API_KEY --test-env=COMET_API_KEY
depends_on: [ "mlbuild", "forge" ]
depends_on: [ "mlbuild-multipy", "forge" ]
soft_fail: true

- label: ":train: ml: train gpu flaky tests"
Expand All @@ -315,7 +330,7 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... ml --run-flaky-tests
--parallelism-per-worker 2
--build-name mlgpubuild
--python-version 3.10 --build-name mlgpubuild-py3.10
--only-tags gpu,train_v2_gpu
depends_on: [ "mlgpubuild", "forge" ]
depends_on: [ "mlgpubuild-multipy", "forge" ]
soft_fail: true