
Commit 115f395
Merge branch 'master' into lkchen-ray_data_llm
Signed-off-by: Linkun Chen <github@lkchen.net>
2 parents: 579c42f + 75941e7

38 files changed: +799 −887 lines

.buildkite/core.rayci.yml

Lines changed: 3 additions & 1 deletion
@@ -214,7 +214,9 @@ steps:
       --except-tags kubernetes,manual
 
   - label: ":ray: core: asan tests"
-    tags: python
+    tags:
+      - python
+      - skip-on-premerge # currently failing
     instance_type: medium
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //python/ray/tests/... core
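Both the `--except-tags` flag above and the new `skip-on-premerge` tag rely on tag-based test selection: a job excludes any test carrying one of the listed tags. A minimal sketch of that filtering, with hypothetical names (not Ray's actual CI code):

```python
def select_tests(tests, except_tags):
    """Keep only the tests that carry none of the excluded tags.

    `tests` maps a test name to its set of tags; `except_tags` is the list
    passed via a flag like `--except-tags kubernetes,manual`.
    """
    excluded = set(except_tags)
    return [name for name, tags in tests.items() if not (tags & excluded)]


tests = {
    "test_basic": {"python"},
    "test_k8s": {"python", "kubernetes"},
    "test_asan": {"python", "skip-on-premerge"},
}
# Excluding skip-on-premerge drops the currently failing asan tests.
print(select_tests(tests, ["kubernetes", "skip-on-premerge"]))
```

This is why tagging the asan step `skip-on-premerge` removes it from premerge runs without deleting the step itself.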

.buildkite/release-automation/pre_release.rayci.yml

Lines changed: 12 additions & 12 deletions
@@ -51,11 +51,22 @@ steps:
       RAYCI_RELEASE: 1
       RAYCI_SCHEDULE: "nightly"
 
+  - label: "Check Ray commit in {{matrix}} nightly images"
+    key: check-ray-commit
+    if: build.branch !~ /^releases\// && build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") == "1"
+    depends_on: trigger-postmerge-nightly
+    allow_dependency_failure: true
+    commands:
+      - bazel run //ci/ray_ci/automation:check_nightly_ray_commit -- --ray_type={{matrix}} --expected_commit="${BUILDKITE_COMMIT}"
+    matrix:
+      - ray
+      - ray-ml
+
   - label: "Trigger :kubernetes: Kuberay CI Tests"
     if: build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") == "1"
     trigger: "ray-ecosystem-ci-kuberay-ci"
     key: trigger-kuberay
-    depends_on: trigger-postmerge-nightly
+    depends_on: check-ray-commit
     build:
       branch: "release-1.3"
       message: "Triggered by release-automation build #${BUILDKITE_BUILD_NUMBER}"
@@ -125,14 +136,3 @@ steps:
     env:
       AUTOMATIC: 1
       RELEASE_FREQUENCY: "weekly"
-
-  - label: "Check Ray commit in {{matrix}} nightly images"
-    key: check-ray-commit
-    if: build.branch !~ /^releases\// && build.env("RAYCI_WEEKLY_RELEASE_NIGHTLY") == "1"
-    depends_on: trigger-postmerge-nightly
-    allow_dependency_failure: true
-    commands:
-      - bazel run //ci/ray_ci/automation:check_nightly_ray_commit -- --ray_type={{matrix}} --expected_commit="${BUILDKITE_COMMIT}"
-    matrix:
-      - ray
-      - ray-ml
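The moved step uses Buildkite's `matrix` key, which expands one step definition into one job per value, substituting `{{matrix}}` wherever it appears (here producing a `ray` job and a `ray-ml` job). A simplified sketch of that expansion, not Buildkite's actual implementation:

```python
def expand_matrix(step):
    """Expand a step with a `matrix` key into one concrete step per value,
    substituting the `{{matrix}}` placeholder in top-level string fields."""
    values = step.get("matrix", [None])
    expanded = []
    for value in values:
        concrete = {}
        for key, field in step.items():
            if key == "matrix":
                continue  # the matrix key itself is consumed by expansion
            if isinstance(field, str) and value is not None:
                field = field.replace("{{matrix}}", value)
            concrete[key] = field
        expanded.append(concrete)
    return expanded


step = {
    "label": "Check Ray commit in {{matrix}} nightly images",
    "matrix": ["ray", "ray-ml"],
}
for s in expand_matrix(step):
    print(s["label"])
```

Because both expanded jobs share the `check-ray-commit` key, the KubeRay trigger's `depends_on: check-ray-commit` now waits for the commit check on both image types.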

.github/CODEOWNERS

Lines changed: 4 additions & 0 deletions
@@ -68,6 +68,10 @@
 
 # ==== Libraries and frameworks ====
 
+# Common directory shared by core and the libraries.
+# @edoakes is the czar for now because the pattern is new.
+/python/ray/_common/ @edoakes @aslonnie
+
 # Ray data.
 /python/ray/data/ @ray-project/ray-data
 /doc/source/data/ @ray-project/ray-data
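A CODEOWNERS rule ending in `/` claims every file below that directory, and when several rules match, the last one wins. A simplified model of that matching (directory-prefix rules only, not GitHub's full glob syntax):

```python
def owners_for(path, rules):
    """Return the owners for `path` under a simplified CODEOWNERS model:
    a rule ending in '/' claims everything under that directory, and the
    last matching rule wins (as in real CODEOWNERS)."""
    matched = []
    for pattern, owners in rules:
        prefix = pattern.lstrip("/")
        if pattern.endswith("/") and path.startswith(prefix):
            matched = owners
        elif path == prefix:
            matched = owners
    return matched


rules = [
    ("/python/ray/_common/", ["@edoakes", "@aslonnie"]),
    ("/python/ray/data/", ["@ray-project/ray-data"]),
]
print(owners_for("python/ray/_common/utils.py", rules))
```

So any PR touching `/python/ray/_common/` now requests review from @edoakes and @aslonnie.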

BUILD.bazel

Lines changed: 26 additions & 26 deletions
@@ -142,8 +142,8 @@ ray_cc_library(
     hdrs = ["src/ray/rpc/grpc_client.h"],
     deps = [
         ":grpc_common_base",
-        ":rpc_client_call",
         ":rpc_chaos",
+        ":rpc_client_call",
         "//src/ray/common:grpc_util",
         "//src/ray/common:ray_config",
         "//src/ray/common:status",
@@ -157,9 +157,9 @@ ray_cc_library(
     deps = [
         ":stats_metric",
         "//src/ray/common:asio",
-        "//src/ray/common:ray_config",
         "//src/ray/common:grpc_util",
         "//src/ray/common:id",
+        "//src/ray/common:ray_config",
         "//src/ray/common:status",
         "@com_github_grpc_grpc//:grpc++",
     ],
@@ -170,8 +170,8 @@ ray_cc_library(
     srcs = ["src/ray/rpc/retryable_grpc_client.cc"],
     hdrs = ["src/ray/rpc/retryable_grpc_client.h"],
     deps = [
-        ":rpc_client_call",
         ":grpc_client",
+        ":rpc_client_call",
         "@com_google_absl//absl/container:btree",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/time",
@@ -184,8 +184,8 @@ ray_cc_library(
     deps = [
         ":grpc_client",
         "//src/ray/common:status",
-        "//src/ray/util:logging",
         "//src/ray/protobuf:reporter_cc_proto",
+        "//src/ray/util:logging",
         "@com_github_grpc_grpc//:grpc++",
     ],
 )
@@ -198,8 +198,8 @@ ray_cc_library(
         ":grpc_common_base",
         ":rpc_server_call",
         "//src/ray/common:asio",
-        "//src/ray/common:status",
         "//src/ray/common:ray_config",
+        "//src/ray/common:status",
         "//src/ray/util:thread_utils",
         "@com_github_grpc_grpc//:grpc++",
         "@com_github_grpc_grpc//:grpc++_reflection",
@@ -216,9 +216,9 @@ ray_cc_library(
     ],
     # TODO(core): These three dependencies come from raylet client, should be able to remove after we split node rpc and raylet client into smaller targets.
     deps = [
+        "//src/ray/common:network",
         "//src/ray/common:ray_object",
         "//src/ray/common:task_common",
-        "//src/ray/common:network",
     ] + [
         ":grpc_client",
         ":grpc_common_base",
@@ -459,9 +459,9 @@ ray_cc_library(
     }),
     linkopts = PLASMA_LINKOPTS,
     deps = [
+        ":object_manager_common",
         ":plasma_fbs",
         ":ray_common",
-        ":object_manager_common",
         "//src/ray/protobuf:common_cc_proto",
         "//src/ray/util",
         "//src/ray/util:compat",
@@ -526,13 +526,13 @@ ray_cc_library(
     name = "ray_mock",
     hdrs = glob(
         ["src/mock/**/*.h"],
-        exclude = ["src/mock/ray/common/ray_syncer/ray_syncer.h"]
+        exclude = ["src/mock/ray/common/ray_syncer/ray_syncer.h"],
     ),
 )
 
 ray_cc_library(
     name = "ray_mock_syncer",
-    hdrs = ["src/mock/ray/common/ray_syncer/ray_syncer.h"]
+    hdrs = ["src/mock/ray/common/ray_syncer/ray_syncer.h"],
 )
 
 cc_grpc_library(
@@ -573,8 +573,8 @@ ray_cc_binary(
         ":raylet_lib",
         "//src/ray/util",
         "//src/ray/util:cmd_line_utils",
-        "//src/ray/util:stream_redirection_options",
         "//src/ray/util:stream_redirection",
+        "//src/ray/util:stream_redirection_options",
         "@com_github_gflags_gflags//:gflags",
     ],
 )
@@ -810,8 +810,8 @@ ray_cc_binary(
     deps = [
         ":gcs_server_lib",
         ":stats_lib",
-        "//src/ray/util:stream_redirection_options",
         "//src/ray/util:stream_redirection",
+        "//src/ray/util:stream_redirection_options",
         "@com_github_gflags_gflags//:gflags",
     ],
 )
@@ -864,8 +864,8 @@ ray_cc_library(
     name = "stats_opentelemetry",
     srcs = ["src/ray/stats/opentelemetry_metrics.cc"],
     deps = [
-        "@io_opentelemetry_cpp//sdk/src/logs:logs",
-        "@io_opentelemetry_cpp//sdk/src/trace:trace",
+        "@io_opentelemetry_cpp//sdk/src/logs",
+        "@io_opentelemetry_cpp//sdk/src/trace",
     ],
 )
 
@@ -1069,8 +1069,8 @@ ray_cc_library(
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/strings:str_format",
         "@com_google_googletest//:gtest",
         "@io_opencensus_cpp//opencensus/exporters/stats/prometheus:prometheus_exporter",
         "@io_opencensus_cpp//opencensus/stats",
@@ -1093,16 +1093,16 @@ ray_cc_library(
     srcs = ["src/ray/raylet_client/raylet_client.cc"],
     hdrs = ["src/ray/raylet_client/raylet_client.h"],
     deps = [
-        ":raylet_client_connection_lib",
         ":node_manager_rpc",
-        "//src/ray/common:id",
+        ":raylet_client_connection_lib",
         "//src/ray/common:asio",
+        "//src/ray/common:id",
+        "//src/ray/common:network",
         "//src/ray/common:ray_object",
         "//src/ray/common:status",
-        "//src/ray/common:network",
         "//src/ray/common:task_common",
-        "//src/ray/util:logging",
         "//src/ray/protobuf:common_cc_proto",
+        "//src/ray/util:logging",
     ],
 )
 
@@ -1191,8 +1191,8 @@ ray_cc_library(
         "//src/ray/util:mutex_protected",
         "//src/ray/util:process",
         "//src/ray/util:shared_lru",
-        "//src/ray/util:stream_redirection_options",
         "//src/ray/util:stream_redirection",
+        "//src/ray/util:stream_redirection_options",
         "@boost//:circular_buffer",
         "@boost//:fiber",
         "@com_google_absl//absl/cleanup",
@@ -2514,10 +2514,10 @@ ray_cc_library(
     deps = [
         ":chunk_object_reader",
         ":object_buffer_pool",
-        ":object_manager_common",
         ":object_directory",
-        ":ownership_based_object_directory",
+        ":object_manager_common",
         ":object_manager_rpc",
+        ":ownership_based_object_directory",
         ":plasma_store_server_lib",
         ":pull_manager",
         ":push_manager",
@@ -2562,8 +2562,8 @@ ray_cc_library(
         "//src/ray/common:ray_config",
         "//src/ray/common:ray_object",
         "//src/ray/common:status",
-        "//src/ray/util:counter_map",
         "//src/ray/util:container_util",
+        "//src/ray/util:counter_map",
         "@boost//:asio",
         "@boost//:bind",
         "@com_google_absl//absl/container:flat_hash_map",
@@ -2937,11 +2937,11 @@ ray_cc_library(
     name = "gcs",
     deps = [
         ":gcs_callback",
+        ":gcs_pb_util",
         ":node_manager_fbs",
         ":node_manager_rpc",
-        ":gcs_pb_util",
         ":redis_client",
-    ]
+    ],
 )
 
 ray_cc_test(
@@ -3055,7 +3055,7 @@ pyx_library(
     cc_kwargs = dict(
         srcs = PYX_SRCS,
         # cython code is auto-generated, which is out of our control.
-        copts = COPTS + PYX_COPTS + ["-Wno-shadow"],
+        copts = COPTS + PYX_COPTS,
        # see https://github.com/tensorflow/tensorflow/blob/r2.1/tensorflow/lite/BUILD#L444
         linkopts = select({
             "@platforms//os:osx": [
@@ -3082,8 +3082,8 @@ pyx_library(
         "//src/ray/protobuf:serialization_cc_proto",
         "//src/ray/util",
         "//src/ray/util:memory",
-        "//src/ray/util:stream_redirection_options",
         "//src/ray/util:stream_redirection",
+        "//src/ray/util:stream_redirection_options",
     ],
 )
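Most of the churn in this file is buildifier-style alphabetization of `deps` lists: relative `:target` labels sort first, then `//`-absolute labels, then `@external` repository labels, each group alphabetically. A sort key that reproduces the ordering seen in these hunks (an approximation of buildifier's behavior, not the tool itself):

```python
def bazel_dep_sort_key(dep):
    """Order deps the way the cleanup above does: relative ':' targets
    first, then '//' absolute targets, then '@' external repos, with
    each group sorted alphabetically."""
    if dep.startswith(":"):
        group = 0
    elif dep.startswith("//"):
        group = 1
    else:  # '@external//...' labels sort last
        group = 2
    return (group, dep)


# The grpc_client deps from the first hunk, in their pre-change order.
deps = [
    ":grpc_common_base",
    ":rpc_client_call",
    ":rpc_chaos",
    "//src/ray/common:grpc_util",
    "//src/ray/common:ray_config",
    "//src/ray/common:status",
]
print(sorted(deps, key=bazel_dep_sort_key))
```

Sorting with this key yields exactly the post-change order shown in the `@@ -142,8 +142,8 @@` hunk.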

bazel/ray.bzl

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ PYX_COPTS = select({
     "//conditions:default": [
         # Ignore this warning since CPython and Cython have issue removing deprecated tp_print on MacOS
         "-Wno-deprecated-declarations",
+        "-Wno-shadow",
     ],
 }) + select({
     "@platforms//os:windows": [
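Together with the BUILD.bazel hunk that drops `+ ["-Wno-shadow"]` from the `pyx_library` call site, this moves the flag into the shared `PYX_COPTS`, so every Cython target picks it up; the resolved flag set on default platforms is unchanged. A toy model of how `select()` resolves and the lists concatenate (not Bazel's actual resolution logic):

```python
def resolve_select(branches, condition):
    """Toy model of Bazel select(): return the branch for the matching
    condition, falling back to //conditions:default."""
    return branches.get(condition, branches.get("//conditions:default", []))


# Before this change: -Wno-shadow was appended at the single pyx_library call site.
copts_before = resolve_select(
    {"//conditions:default": ["-Wno-deprecated-declarations"]}, "linux"
) + ["-Wno-shadow"]

# After: the flag lives in the shared PYX_COPTS, so all Cython targets get it.
copts_after = resolve_select(
    {"//conditions:default": ["-Wno-deprecated-declarations", "-Wno-shadow"]},
    "linux",
)

print(sorted(copts_before) == sorted(copts_after))  # same flag set either way
```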

doc/source/train/common/torch-configure-run.rst

Lines changed: 2 additions & 3 deletions
@@ -1,5 +1,5 @@
-Configure scale and resources
------------------------------
+Configure scale and GPUs
+------------------------
 
 Outside of your training function, create a :class:`~ray.train.ScalingConfig` object to configure:
 
@@ -11,7 +11,6 @@ Outside of your training function, create a :class:`~ray.train.ScalingConfig` ob
     from ray.train import ScalingConfig
     scaling_config = ScalingConfig(num_workers=2, use_gpu=True)
 
-3. (Optional) :class:`resources_per_worker <ray.train.ScalingConfig>` - The resources reserved for each worker. If you want to allocate more than one CPU or GPU per training worker, or if you need to specify other accelerators, set this attribute.
 
 For more details, see :ref:`train_scaling_config`.
doc/source/train/examples/lightning/dolly_lightning_fsdp_finetuning.ipynb

Lines changed: 1 addition & 1 deletion
@@ -338,7 +338,7 @@
    "source": [
     "## Fine-tune with Ray TorchTrainer\n",
     "\n",
-    "Ray TorchTrainer allows you to scale your PyTorch Lightning training workload over multiple nodes. See {ref}`Configuring Scale and Resources <train_scaling_config>` for more details."
+    "Ray TorchTrainer allows you to scale your PyTorch Lightning training workload over multiple nodes. See {ref}`Configuring Scale and GPUs <train_scaling_config>` for more details."
    ]
   },
   {

doc/source/train/getting-started-pytorch-lightning.rst

Lines changed: 3 additions & 3 deletions
@@ -7,9 +7,9 @@ This tutorial walks through the process of converting an existing PyTorch Lightn
 
 Learn how to:
 
-1. Configure the Lightning Trainer so that it runs distributed with Ray and on the correct CPU, GPU, or other accelerator device.
+1. Configure the Lightning Trainer so that it runs distributed with Ray and on the correct CPU or GPU device.
 2. Configure :ref:`training function <train-overview-training-function>` to report metrics and save checkpoints.
-3. Configure :ref:`scaling <train-overview-scaling-config>` and CPU, GPU, or other accelerator resource requirements for a training job.
+3. Configure :ref:`scaling <train-overview-scaling-config>` and CPU or GPU resource requirements for a training job.
 4. Launch a distributed training job with a :class:`~ray.train.torch.TorchTrainer`.
 
 Quickstart
@@ -31,7 +31,7 @@ For reference, the final code is as follows:
     result = trainer.fit()
 
 1. `train_func` is the Python code that executes on each distributed training worker.
-2. :class:`~ray.train.ScalingConfig` defines the number of distributed training workers and whether to use GPUs or other types of accelerators.
+2. :class:`~ray.train.ScalingConfig` defines the number of distributed training workers and whether to use GPUs.
 3. :class:`~ray.train.torch.TorchTrainer` launches the distributed training job.
 
 Compare a PyTorch Lightning training script with and without Ray Train.

doc/source/train/getting-started-pytorch.rst

Lines changed: 4 additions & 4 deletions
@@ -7,10 +7,10 @@ This tutorial walks through the process of converting an existing PyTorch script
 
 Learn how to:
 
-1. Configure a model to run distributed and on the correct CPU, GPU, or other accelerator device.
-2. Configure a dataloader to shard data across the :ref:`workers <train-overview-worker>` and place data on the correct CPU, GPU, or other accelerator device.
+1. Configure a model to run distributed and on the correct CPU/GPU device.
+2. Configure a dataloader to shard data across the :ref:`workers <train-overview-worker>` and place data on the correct CPU or GPU device.
 3. Configure a :ref:`training function <train-overview-training-function>` to report metrics and save checkpoints.
-4. Configure :ref:`scaling <train-overview-scaling-config>` and CPU, GPU, or other accelerator resource requirements for a training job.
+4. Configure :ref:`scaling <train-overview-scaling-config>` and CPU or GPU resource requirements for a training job.
 5. Launch a distributed training job with a :class:`~ray.train.torch.TorchTrainer` class.
 
 Quickstart
@@ -33,7 +33,7 @@ For reference, the final code will look something like the following:
     result = trainer.fit()
 
 1. `train_func` is the Python code that executes on each distributed training worker.
-2. :class:`~ray.train.ScalingConfig` defines the number of distributed training workers, and whether to use CPUs, GPUs, or other types of accelerator devices.
+2. :class:`~ray.train.ScalingConfig` defines the number of distributed training workers and whether to use GPUs.
 3. :class:`~ray.train.torch.TorchTrainer` launches the distributed training job.
 
 Compare a PyTorch training script with and without Ray Train.

doc/source/train/getting-started-transformers.rst

Lines changed: 4 additions & 3 deletions
@@ -7,8 +7,8 @@ This tutorial shows you how to convert an existing Hugging Face Transformers scr
 
 In this guide, learn how to:
 
-1. Configure a :ref:`training function <train-overview-training-function>` that reports metrics and saves checkpoints.
-2. Configure :ref:`scaling <train-overview-scaling-config>` and resource requirements for CPUs, GPUs or other accelerators for your distributed training job.
+1. Configure a :ref:`training function <train-overview-training-function>` that properly reports metrics and saves checkpoints.
+2. Configure :ref:`scaling <train-overview-scaling-config>` and resource requirements for CPUs or GPUs for your distributed training job.
 3. Launch a distributed training job with :class:`~ray.train.torch.TorchTrainer`.
 
 
@@ -21,6 +21,7 @@ Install the necessary packages before you begin:
 
     pip install "ray[train]" torch "transformers[torch]" datasets evaluate numpy scikit-learn
 
+
 Quickstart
 ----------
 
@@ -43,7 +44,7 @@ Here's a quick overview of the final code structure:
 The key components are:
 
 1. `train_func`: Python code that runs on each distributed training worker.
-2. :class:`~ray.train.ScalingConfig`: Defines the number of distributed training workers and their CPUs, GPUs, or other types of accelerator devices.
+2. :class:`~ray.train.ScalingConfig`: Defines the number of distributed training workers and GPU usage.
 3. :class:`~ray.train.torch.TorchTrainer`: Launches and manages the distributed training job.
 
 Code Comparison: Hugging Face Transformers vs. Ray Train Integration
