Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .buildkite/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ py_binary(
srcs = ["copy_files.py"],
visibility = ["//visibility:private"],
deps = [
ci_require("boto3"),
ci_require("aws_requests_auth"),
"//ci/ray_ci:rayci_auth",
],
)
49 changes: 3 additions & 46 deletions .buildkite/copy_files.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,11 @@
import argparse
import os
import subprocess
import sys
import time
from collections import OrderedDict

import requests
from aws_requests_auth.boto_utils import BotoAWSRequestsAuth


def retry(f):
    """Decorate ``f`` so it is re-invoked while it returns 5xx responses.

    ``f`` must return an object exposing ``status_code`` and ``text``. It is
    called up to five times with a five second pause between attempts.

    Args:
        f: Callable performing one request. Arguments passed to the wrapper
            are forwarded to it unchanged.

    Returns:
        A wrapper (carrying ``f``'s name and docstring) that returns the
        first response whose status code is below 500. If every attempt
        returns a 5xx response, the wrapper terminates the process via
        ``sys.exit(1)``.
    """
    import functools

    max_attempts = 5
    retry_interval_s = 5

    @functools.wraps(f)
    def inner(*args, **kwargs):
        for attempt in range(1, max_attempts + 1):
            resp = f(*args, **kwargs)
            print("Getting Presigned URL, status_code", resp.status_code)
            if resp.status_code < 500:
                return resp
            print("errored, retrying...")
            print(resp.text)
            # Only wait when another attempt follows; sleeping after the
            # final failure would merely delay the exit.
            if attempt < max_attempts:
                time.sleep(retry_interval_s)
        print("still errorred after many retries")
        sys.exit(1)

    return inner


@retry
def perform_auth():
    """Issue one authenticated GET against the RayCI API endpoint.

    The request is signed with ``BotoAWSRequestsAuth`` and carries the
    current ``BUILDKITE_JOB_ID`` as the ``job_id`` query parameter. A
    ``KeyError`` is raised if that environment variable is not set.

    Returns:
        The ``requests`` response object for the call.
    """
    signer = BotoAWSRequestsAuth(
        aws_service="execute-api",
        aws_region="us-west-2",
        aws_host="vop4ss7n22.execute-api.us-west-2.amazonaws.com",
    )
    query = {"job_id": os.environ["BUILDKITE_JOB_ID"]}
    return requests.get(
        "https://vop4ss7n22.execute-api.us-west-2.amazonaws.com/endpoint/",
        params=query,
        auth=signer,
    )


def handle_docker_login(resp):
    """Run ``docker login`` for ``raydockerreleaser`` using ``resp``'s password.

    Args:
        resp: Response object whose JSON body contains a
            ``docker_password`` entry.

    Raises:
        KeyError: If the JSON body has no ``docker_password`` entry.
        subprocess.CalledProcessError: If ``docker login`` exits non-zero.
    """
    pwd = resp.json()["docker_password"]
    # Feed the secret through stdin: a ``--password`` argument is visible to
    # anyone who can list processes on the host and in command traces.
    subprocess.run(
        ["docker", "login", "--username", "raydockerreleaser", "--password-stdin"],
        input=pwd.encode(),
        check=True,
    )
from ci.ray_ci.rayci_auth import docker_hub_login, get_rayci_api_response


def gather_paths(dir_path):
Expand Down Expand Up @@ -117,11 +75,10 @@ def upload_paths(paths, resp, destination):
assert "BUILDKITE_JOB_ID" in os.environ
assert "BUILDKITE_COMMIT" in os.environ

resp = perform_auth()

if args.destination == "docker_login":
handle_docker_login(resp)
docker_hub_login()
else:
resp = get_rayci_api_response()
paths = gather_paths(args.path)
print("Planning to upload", paths)
upload_paths(paths, resp, args.destination)
14 changes: 14 additions & 0 deletions ci/ray_ci/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
load("@py_deps_py310//:requirements.bzl", ci_require = "requirement")
load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")

# RayCI API authentication library (rayci_auth.py). Visibility is limited to
# the //.buildkite and //ci/ray_ci/automation packages.
py_library(
name = "rayci_auth",
srcs = ["rayci_auth.py"],
visibility = [
"//.buildkite:__pkg__",
"//ci/ray_ci/automation:__pkg__",
],
deps = [
ci_require("boto3"),
ci_require("aws_requests_auth"),
],
)

py_library(
name = "ray_ci_lib",
srcs = glob(
["*.py"],
exclude = [
"rayci_auth.py",
"test_*.py",
"test_in_docker.py",
"build_in_docker.py",
Expand Down
2 changes: 2 additions & 0 deletions ci/ray_ci/automation/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ py_binary(
deps = [
":crane_lib",
"//ci/ray_ci:ray_ci_lib",
"//ci/ray_ci:rayci_auth",
"//release:ray_release",
ci_require("click"),
],
Expand All @@ -340,6 +341,7 @@ py_test(
],
deps = [
":push_ray_image",
"//ci/ray_ci:rayci_auth",
ci_require("pytest"),
],
)
5 changes: 5 additions & 0 deletions ci/ray_ci/automation/push_ray_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
RAY_REPO_MAP,
RayType,
)
from ci.ray_ci.rayci_auth import docker_hub_login
from ci.ray_ci.utils import ci_init, ecr_docker_login

from ray_release.configs.global_config import get_global_config
Expand Down Expand Up @@ -338,6 +339,10 @@ def main(
ecr_registry = rayci_work_repo.split("/")[0]
ecr_docker_login(ecr_registry)

if not dry_run:
logger.info("Logging in to Docker Hub...")
docker_hub_login()

all_tags = []
for plat in platforms:
logger.info(f"\n{'='*60}\nProcessing platform: {plat}\n{'='*60}")
Expand Down
18 changes: 16 additions & 2 deletions ci/ray_ci/automation/test_push_ray_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,13 +466,20 @@ class TestMultiplePlatforms:
POSTMERGE_PIPELINE_ID = "test-postmerge-pipeline-id"
WORK_REPO = "123456789.dkr.ecr.us-west-2.amazonaws.com/rayci-work"

@mock.patch("ci.ray_ci.automation.push_ray_image.docker_hub_login")
@mock.patch("ci.ray_ci.automation.push_ray_image.ci_init")
@mock.patch("ci.ray_ci.automation.push_ray_image.ecr_docker_login")
@mock.patch("ci.ray_ci.automation.push_ray_image._copy_image")
@mock.patch("ci.ray_ci.automation.push_ray_image._image_exists")
@mock.patch("ci.ray_ci.automation.push_ray_image.get_global_config")
def test_multiple_platforms_processed(
self, mock_config, mock_exists, mock_copy, mock_ecr_login, mock_ci_init
self,
mock_config,
mock_exists,
mock_copy,
mock_ecr_login,
mock_ci_init,
mock_docker_login,
):
"""Test that multiple platforms are each processed with correct source refs."""
from click.testing import CliRunner
Expand Down Expand Up @@ -527,13 +534,20 @@ def test_multiple_platforms_processed(
for src, dest in copy_calls
)

@mock.patch("ci.ray_ci.automation.push_ray_image.docker_hub_login")
@mock.patch("ci.ray_ci.automation.push_ray_image.ci_init")
@mock.patch("ci.ray_ci.automation.push_ray_image.ecr_docker_login")
@mock.patch("ci.ray_ci.automation.push_ray_image._copy_image")
@mock.patch("ci.ray_ci.automation.push_ray_image._image_exists")
@mock.patch("ci.ray_ci.automation.push_ray_image.get_global_config")
def test_multiple_platforms_fails_if_one_missing(
self, mock_config, mock_exists, mock_copy, mock_ecr_login, mock_ci_init
self,
mock_config,
mock_exists,
mock_copy,
mock_ecr_login,
mock_ci_init,
mock_docker_login,
):
"""Test that processing fails if any platform's source image is missing."""
from click.testing import CliRunner
Expand Down
93 changes: 93 additions & 0 deletions ci/ray_ci/rayci_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
Authentication library for RayCI API endpoints.

Provides authenticated access to the RayCI credentials API for Docker Hub
login and S3 presigned URLs.
"""

import os
import subprocess
import time

import requests
from aws_requests_auth.boto_utils import BotoAWSRequestsAuth

# Host, region and service name passed to the request signer.
RAYCI_API_HOST = "vop4ss7n22.execute-api.us-west-2.amazonaws.com"
RAYCI_API_REGION = "us-west-2"
RAYCI_API_SERVICE = "execute-api"
# Full URL of the endpoint queried for credentials, derived from the host.
RAYCI_API_URL = "https://" + RAYCI_API_HOST + "/endpoint/"
# Username supplied to `docker login`.
DOCKER_HUB_USERNAME = "raydockerreleaser"


class RayCIAuthError(Exception):
    """Signals that credentials could not be obtained from the RayCI API."""


def _retry(f):
    """Retry decorator for API calls with fixed-interval retry on 5xx errors.

    The wrapped callable must return an object exposing ``status_code`` and
    ``text`` (e.g. ``requests.Response``). It is invoked up to five times,
    pausing five seconds between attempts, until it yields a response whose
    status code is below 500.

    Args:
        f: Callable performing one API request. Positional and keyword
            arguments given to the wrapper are forwarded to it unchanged.

    Returns:
        A wrapper, carrying the name and docstring of ``f``, that returns
        the first non-5xx response.

    Raises:
        RayCIAuthError: Raised by the wrapper when every attempt returns a
            5xx response.
    """
    import functools

    max_attempts = 5
    retry_interval_s = 5

    @functools.wraps(f)
    def inner(*args, **kwargs):
        for attempt in range(1, max_attempts + 1):
            resp = f(*args, **kwargs)
            print("Getting Presigned URL, status_code", resp.status_code)
            if resp.status_code < 500:
                return resp
            print("errored, retrying...")
            print(resp.text)
            # Only wait when another attempt follows; sleeping after the
            # final failure would merely delay the error.
            if attempt < max_attempts:
                time.sleep(retry_interval_s)
        raise RayCIAuthError(
            "Failed to get a valid response from RayCI API after multiple retries."
        )

    return inner


@_retry
def get_rayci_api_response():
    """
    Fetch credentials from the RayCI API endpoint.

    Uses AWS SigV4 authentication via the instance's IAM role.
    Requires BUILDKITE_JOB_ID environment variable.

    Returns:
        requests.Response containing credentials for Docker Hub and S3.

    Raises:
        ValueError: If BUILDKITE_JOB_ID is unset or empty.
        requests.exceptions.Timeout: If the API does not respond in time.
        RayCIAuthError: From the retry decorator, if every attempt returns
            a 5xx response.
    """
    job_id = os.environ.get("BUILDKITE_JOB_ID")
    if not job_id:
        raise ValueError("BUILDKITE_JOB_ID environment variable must be set.")

    auth = BotoAWSRequestsAuth(
        aws_host=RAYCI_API_HOST,
        aws_region=RAYCI_API_REGION,
        aws_service=RAYCI_API_SERVICE,
    )
    # requests applies no timeout by default; bound the call (in seconds) so
    # an unresponsive endpoint cannot stall the CI job indefinitely.
    return requests.get(
        RAYCI_API_URL,
        auth=auth,
        params={"job_id": job_id},
        timeout=60,
    )


def docker_hub_login() -> None:
    """
    Login to Docker Hub using credentials from RayCI API.

    Fetches the API response, extracts its ``docker_password`` field and
    pipes it to ``docker login`` for the DOCKER_HUB_USERNAME account.

    Raises:
        RayCIAuthError: If the response is not JSON, is not a JSON object,
            or holds no usable ``docker_password`` string.
        subprocess.CalledProcessError: If ``docker login`` exits non-zero.
    """
    resp = get_rayci_api_response()
    try:
        # ValueError covers JSON decode failures across requests versions;
        # TypeError covers a JSON body that is not an object (list, null).
        pwd = resp.json()["docker_password"]
    except (ValueError, KeyError, TypeError) as e:
        # The body is deliberately left out of the message: it may carry
        # other credentials that must not end up in CI logs.
        raise RayCIAuthError(
            "Could not get docker_password from API response "
            f"(status_code={resp.status_code})"
        ) from e
    if not isinstance(pwd, str) or not pwd:
        raise RayCIAuthError(
            "API response contained an empty or non-string docker_password "
            f"(status_code={resp.status_code})"
        )
    subprocess.run(
        ["docker", "login", "--username", DOCKER_HUB_USERNAME, "--password-stdin"],
        input=pwd.encode(),
        check=True,
    )