Skip to content

ci: native extension hash cache #13675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ tests-gen:
extends: .testrunner
script:
- pip install riot==0.20.1
- export DD_NATIVE_SOURCES_HASH=$(scripts/get-native-sources-hash.sh)
- riot -v run --pass-env -s gitlab-gen-config -v
needs: []
artifacts:
Expand Down
59 changes: 59 additions & 0 deletions scripts/gen_ext_cache_scripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Generate the shell scripts that restore/save the native extension cache.

This script is meant to be run twice by CI:

- First run (before the build): ``restore-ext-cache.sh`` does not exist yet,
  so it is generated from whatever is currently in the ``.ext_cache``
  directory.
- Second run (after the build): ``restore-ext-cache.sh`` exists, so
  ``save-ext-cache.sh`` is generated from the freshly built artifacts.
"""
from pathlib import Path
import ast
import subprocess
import sys
import typing as t


HERE = Path(__file__).resolve().parent
ROOT = HERE.parent
CACHE = ROOT / ".ext_cache"
RESTORE_FILE = HERE / "restore-ext-cache.sh"
SAVE_FILE = HERE / "save-ext-cache.sh"

# Ask setup.py for extension information. The custom "ext_hashes" command
# prints one "#EXTHASH:(name, hash, target)" line per extension (and per
# extension dependency).
output = subprocess.check_output([sys.executable, ROOT / "setup.py", "ext_hashes", "--inplace"])
cached_files = set()
for line in output.decode().splitlines():
    if not line.startswith("#EXTHASH:"):
        continue
    # The payload is a printed tuple literal, so parse it with
    # ast.literal_eval instead of eval: no need to execute arbitrary code.
    ext_name, ext_hash, ext_target = t.cast(
        t.Tuple[str, str, str], ast.literal_eval(line.split(":", 1)[-1].strip())
    )
    target = Path(ext_target)
    cache_dir = CACHE / ext_name / ext_hash
    if ext_target.endswith("*"):
        # A trailing "*" means the target is a glob pattern matching a family
        # of files rather than a single artifact.
        target_dir = target.parent.resolve()
        if RESTORE_FILE.exists():
            # Second run: iterate over the built files as these are the ones
            # we want to save into the cache
            for d in target_dir.glob(target.name):
                if d.is_file():
                    cached_files.add((str(cache_dir / d.name), str(d.resolve())))
        else:
            # First run: iterate over the cached files as these are the ones
            # we want to restore
            for d in cache_dir.glob(target.name):
                if d.is_file():
                    cached_files.add((str(d.resolve()), str(target_dir / d.name)))
    else:
        cached_files.add((str(cache_dir / target.name), ext_target))

# Generate the restore script on the first run. The touch keeps the restored
# artifact newer than its sources so the build treats it as up to date.
if not RESTORE_FILE.exists():
    RESTORE_FILE.write_text(
        "\n".join(
            [
                f" test -f {cached_file} && (cp {cached_file} {dest} && touch {dest} "
                f"&& echo 'Restored {cached_file} -> {dest}') || true"
                for cached_file, dest in cached_files
            ]
        )
    )
else:
    # Generate the save script on the second run. NOTE: the mkdir/cp pair is
    # parenthesized because || and && have equal precedence in the shell;
    # without the grouping, "test -f C || mkdir -p P && (cp ...)" copies
    # unconditionally, even when the artifact is already cached.
    SAVE_FILE.write_text(
        "\n".join(
            [
                f" test -f {cached_file} || (mkdir -p {Path(cached_file).parent} "
                f"&& cp {dest} {cached_file} && echo 'Saved {dest} -> {cached_file}') || true"
                for cached_file, dest in cached_files
            ]
        )
    )
56 changes: 30 additions & 26 deletions scripts/gen_gitlab_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# file. The function will be called automatically when this script is run.

from dataclasses import dataclass
import datetime
import os
import subprocess
import typing as t
Expand Down Expand Up @@ -251,8 +252,7 @@ def check(name: str, command: str, paths: t.Set[str]) -> None:
def gen_build_base_venvs() -> None:
"""Generate the list of base jobs for building virtual environments."""

ci_commit_sha = os.getenv("CI_COMMIT_SHA", "default")
native_hash = os.getenv("DD_NATIVE_SOURCES_HASH", ci_commit_sha)
current_month = datetime.datetime.now().month

with TESTS_GEN.open("a") as f:
f.write(
Expand All @@ -272,47 +272,51 @@ def gen_build_base_venvs() -> None:
PIP_CACHE_DIR: '${{CI_PROJECT_DIR}}/.cache/pip'
SCCACHE_DIR: '${{CI_PROJECT_DIR}}/.cache/sccache'
DD_FAST_BUILD: '1'
DD_CMAKE_INCREMENTAL_BUILD: '1'
DD_SETUP_CACHE_DOWNLOADS: '1'
EXT_CACHE_VENV: '${{CI_PROJECT_DIR}}/.cache/ext_cache_venv'
rules:
- if: '$CI_COMMIT_REF_NAME == "main"'
variables:
DD_FAST_BUILD: '0'
- when: always
script: |
set -e -o pipefail
if [ ! -f cache_used.txt ];
then
echo "No cache found, building native extensions and base venv"
apt update && apt install -y sccache
pip install riot==0.20.1
riot -P -v generate --python=$PYTHON_VERSION
echo "Running smoke tests"
riot -v run -s --python=$PYTHON_VERSION smoke_test
touch cache_used.txt
apt update && apt install -y sccache
pip install riot==0.20.1
if [ ! -d $EXT_CACHE_VENV ]; then
python$PYTHON_VERSION -m venv $EXT_CACHE_VENV
source $EXT_CACHE_VENV/bin/activate
pip install cmake setuptools_rust Cython
else
echo "Skipping build, using compiled files/venv from cache"
echo "Fixing ddtrace versions"
pip install "setuptools_scm[toml]>=4"
ddtrace_version=$(python -m setuptools_scm --force-write-version-files)
find .riot/ -path '*/ddtrace*.dist-info/METADATA' | \
xargs sed -E -i "s/^Version:.*$/Version: ${{ddtrace_version}}/"
echo "Using version: ${{ddtrace_version}}"
source $EXT_CACHE_VENV/bin/activate
fi
python scripts/gen_ext_cache_scripts.py
deactivate
$SHELL scripts/restore-ext-cache.sh
riot -P -v generate --python=$PYTHON_VERSION
echo "Running smoke tests"
riot -v run -s --python=$PYTHON_VERSION smoke_test
source $EXT_CACHE_VENV/bin/activate
python scripts/gen_ext_cache_scripts.py
deactivate
$SHELL scripts/save-ext-cache.sh
cache:
# Share pip/sccache between jobs of the same Python version
- key: v1-build_base_venvs-${{PYTHON_VERSION}}-cache
- key: v1-build_base_venvs-${{PYTHON_VERSION}}-cache-{current_month}
paths:
- .cache
# Reuse job artifacts between runs if no native source files have been changed
- key: v1-build_base_venvs-${{PYTHON_VERSION}}-native-{native_hash}
- key: v1-build_base_venvs-${{PYTHON_VERSION}}-ext-{current_month}
paths:
- .riot/venv_*
- ddtrace/**/*.so*
- ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe*
- ddtrace/internal/datadog/profiling/test/test_*
- cache_used.txt
- .ext_cache
- key: v1-build_base_venvs-${{PYTHON_VERSION}}-download-cache-{current_month}
paths:
- .download_cache
artifacts:
name: venv_$PYTHON_VERSION
paths:
- scripts/restore-ext-cache.sh
- scripts/save-ext-cache.sh
- .riot/venv_*
- ddtrace/_version.py
- ddtrace/**/*.so*
Expand Down
22 changes: 0 additions & 22 deletions scripts/get-native-sources-hash.sh

This file was deleted.

85 changes: 71 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sysconfig
import tarfile
import time
import typing as t
import warnings

import cmake
Expand Down Expand Up @@ -167,6 +168,28 @@ def is_64_bit_python():
return sys.maxsize > (1 << 32)


class ExtensionHashes(build_ext):
    """Print a content hash for every extension module.

    Emits one ``#EXTHASH: (name, sha256, target_path)`` line per extension
    (and per CMake extension dependency) so that callers can key a build
    cache on the extension sources.
    """

    def run(self):
        for extension in self.distribution.ext_modules:
            binary_path = Path(self.get_ext_fullpath(extension.name))

            if isinstance(extension, CMakeExtension):
                source_paths = extension.get_sources(self)
            else:
                source_paths = [Path(source) for source in extension.sources]

            # Hash the sources in sorted order so the digest is stable across
            # runs regardless of discovery order.
            digest = hashlib.sha256()
            for source_path in sorted(source_paths):
                digest.update(source_path.read_bytes())

            print("#EXTHASH:", (extension.name, digest.hexdigest(), str(binary_path)))

            # Dependencies built alongside a CMake extension share its source
            # hash; the trailing "*" marks the target as a glob pattern.
            if isinstance(extension, CMakeExtension):
                for dependency in extension.dependencies:
                    print(
                        "#EXTHASH:", (f"{extension.name}-{dependency.name}", digest.hexdigest(), str(dependency) + "*")
                    )


class LibraryDownload:
CACHE_DIR = HERE / ".download_cache"
USE_CACHE = os.getenv("DD_SETUP_CACHE_DOWNLOADS", "0").lower() in ("1", "yes", "on", "true")
Expand Down Expand Up @@ -370,26 +393,36 @@ def build_extension(self, ext):
except Exception as e:
print(f"WARNING: An error occurred while building the extension: {e}")

def build_extension_cmake(self, ext):
def build_extension_cmake(self, ext: "CMakeExtension") -> None:
if IS_EDITABLE and self.INCREMENTAL:
# DEV: Rudimentary incremental build support. We copy the logic from
# setuptools' build_ext command, best effort.
full_path = Path(self.get_ext_fullpath(ext.name))
ext_path = Path(ext.source_dir, full_path.name)

# Collect all the source files within the source directory. We exclude
# Python sources and anything that does not have a suffix (most likely
# a binary file), or that has the same name as the extension binary.
sources = (
[
_
for _ in Path(ext.source_dir).rglob("**")
if _.is_file() and _.name != full_path.name and _.suffix and _.suffix not in (".py", ".pyc", ".pyi")
force = self.force

if ext.dependencies:
dependencies = [
str(d.resolve())
for dependency in ext.dependencies
for d in dependency.parent.glob(dependency.name + "*")
if d.is_file()
]
if ext.source_dir
else []
)
if not (self.force or newer_group([str(_.resolve()) for _ in sources], str(ext_path.resolve()), "newer")):
if not dependencies:
# We expected some dependencies but none were found so we
# force the build to happen
force = True

else:
dependencies = []

if not (
force
or newer_group(
[str(_.resolve()) for _ in ext.get_sources(self)] + dependencies, str(ext_path.resolve()), "newer"
)
):
print(f"skipping '{ext.name}' CMake extension (up-to-date)")

# We need to copy the binary where setuptools expects it
Expand Down Expand Up @@ -561,12 +594,13 @@ class CMakeExtension(Extension):
def __init__(
self,
name,
source_dir=".",
source_dir=Path("."),
cmake_args=[],
build_args=[],
install_args=[],
build_type=None,
optional=True, # By default, extensions are optional
dependencies=[],
):
super().__init__(name, sources=[])
self.source_dir = source_dir
Expand All @@ -575,6 +609,27 @@ def __init__(
self.install_args = install_args or []
self.build_type = build_type or COMPILE_MODE
self.optional = optional # If True, cmake errors are ignored
self.dependencies = dependencies

def get_sources(self, cmd: build_ext) -> t.List[Path]:
"""
Returns the list of source files for this extension.
This is used by the CMakeBuild class to determine if the extension needs to be rebuilt.
"""
full_path = Path(cmd.get_ext_fullpath(self.name))

# Collect all the source files within the source directory. We exclude
# Python sources and anything that does not have a suffix (most likely
# a binary file), or that has the same name as the extension binary.
return (
[
_
for _ in Path(self.source_dir).rglob("**")
if _.is_file() and _.name != full_path.name and _.suffix and _.suffix not in {".py", ".pyc", ".pyi"}
]
if self.source_dir
else []
)


def check_rust_toolchain():
Expand Down Expand Up @@ -743,6 +798,7 @@ def get_exts_for(name):
"ddtrace.internal.datadog.profiling.crashtracker._crashtracker",
source_dir=CRASHTRACKER_DIR,
optional=False,
dependencies=[CRASHTRACKER_DIR.parent / "libdd_wrapper"],
)
)

Expand Down Expand Up @@ -780,6 +836,7 @@ def get_exts_for(name):
"build_py": LibraryDownloader,
"build_rust": build_rust,
"clean": CleanLibraries,
"ext_hashes": ExtensionHashes,
},
setup_requires=["setuptools_scm[toml]>=4", "cython", "cmake>=3.24.2,<3.28", "setuptools-rust"],
ext_modules=filter_extensions(ext_modules)
Expand Down
Loading