
Commit 3eeca8c

feat(A/B): add customizable artifacts
Update `ab_test.py` with the ability to accept custom artifacts for the A and B runs, and update `pipeline_perf.py` accordingly. The REVISION_A_ARTIFACTS and REVISION_B_ARTIFACTS environment variables now specify the custom artifacts to use in the A/B test.

Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>
1 parent 3886e89 commit 3eeca8c
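
For context, a minimal sketch of the environment this commit expects when generating an A/B pipeline; the variable names come from this commit, while the revision and artifact values are made-up examples:

```python
import os

# Revisions under comparison (pre-existing behaviour).
os.environ["REVISION_A"] = "main"               # example baseline revision
os.environ["REVISION_B"] = "my-feature-branch"  # example candidate revision

# New in this commit: optional custom artifacts for each side of the A/B test.
# Each value names a directory under build/artifacts, or an S3 path to download from.
os.environ["REVISION_A_ARTIFACTS"] = "artifacts-main"     # example name
os.environ["REVISION_B_ARTIFACTS"] = "artifacts-feature"  # example name
```

Either both artifact variables are set or neither, mirroring the existing rule for REVISION_A/REVISION_B.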

3 files changed: +73 −12 lines changed

.buildkite/common.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -376,6 +376,12 @@ def to_json(self):
         """Serialize the pipeline to JSON"""
         return json.dumps(self.to_dict(), indent=4, sort_keys=True, ensure_ascii=False)
 
+    def devtool_download_artifacts(self, artifacts):
+        """Generate a `devtool download_ci_artifacts` command"""
+        parts = ["./tools/devtool -y download_ci_artifacts"]
+        parts += artifacts
+        return " ".join(parts)
+
     def devtool_test(self, devtool_opts=None, pytest_opts=None):
         """Generate a `devtool test` command"""
         cmds = []
```
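
A quick usage sketch of the new helper, assuming a `BKPipeline` instance named `pipeline` and made-up artifact names:

```python
# Build the shell command that downloads the listed CI artifacts.
cmd = pipeline.devtool_download_artifacts(["artifacts-main", "artifacts-feature"])
# cmd == "./tools/devtool -y download_ci_artifacts artifacts-main artifacts-feature"
```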

.buildkite/pipeline_perf.py

Lines changed: 15 additions & 1 deletion
```diff
@@ -94,11 +94,16 @@
 
 REVISION_A = os.environ.get("REVISION_A")
 REVISION_B = os.environ.get("REVISION_B")
+REVISION_A_ARTIFACTS = os.environ.get("REVISION_A_ARTIFACTS")
+REVISION_B_ARTIFACTS = os.environ.get("REVISION_B_ARTIFACTS")
 
 # Either both are specified or neither. Only doing either is a bug. If you want to
 # run performance tests _on_ a specific commit, specify neither and put your commit
 # into buildkite's "commit" field.
 assert (REVISION_A and REVISION_B) or (not REVISION_A and not REVISION_B)
+assert (REVISION_A_ARTIFACTS and REVISION_B_ARTIFACTS) or (
+    not REVISION_A_ARTIFACTS and not REVISION_B_ARTIFACTS
+)
 
 BKPipeline.parser.add_argument(
     "--test",
@@ -132,17 +137,26 @@
     ab_opts = test.pop("ab_opts", "")
     devtool_opts += " --performance"
     test_script_opts = ""
+    artifacts = []
     if REVISION_A:
         devtool_opts += " --ab"
         test_script_opts = f'{ab_opts} run --binaries-a build/{REVISION_A}/ --binaries-b build/{REVISION_B} --pytest-opts "{test_selector}"'
+        if REVISION_A_ARTIFACTS:
+            artifacts.append(REVISION_A_ARTIFACTS)
+            artifacts.append(REVISION_B_ARTIFACTS)
+            test_script_opts += f" --artifacts-a {REVISION_A_ARTIFACTS} --artifacts-b {REVISION_B_ARTIFACTS}"
     else:
         # Passing `-m ''` below instructs pytest to collect tests regardless of
         # their markers (e.g. it will collect both tests marked as nonci, and
         # tests without any markers).
         test_script_opts += f" -m '' {test_selector}"
 
+    command = []
+    if artifacts:
+        command.append(pipeline.devtool_download_artifacts(artifacts))
+    command.extend(pipeline.devtool_test(devtool_opts, test_script_opts))
     pipeline.build_group(
-        command=pipeline.devtool_test(devtool_opts, test_script_opts),
+        command=command,
         # and the rest can be command arguments
         **test,
     )
```
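
For orientation, when both artifact variables are set the generated step command is roughly the list below; this is a sketch with example values, and the exact `devtool test` entries come from `BKPipeline.devtool_test`:

```python
command = [
    # Download the custom artifacts for both revisions before running the tests.
    "./tools/devtool -y download_ci_artifacts artifacts-main artifacts-feature",
    # ...followed by the `devtool test` command(s), whose ab_test.py invocation now
    # carries `--artifacts-a artifacts-main --artifacts-b artifacts-feature`.
]
```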

tools/ab_test.py

Lines changed: 52 additions & 11 deletions
```diff
@@ -24,7 +24,7 @@
 import subprocess
 from collections import defaultdict
 from pathlib import Path
-from typing import Callable, List, TypeVar
+from typing import Callable, List, Optional, TypeVar
 
 import scipy
 
```
```diff
@@ -194,16 +194,31 @@ def uninteresting_dimensions(data):
     return uninteresting
 
 
-def collect_data(tag: str, binary_dir: Path, pytest_opts: str):
+def collect_data(
+    tag: str, binary_dir: Path, artifacts: Optional[Path], pytest_opts: str
+):
     """
     Executes the specified test using the provided firecracker binaries and
     stores results into the `test_results/tag` directory
     """
     binary_dir = binary_dir.resolve()
 
-    print(f"Collecting samples with {binary_dir}")
+    print(
+        f"Collecting samples | binaries path: {binary_dir}"
+        + f" | artifacts path: {artifacts}"
+        if artifacts
+        else ""
+    )
     test_path = f"test_results/{tag}"
     test_report_path = f"{test_path}/test-report.json"
+
+    # It is not possible to just download the artifacts here: this script usually
+    # runs inside Docker, where artifact downloading does not work.
+    if artifacts:
+        subprocess.run(
+            f"./tools/devtool set_current_artifacts {artifacts}", check=True, shell=True
+        )
+
     subprocess.run(
         f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m '' --json-report-file=../{test_report_path}",
         env=os.environ
```
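
An illustrative call of the extended `collect_data`; the tag, binary directory, artifacts name, and pytest options are example values (the artifacts argument is passed through as a string when it comes from the new CLI flags):

```python
from pathlib import Path

# Collect the "A" samples, switching to the named artifacts first.
collect_data("A", Path("build/main"), "artifacts-main", "-k test_boottime")
```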
```diff
@@ -371,25 +386,29 @@ def analyze_data(
 
 
 def binary_ab_test(
-    test_runner: Callable[[Path, bool], T],
+    test_runner: Callable[[Path, Optional[Path], bool], T],
     comparator: Callable[[T, T], U],
     *,
     a_directory: Path,
     b_directory: Path,
+    a_artifacts: Optional[Path],
+    b_artifacts: Optional[Path],
 ):
     """
     Similar to `git_ab_test`, but instead of locally checking out different revisions, it operates on
     directories containing firecracker/jailer binaries
     """
-    result_a = test_runner(a_directory, True)
-    result_b = test_runner(b_directory, False)
+    result_a = test_runner(a_directory, a_artifacts, True)
+    result_b = test_runner(b_directory, b_artifacts, False)
 
     return result_a, result_b, comparator(result_a, result_b)
 
 
 def ab_performance_test(
-    a_revision: Path,
-    b_revision: Path,
+    a_directory: Path,
+    b_directory: Path,
+    a_artifacts: Optional[Path],
+    b_artifacts: Optional[Path],
     pytest_opts,
     p_thresh,
     strength_abs_thresh,
@@ -398,7 +417,9 @@ def ab_performance_test(
     """Does an A/B-test of the specified test with the given firecracker/jailer binaries"""
 
     return binary_ab_test(
-        lambda bin_dir, is_a: collect_data(is_a and "A" or "B", bin_dir, pytest_opts),
+        lambda bin_dir, art_dir, is_a: collect_data(
+            is_a and "A" or "B", bin_dir, art_dir, pytest_opts
+        ),
         lambda ah, be: analyze_data(
             ah,
             be,
@@ -407,8 +428,10 @@ def ab_performance_test(
             noise_threshold,
             n_resamples=int(100 / p_thresh),
         ),
-        a_directory=a_revision,
-        b_directory=b_revision,
+        a_directory=a_directory,
+        b_directory=b_directory,
+        a_artifacts=a_artifacts,
+        b_artifacts=b_artifacts,
     )
 
 
```
```diff
@@ -433,6 +456,22 @@ def ab_performance_test(
         type=Path,
         required=True,
     )
+    run_parser.add_argument(
+        "--artifacts-a",
+        help="Name of the artifacts directory under build/artifacts to use for the revision A test. If the directory does not exist, the name is treated as an S3 path and the artifacts are downloaded from there.",
+        # Type is str since the value can be an S3 path, which the `Path` constructor
+        # would incorrectly modify
+        type=str,
+        required=False,
+    )
+    run_parser.add_argument(
+        "--artifacts-b",
+        help="Name of the artifacts directory under build/artifacts to use for the revision B test. If the directory does not exist, the name is treated as an S3 path and the artifacts are downloaded from there.",
+        # Type is str since the value can be an S3 path, which the `Path` constructor
+        # would incorrectly modify
+        type=str,
+        required=False,
+    )
     run_parser.add_argument(
         "--pytest-opts",
         help="Parameters to pass through to pytest, for example for test selection",
```
```diff
@@ -476,6 +515,8 @@ def ab_performance_test(
         ab_performance_test(
             args.binaries_a,
             args.binaries_b,
+            args.artifacts_a,
+            args.artifacts_b,
             args.pytest_opts,
             args.significance,
             args.absolute_strength,
```
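
For reference, a hypothetical invocation of the updated script with the new flags; binary directories, artifact names, and pytest options are example values:

```python
import subprocess

subprocess.run(
    "./tools/ab_test.py run"
    " --binaries-a build/main/ --binaries-b build/feature/"
    " --artifacts-a artifacts-main --artifacts-b artifacts-feature"
    ' --pytest-opts "-k test_boottime"',
    check=True,
    shell=True,
)
```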
