Skip to content

Commit 9cbccd6

Browse files
authored
CI: add benchmark workflow and script (#250)
Fixes: #85 ### What changes are included in this PR? Added Benchmark CI using Debian as runtime environment ### Are these changes tested? Yes, I have tested the changes locally. --------- Signed-off-by: Saurabh Kumar Singh <singh1203.ss@gmail.com>
1 parent 406abb6 commit 9cbccd6

File tree

3 files changed

+254
-0
lines changed

3 files changed

+254
-0
lines changed

.github/workflows/benchmark.yml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Benchmarks
on:
  push:
    branches: [main]
  pull_request:
    # Only re-run on pull requests that touch the benchmark tooling itself.
    paths:
      - ".github/workflows/benchmark.yml"
      - "ci/scripts/bench.sh"
      - "ci/scripts/bench_adapt.py"
  workflow_dispatch:
permissions:
  contents: read
jobs:
  benchmark:
    runs-on: ubuntu-latest
    container: debian:12
    strategy:
      matrix:
        go: ['1.22.7']
        arch: ['amd64']
    steps:
      - name: Install dependencies
        run: |
          apt-get update
          apt-get install -y git ca-certificates
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # bench.sh points PARQUET_TEST_DATA at the parquet-testing checkout.
          submodules: recursive
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install Go ${{ matrix.go }} for Benchmarks
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go }}
          cache: true
          cache-dependency-path: go.sum
          check-latest: false
      # On pushes the benchmarks are run by bench_adapt.py in the upload step
      # below, so this step only covers pull requests and manual dispatches.
      - name: Run Benchmarks
        if: github.event_name != 'push'
        # bench.sh compares its second argument against `-json` (single
        # dash); the previous `--json` never matched, so the JSON conversion
        # path was silently skipped here.
        run: bash ci/scripts/bench.sh "$(pwd)" -json
      - name: Upload results
        if: github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main'
        env:
          CONBENCH_URL: https://conbench.ursa.dev
          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
          # bench_adapt.py switches to its "commit" run reason when this is "main".
          CONBENCH_REF: ${{ github.ref_name }}
          CONBENCH_MACHINE_INFO_NAME: ${{ matrix.arch }}-debian-12
        run: |
          python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
          python3 ci/scripts/bench_adapt.py

ci/scripts/bench.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# Usage: bench.sh <source_dir> [-json|--json]
#
# This will output the benchmarks to STDOUT, but if `-json` (or `--json`)
# is passed as the second argument, it will also create a file
# "bench_stats.json" in the directory this is called from containing a
# json representation of the results.

set -ex
# Without pipefail the status of the `go test | tee` pipeline below is the
# status of `tee`, so a failing benchmark run would be reported as success.
set -o pipefail

# Validate input arguments
if [ -z "$1" ]; then
  echo "Error: Missing source directory argument" >&2
  exit 1
fi

source_dir="$1"

PARQUET_TEST_DATA="${source_dir}/parquet-testing/data"
export PARQUET_TEST_DATA

pushd "${source_dir}"

# lots of benchmarks, they can take a while
# the timeout is for *ALL* benchmarks together,
# not per benchmark
go test -bench=. -benchmem -timeout 40m -run='^$' ./... | tee bench_stat.dat

popd

# Accept both the single-dash spelling documented above and the GNU-style
# double-dash spelling, so either invocation produces the JSON file.
if [[ "$2" = "-json" || "$2" = "--json" ]]; then
  go install go.bobheadxi.dev/gobenchdata@latest
  PATH=$(go env GOPATH)/bin:$PATH
  export PATH
  cat "${source_dir}"/bench_*.dat | gobenchdata --json bench_stats.json
fi

# The raw benchmark output is only an intermediate artifact.
rm "${source_dir}"/bench_*.dat

ci/scripts/bench_adapt.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
import json
21+
import os
22+
import uuid
23+
import logging
24+
from pathlib import Path
25+
from typing import List
26+
27+
from benchadapt import BenchmarkResult
28+
from benchadapt.adapters import BenchmarkAdapter
29+
from benchadapt.log import log
30+
31+
log.setLevel(logging.DEBUG)

# This script lives in <root>/ci/scripts/, so the repository root is three
# levels up from this file.
ARROW_ROOT = Path(__file__).parent.parent.parent.resolve()
SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts"

# `github_commit_info` is meant to communicate GitHub-flavored commit
# information to Conbench. See
# https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66
# for a specification.
github_commit_info = {"repository": "https://github.com/apache/arrow-go"}

if os.environ.get("CONBENCH_REF") == "main":
    # Assume GitHub Actions CI. The environment variable lookups below are
    # expected to fail (KeyError) when not running in GitHub Actions.
    github_commit_info = {
        "repository": f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}',
        "commit": os.environ["GITHUB_SHA"],
        "pr_number": None,  # implying default branch
    }
    run_reason = "commit"
else:
    # Assume that the environment is not GitHub Actions CI. Error out if that
    # assumption seems to be wrong. This is raised explicitly rather than via
    # an `assert` statement so the check survives `python -O` and carries a
    # message; the exception type is unchanged.
    if os.getenv("GITHUB_ACTIONS") is not None:
        raise AssertionError(
            "GITHUB_ACTIONS is set but CONBENCH_REF is not 'main'; "
            "refusing to report results as a local dev run"
        )

    # This is probably a local dev environment, for testing. In this case, it
    # does usually not make sense to provide commit information (not a
    # controlled CI environment). Explicitly leave out "commit" and
    # "pr_number" to reflect that (to not send commit information).

    # Reflect 'local dev' scenario in run_reason. Allow user to (optionally)
    # inject a custom piece of information into the run reason here, from
    # environment.
    run_reason = "localdev"
    custom_reason_suffix = os.getenv("CONBENCH_CUSTOM_RUN_REASON")
    if custom_reason_suffix is not None:
        run_reason += f" {custom_reason_suffix.strip()}"
68+
69+
70+
class GoAdapter(BenchmarkAdapter):
    """Run the Go benchmarks and convert their results for Conbench.

    The adapter executes ``ci/scripts/bench.sh <ARROW_ROOT> -json``, which
    writes ``bench_stats.json`` into the current working directory, and then
    turns every benchmark found in that file into a ``BenchmarkResult``.
    """

    # File written by bench.sh (relative to the current working directory).
    result_file = "bench_stats.json"
    command = ["bash", SCRIPTS_PATH / "bench.sh", ARROW_ROOT, "-json"]

    def __init__(self, *args, **kwargs) -> None:
        """Forward all arguments to ``BenchmarkAdapter`` with our command."""
        super().__init__(*args, command=self.command, **kwargs)

    @staticmethod
    def _split_benchmark_name(raw_name: str) -> "tuple[str, str, str]":
        """Split a Go benchmark name into ``(name, params, num_cpu)``.

        ``BenchmarkFoo/a/b-8`` becomes ``("Foo", "a/b", "8")``.

        ``go test`` only appends the ``-<GOMAXPROCS>`` suffix when GOMAXPROCS
        is not 1, so a name without a numeric suffix is reported with
        ``num_cpu == "1"`` instead of being mangled.
        """
        trimmed = raw_name.removeprefix("Benchmark")
        head, dash, tail = trimmed.rpartition("-")
        if dash and tail.isdigit():
            ncpu = tail
        else:
            # No GOMAXPROCS suffix present: keep the whole name intact.
            head, ncpu = trimmed, "1"
        base, _, params = head.partition("/")
        return base, params, ncpu

    def _transform_results(self) -> List[BenchmarkResult]:
        """Parse ``result_file`` into a list of ``BenchmarkResult`` objects.

        All results share one ``run_id``; every suite (Go package) gets its
        own ``batch_id``. Benchmarks reporting a non-positive ``NsPerOp`` are
        skipped with a warning because no rate can be derived from them.
        """
        with open(self.result_file, "r") as f:
            raw_results = json.load(f)

        run_id = uuid.uuid4().hex
        parsed_results = []
        for suite in raw_results[0]["Suites"]:
            batch_id = uuid.uuid4().hex
            pkg = suite["Pkg"]

            for benchmark in suite["Benchmarks"]:
                ns_per_op = benchmark["NsPerOp"]
                if ns_per_op <= 0:
                    # Avoid a ZeroDivisionError that would otherwise abort
                    # the conversion of every other benchmark as well.
                    log.warning(
                        "skipping %s in %s: NsPerOp=%r",
                        benchmark["Name"],
                        pkg,
                        ns_per_op,
                    )
                    continue

                # MB/s -> B/s
                data = benchmark["Mem"]["MBPerSec"] * 1e6
                # ns per operation -> operations (iterations) per second
                time = 1 / ns_per_op * 1e9

                name, params, ncpu = self._split_benchmark_name(
                    benchmark["Name"]
                )

                parsed = BenchmarkResult(
                    run_id=run_id,
                    batch_id=batch_id,
                    stats={
                        "data": [data],
                        "unit": "B/s",
                        "times": [time],
                        "time_unit": "i/s",
                        "iterations": benchmark["Runs"],
                    },
                    context={
                        "benchmark_language": "Go",
                        "goos": suite["Goos"],
                        "goarch": suite["Goarch"],
                    },
                    tags={
                        "pkg": pkg,
                        "num_cpu": ncpu,
                        "name": name,
                        "params": params,
                    },
                    run_reason=run_reason,
                    github=github_commit_info,
                )
                parsed.run_name = (
                    f"{parsed.run_reason}: {github_commit_info.get('commit')}"
                )
                parsed_results.append(parsed)

        return parsed_results
125+
126+
127+
if __name__ == "__main__":
    # Build the adapter with an empty "info" override and invoke it right
    # away; what the call does is defined by benchadapt's BenchmarkAdapter.
    GoAdapter(result_fields_override={"info": {}})()
130+

0 commit comments

Comments
 (0)