Skip to content

Commit b7f7171

Browse files
Merge pull request #11944 from protocolbuffers/gha-port-22.x
Backport GHA fixes and optimizations to 22.x
2 parents 5dba656 + d39aeac commit b7f7171

File tree

15 files changed

+397
-196
lines changed

15 files changed

+397
-196
lines changed

.github/BUILD.bazel

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# This information is extracted from the MacOS runner specs located at:
2-
# https://github.com/actions/runner-images/blob/win19/20230129.2/images/macos/macos-12-Readme.md
2+
# https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md
3+
#
4+
# When updating, also ensure the "xcode_destination" entries in
5+
# `.github/workflows/test_objectivec.yml` are supported for the given versions
6+
# of Xcode.
37
xcode_version(
48
name = "version14_2_14C18",
59
version = "14.2.14C18",

.github/actions/bazel-docker/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,5 @@ runs:
7171

7272
- name: Save Bazel repository cache
7373
# Only allow repository cache updates during post-submits.
74-
if: ${{ github.event_name == 'push' }}
74+
if: ${{ github.event_name != 'pull_request' && github.event_name != 'pull_request_target' }}
7575
uses: ./.github/actions/internal/repository-cache-save

.github/actions/bazel/action.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,14 @@ runs:
6363
run: echo "BAZELISK_PATH=$LOCALAPPDATA\bazelisk" >> $GITHUB_ENV
6464

6565
- name: Cache Bazelisk
66-
if: ${{ github.event_name == 'push' }}
66+
if: ${{ github.event_name != 'pull_request' && github.event_name != 'pull_request_target' }}
6767
uses: actions/cache@627f0f41f6904a5b1efbaed9f96d9eb58e92e920 # v3.2.4
6868
with:
6969
path: ${{ env.BAZELISK_PATH }}
7070
key: bazel-${{ runner.os }}-${{ inputs.version }}
7171

7272
- name: Restore Bazelisk
73-
if: ${{ github.event_name != 'push' }}
73+
if: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
7474
uses: actions/cache/restore@627f0f41f6904a5b1efbaed9f96d9eb58e92e920 # v3.2.4
7575
with:
7676
path: ${{ env.BAZELISK_PATH }}
@@ -107,5 +107,5 @@ runs:
107107

108108
- name: Save Bazel repository cache
109109
# Only allow repository cache updates during post-submits.
110-
if: ${{ github.event_name == 'push' }}
110+
if: ${{ github.event_name != 'pull_request' && github.event_name != 'pull_request_target'}}
111111
uses: ./.github/actions/internal/repository-cache-save

.github/actions/ccache/action.yml

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@ inputs:
1212
runs:
1313
using: 'composite'
1414
steps:
15+
- name: Configure ccache environment variables
16+
shell: bash
17+
run: |
18+
echo "CCACHE_BASEDIR=${{ github.workspace }}" >> $GITHUB_ENV
19+
echo "CCACHE_DIR=${{ github.workspace }}/.ccache" >> $GITHUB_ENV
20+
echo "CCACHE_COMPRESS=true" >> $GITHUB_ENV
21+
echo "CCACHE_COMPRESSLEVEL=5" >> $GITHUB_ENV
22+
echo "CCACHE_MAXSIZE=100M" >> $GITHUB_ENV
23+
echo "CCACHE_SLOPPINESS=clang_index_store,include_file_ctime,include_file_mtime,file_macro,time_macros" >> $GITHUB_ENV
24+
echo "CCACHE_DIRECT=true" >> $GITHUB_ENV
25+
echo "CCACHE_CMAKE_FLAGS=-Dprotobuf_ALLOW_CCACHE=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache" >> $GITHUB_ENV
26+
1527
- name: Setup ccache on Windows
1628
if: ${{ runner.os == 'Windows' }}
1729
uses: ./.github/actions/internal/ccache-setup-windows
@@ -23,7 +35,10 @@ runs:
2335
- name: Setup fixed path ccache caching
2436
uses: actions/cache@627f0f41f6904a5b1efbaed9f96d9eb58e92e920 # v3.2.4
2537
with:
26-
path: .ccache
38+
path: |
39+
.ccache/**
40+
!.ccache/lock
41+
!.ccache/tmp
2742
# Always push to a cache key unique to this commit.
2843
key: ${{ format('ccache-{0}-{1}-{2}', inputs.cache-prefix, github.ref_name, github.sha) }}
2944
# Select a cache to restore from with the following order of preference:
@@ -35,18 +50,6 @@ runs:
3550
${{ format('ccache-{0}-{1}', inputs.cache-prefix, github.ref_name) }}
3651
${{ format('ccache-{0}-{1}', inputs.cache-prefix, github.base_ref) }}
3752
38-
- name: Configure ccache environment variables
39-
shell: bash
40-
run: |
41-
echo "CCACHE_BASEDIR=${{ github.workspace }}" >> $GITHUB_ENV
42-
echo "CCACHE_DIR=${{ github.workspace }}/.ccache" >> $GITHUB_ENV
43-
echo "CCACHE_COMPRESS=true" >> $GITHUB_ENV
44-
echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV
45-
echo "CCACHE_MAXSIZE=600M" >> $GITHUB_ENV
46-
echo "CCACHE_SLOPPINESS=clang_index_store,include_file_ctime,include_file_mtime,file_macro,time_macros" >> $GITHUB_ENV
47-
echo "CCACHE_DIRECT=true" >> $GITHUB_ENV
48-
echo "CCACHE_CMAKE_FLAGS=-Dprotobuf_ALLOW_CCACHE=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache $CCACHE_CMAKE_FLAGS" >> $GITHUB_ENV
49-
5053
- name: Enable module support
5154
if: ${{ inputs.support-modules }}
5255
shell: bash
@@ -55,11 +58,6 @@ runs:
5558
echo "CCACHE_DEPEND=true" >> $GITHUB_ENV
5659
5760
- name: Zero out ccache
58-
if: ${{ runner.os == 'macOS' }}
61+
if: ${{ runner.os != 'Linux' }}
5962
shell: bash
6063
run: ccache -z
61-
62-
- name: Zero out ccache
63-
if: ${{ runner.os == 'Windows' }}
64-
shell: pwsh
65-
run: ${{ github.workspace }}\ccache.exe -z

.github/actions/internal/ccache-setup-windows/action.yml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ inputs:
1010
runs:
1111
using: 'composite'
1212
steps:
13+
- name: Setup MSVC
14+
uses: ilammy/msvc-dev-cmd@cec98b9d092141f74527d0afa6feb2af698cfe89 # v1.12.1
15+
with:
16+
arch: x64
17+
vsversion: '2019'
18+
19+
- name: Install ccache
20+
shell: bash
21+
run: choco install ccache --version=4.7.4
22+
1323
- name: Configure ccache environment variables
1424
shell: pwsh
1525
run: |
@@ -18,19 +28,9 @@ runs:
1828
echo "CCACHE_COMPILER=$cllocation" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
1929
echo "CCACHE_COMPILERTYPE=msvc" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
2030
21-
- name: Download ccache
31+
- name: Configure Windows-specific ccache environment variables
2232
shell: bash
33+
# Windows caches are about 2x larger than other platforms.
2334
run: |
24-
curl -kLSs "https://github.com/ccache/ccache/releases/download/v${{ inputs.ccache-version }}/ccache-${{ inputs.ccache-version }}-windows-x86_64.zip" -o ccache.zip
25-
unzip ccache.zip
26-
cp ccache-${{ inputs.ccache-version }}-windows-x86_64/ccache.exe ccache.exe
27-
cp ccache.exe cl.exe
28-
rm ccache.zip
29-
30-
- name: Configure msbuild flags
31-
shell: bash
32-
run: echo "CCACHE_MSBUILD_FLAGS=/p:CLToolExe=cl.exe /p:CLToolPath=${{ github.workspace}}" >> $GITHUB_ENV
33-
34-
- name: Configure cmake flags
35-
shell: bash
36-
run: echo "CCACHE_CMAKE_FLAGS=-Dprotobuf_ALLOW_CCACHE=ON" >> $GITHUB_ENV
35+
echo "CCACHE_COMPRESSLEVEL=10" >> $GITHUB_ENV
36+
echo "CCACHE_MAXSIZE=200M" >> $GITHUB_ENV

.github/workflows/README.md

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
This directory contains all of our automatically triggered workflows.
2+
3+
# Test runner
4+
5+
Our top level `test_runner.yml` is responsible for kicking off all tests, which
6+
are represented as reusable workflows. This is carefully constructed to satisfy
7+
the design laid out in go/protobuf-gha-protected-resources (see below), and
8+
duplicating it across every workflow file would be difficult to maintain. As an
9+
added bonus, we can manually dispatch our full test suite with a single button
10+
and monitor the progress of all of them simultaneously in GitHub's actions UI.
11+
12+
There are five ways our test suite can be triggered:
13+
14+
- **Post-submit tests** (`push`): These are run over newly submitted code
15+
that we can assume has been thoroughly reviewed. There are no additional
16+
security concerns here and these jobs can be given highly privileged access to
17+
our internal resources and caches.
18+
19+
- **Pre-submit tests from a branch** (`pull_request`): These are run over
20+
every PR as changes are made. Since they are coming from branches in our
21+
repository, they have secret access by default and can also be given highly
22+
privileged access. However, we expect *many* of these events per change,
23+
and likely many from abandoned/exploratory changes. Given the much higher
24+
frequency, we restrict the ability to *write* to our more expensive caches.
25+
26+
- **Pre-submit tests from a fork** (`pull_request_target`): These are run
27+
over every PR from a forked repository as changes are made. These have much
28+
more restricted access, since they could be coming from anywhere. To protect
29+
our secret keys and our resources, tests will not run until a commit has been
30+
labeled `safe to submit`. Further commits will require further approvals to
31+
run our test suite. Once marked as safe, we will provide read-only access to
32+
our caches and Docker images, but will generally disallow any writes to shared
33+
resources.
34+
35+
- **Continuous tests** (`schedule`): These are run on a fixed schedule. We
36+
currently have them set up to run daily, and can help identify non-hermetic
37+
issues in tests that don't get run often (such as due to test caching) or during
38+
slow periods like weekends and holidays. Similar to post-submit tests, these
39+
are run over submitted code and are highly privileged in the resources they
40+
can use.
41+
42+
- **Manual testing** (`workflow_dispatch`): Our test runner can be triggered
43+
manually over any branch. This is treated similarly to pre-submit tests,
44+
which should be highly privileged because they can only be triggered by the
45+
protobuf team.
46+
47+
# Staleness handling
48+
49+
While Bazel handles code generation seamlessly, we do support build systems that
50+
don't. There are a handful of cases where we need to check in generated files
51+
that can become stale over time. In order to provide a good developer
52+
experience, we've implemented a system to make this more manageable.
53+
54+
- Stale files should have a corresponding `staleness_test` Bazel target. This
55+
should be marked `manual` to avoid getting picked up in CI, but will fail if
56+
files become stale. It also provides a `--fix` flag to update the stale files.
57+
58+
- Bazel tests will never depend on the checked-in versions, and will generate
59+
new ones on-the-fly during build.
60+
61+
- Non-Bazel tests will always regenerate necessary files before starting. This
62+
is done using our `bash` and `docker` actions, which should be used for any
63+
non-Bazel tests. This way, no tests will fail due to stale files.
64+
65+
- A post-submit job will immediately regenerate any stale files and commit them
66+
if they've changed.
67+
68+
- A scheduled job will run late at night every day to make sure the post-submit
69+
is working as expected (that is, it will run all the staleness tests).
70+
71+
The `regenerate_stale_files.sh` script is the central script responsible for all
72+
the re-generation of stale files.
73+
74+
# Forked PRs
75+
76+
Because we need secret access to run our tests, we use the `pull_request_target`
77+
event for PRs coming from forked repositories. We do check out the code from the
78+
PR's head, but the workflow files themselves are always fetched from the *base*
79+
branch (that is, the branch we're merging to). Therefore, any changes to these
80+
files won't be tested, so we explicitly ban PRs that touch these files.
81+
82+
# Caches
83+
84+
We have a number of different caching strategies to help speed up tests. These
85+
live either in GCP buckets or in our GitHub repository cache. The former has
86+
a lot of resources available and we don't have to worry as much about bloat.
87+
On the other hand, the GitHub repository cache is limited to 10GB, and will
88+
start pruning old caches when it exceeds that threshold. Therefore, we need
89+
to be very careful about the size and quantity of our caches in order to
90+
maximize the gains.
91+
92+
## Bazel remote cache
93+
94+
As described in https://bazel.build/remote/caching, remote caching allows us to
95+
offload a lot of our build steps to a remote server that holds a cache of
96+
previous builds. We use our GCP project for this storage, and configure
97+
*every* Bazel call to use it. This provides substantial performance
98+
improvements at minimal cost.
99+
100+
We do not allow forked PRs to upload updates to our Bazel caches, but they
101+
do use them. Every other event is given read/write access to the caches.
102+
Because Bazel behaves poorly under certain environment changes (such as
103+
toolchain, operating system), we try to use fine-grained caches. Each job
104+
should typically have its own cache to avoid cross-pollution.
105+
106+
## Bazel repository cache
107+
108+
When Bazel starts up, it downloads all the external dependencies for a given
109+
build and stores them in the repository cache. This cache is *separate* from
110+
the remote cache, and only exists locally. Because we have so many Bazel
111+
dependencies, this can be a source of frequent flakes due to network issues.
112+
113+
To avoid this, we keep a cached version of the repository cache in GitHub's
114+
action cache. Our full set of repository dependencies ends up being ~300MB,
115+
which is fairly expensive given our 10GB maximum. The most expensive ones seem
116+
to come from Java, which has some very large downstream dependencies.
117+
118+
Given the cost, we take a more conservative approach for this cache. Pull request
119+
events will never write to this cache, but all events can read from it.
120+
Additionally, we only store three caches for any given commit, one per platform.
121+
This means that multiple jobs are trying to update the same cache, leading to a
122+
race. GitHub rejects all but one of these updates, so we designed the system so
123+
that caches are only updated if they've actually changed. That way, over time
124+
(and multiple pushes) the repository caches will incrementally grow to encompass
125+
all of our dependencies. A scheduled job will run monthly to clear these caches
126+
to prevent unbounded growth as our dependencies evolve.
127+
128+
## ccache
129+
130+
In order to speed up non-Bazel builds to be on par with Bazel, we make use of
131+
[ccache](https://ccache.dev/). This intercepts all calls to the compiler, and
132+
caches the result. Subsequent calls with a cache-hit will very quickly
133+
short-circuit and return the already computed result. This has minimal effect
134+
on any *single* job, since we typically only run a single build. However, by
135+
caching the ccache results in GitHub's action cache we can substantially
136+
decrease the build time of subsequent runs.
137+
138+
One useful feature of ccache is that you can set a maximum cache size, and it
139+
will automatically prune older results to keep below that limit. On Linux and
140+
Mac cmake builds, we generally get 30MB caches and set a 100MB cache limit. On
141+
Windows, with debug symbol stripping we get ~70MB and set a 200MB cache limit.
142+
143+
Because CMake builds tend to be our slowest, bottlenecking the entire CI process,
144+
we use a fairly expensive strategy with ccache. All events will cache their
145+
ccache directory, keyed by the commit and the branch. This means that each
146+
PR and each branch will write its own set of caches. When looking up which
147+
cache to use initially, each job will first look for a recent cache in its
148+
current branch. If it can't find one, it will accept a cache from the base
149+
branch (for example, PRs will initially use the latest cache from their target
150+
branch).
151+
152+
While the ccache caches quickly over-run our GitHub action cache, they also
153+
quickly become useless. Since GitHub prunes caches based on the time they were
154+
last used, this just means that we'll see quicker turnover.
155+
156+
## Bazelisk
157+
158+
Bazelisk will automatically download a pinned version of Bazel on first use.
159+
This can lead to flakes, and to avoid that we cache the result keyed on the
160+
Bazel version. Pull request events never write to this cache, but it's unlikely
161+
to change very often.
162+
163+
## Docker images
164+
165+
Instead of downloading a fresh Docker image for every test run, we can save it
166+
as a tar and cache it using `docker image save` and later restore using
167+
`docker image load`. This can decrease download times and also reduce flakes.
168+
Note, Docker's load can actually be significantly slower than a pull in certain
169+
situations. Therefore, we should reserve this strategy for only Docker images
170+
that are causing noticeable flakes.
171+
172+
## Pip dependencies
173+
174+
The actions/setup-python action we use for Python supports automated caching
175+
of pip dependencies. We enable this to avoid having to download these
176+
dependencies on every run, which can lead to flakes.
177+
178+
# Custom actions
179+
180+
We've defined a number of custom actions to abstract out shared pieces of our
181+
workflows.
182+
183+
- **Bazel** use this for running all Bazel tests. It can take either a single
184+
Bazel command or a more general bash command. In the latter case, it provides
185+
environment variables for running Bazel with all our standardized settings.
186+
187+
- **Bazel-Docker** nearly identical to the **Bazel** action, this additionally
188+
runs everything in a specified Docker image.
189+
190+
- **Bash** use this for running non-Bazel tests. It takes a bash command and
191+
runs it verbatim. It also handles the regeneration of stale files (which does
192+
use Bazel), which non-Bazel tests might depend on.
193+
194+
- **Docker** nearly identical to the **Bash** action, this additionally runs
195+
everything in a specified Docker image.
196+
197+
- **ccache** this sets up a ccache environment, and initializes some
198+
environment variables for standardized usage of ccache.
199+
200+
- **Cross-compile protoc** this abstracts out the compilation of protoc using
201+
our cross-compilation infrastructure. It will set a `PROTOC` environment
202+
variable that gets automatically picked up by a lot of our infrastructure.
203+
This is most useful in conjunction with the **Bash** action with non-Bazel
204+
tests.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Forked PR workflow check
2+
3+
# This workflow prevents modifications to our workflow files in PRs from forked
4+
# repositories. Since tests in these PRs always use the workflows in the
5+
# *target* branch, modifications to these files can't be properly tested.
6+
7+
on:
8+
# safe presubmit
9+
pull_request:
10+
branches:
11+
- main
12+
- '[0-9]+.x'
13+
# The 21.x branch still uses Kokoro
14+
- '!21.x'
15+
# For testing purposes so we can stage this on the `gha` branch.
16+
- gha
17+
paths:
18+
- '.github/workflows/**'
19+
20+
jobs:
21+
check:
22+
name: Check PR source
23+
runs-on: ubuntu-latest
24+
steps:
25+
- run: >
26+
${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/protobuf' }} ||
27+
(echo "This pull request is from an unsafe fork (${{ github.event.pull_request.head.repo.full_name }}) and isn't allowed to modify workflow files!" && exit 1)

.github/workflows/staleness_check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
branch: [main, 22.x]
1515
os: [{ name: Linux, value: ubuntu-latest}]
1616

17-
name: ${{ matrix.os.name }} ${{ matrix.branch}}
17+
name: Test staleness ${{ matrix.os.name }} ${{ matrix.branch}}
1818
runs-on: ${{ matrix.os.value }}
1919
steps:
2020
- name: Checkout ${{ matrix.branch }}

0 commit comments

Comments
 (0)