Skip to content

Commit

Permalink
proto_format: Optimize and cleanup (envoyproxy#26496)
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Northey <ryan@synca.io>
  • Loading branch information
phlax authored Apr 4, 2023
1 parent c2702b6 commit bd346d5
Show file tree
Hide file tree
Showing 13 changed files with 427 additions and 216 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/stages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ stages:
steps:
- task: Cache@2
inputs:
key: "proto_format | ./WORKSPACE | **/*.bzl"
key: "proto_format | ./WORKSPACE | **/*.bzl, !mobile/**, !envoy-docs/**"
path: $(Build.StagingDirectory)/repository_cache
continueOnError: true

Expand Down
4 changes: 4 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ build --action_env=CXX --host_action_env=CXX
build --action_env=LLVM_CONFIG --host_action_env=LLVM_CONFIG
build --action_env=PATH --host_action_env=PATH

# Allow stamped caches to bust when local filesystem changes.
# Requires setting `BAZEL_VOLATILE_DIRTY` in the env.
build --action_env=BAZEL_VOLATILE_DIRTY --host_action_env=BAZEL_VOLATILE_DIRTY

build --enable_platform_specific_config
build --test_summary=terse

Expand Down
2 changes: 1 addition & 1 deletion api/versioning/BUILD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# DO NOT EDIT. This file is generated by tools/proto_format/active_protos_gen.py.
# DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py.

load("@rules_proto//proto:defs.bzl", "proto_library")

Expand Down
21 changes: 14 additions & 7 deletions bazel/get_workspace_status
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,20 @@ git_rev=$(git rev-parse HEAD) || exit 1
echo "BUILD_SCM_REVISION ${git_rev}"
echo "STABLE_BUILD_SCM_REVISION ${git_rev}"

# Check whether there are any uncommitted changes
tree_status="Clean"
git diff-index --quiet HEAD -- || {
tree_status="Modified"
}
# If BAZEL_VOLATILE_DIRTY is set then stamped builds will rebuild uncached when
# either a tracked file changes or an untracked file is added or removed.
if [[ -n "$BAZEL_VOLATILE_DIRTY" ]]; then
porcelain_status="$(git status --porcelain | sha256sum)"
tracked_status="$(git ls-files -s | sha256sum)"
tree_status="$(echo "${porcelain_status}:${tracked_status}" | sha256sum | head -c 40)"
else
# Check whether there are any uncommitted changes
tree_status="Clean"
git diff-index --quiet HEAD -- || {
tree_status="Modified"
}
fi

echo "BUILD_SCM_STATUS ${tree_status}"
echo "STABLE_BUILD_SCM_STATUS ${tree_status}"

Expand All @@ -48,5 +57,3 @@ git_remote=$(git remote get-url origin)
if [[ -n "$git_remote" ]]; then
echo "BUILD_SCM_REMOTE ${git_remote}"
fi


11 changes: 7 additions & 4 deletions ci/do_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -499,17 +499,20 @@ elif [[ "$CI_TARGET" == "format" ]]; then
elif [[ "$CI_TARGET" == "fix_proto_format" ]]; then
# proto_format.sh needs to build protobuf.
setup_clang_toolchain
BAZEL_BUILD_OPTIONS="${BAZEL_BUILD_OPTIONS[*]}" "${ENVOY_SRCDIR}"/tools/proto_format/proto_format.sh fix
BAZEL_BUILD_OPTIONS="${BAZEL_BUILD_OPTIONS[*]}" "${ENVOY_SRCDIR}/tools/proto_format/proto_format.sh" fix
exit 0
elif [[ "$CI_TARGET" == "check_proto_format" ]]; then
# proto_format.sh needs to build protobuf.
setup_clang_toolchain
echo "Run protoxform test"

echo "Check proto format ..."
BAZEL_BUILD_OPTIONS="${BAZEL_BUILD_OPTIONS[*]}" "${ENVOY_SRCDIR}/tools/proto_format/proto_format.sh" check

echo "Run protoxform/protoprint test ..."
bazel run "${BAZEL_BUILD_OPTIONS[@]}" \
--remote_download_minimal \
--//tools/api_proto_plugin:default_type_db_target=//tools/testdata/protoxform:fix_protos \
--//tools/api_proto_plugin:extra_args=api_version:3.7 \
//tools/protoprint:protoprint_test
BAZEL_BUILD_OPTIONS="${BAZEL_BUILD_OPTIONS[*]}" "${ENVOY_SRCDIR}"/tools/proto_format/proto_format.sh check
exit 0
elif [[ "$CI_TARGET" == "docs" ]]; then
setup_clang_toolchain
Expand Down
114 changes: 112 additions & 2 deletions tools/proto_format/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,25 @@ licenses(["notice"]) # Apache 2

envoy_package()

# Files to include when building or comparing the normalized API
API_FILES = [
"BUILD",
"**/BUILD",
"**/*.proto",
]

# Files to ignore when building or comparing the normalized API
API_FILES_IGNORED = [
"envoy/annotations/BUILD",
"envoy/annotations/deprecation.proto",
"envoy/annotations/resource.proto",
"bazel/**",
"examples/**",
"test/**",
"tools/**",
"envoy/service/auth/**/*",
]

# Bazel query doesnt support json output, and jq insists on inputs with json suffix,
# although can handle "raw" data also. For these reasons these queries have json
# suffix but are not valid json.
Expand Down Expand Up @@ -85,21 +104,112 @@ genrule(
$(location :format_api) \
--outfile=$@ \
--xformed=$(location :xformed) \
--build_file=$(location //tools/type_whisperer:api_build_file) \
--protoprinted=$(location //tools/protoprint:protoprinted) \
""",
exec_tools = [
":format_api",
":xformed",
"//tools/type_whisperer:api_build_file",
"//tools/protoprint:protoprinted",
],
)

genrule(
name = "formatted_api_hashes",
outs = ["formatted-api-hashes.txt"],
cmd = """
TEMPDIR=$$(mktemp -d) \
&& git -C $$TEMPDIR init -q \
&& mkdir $$TEMPDIR/api \
&& tar xf $(location :formatted_api) -C $$TEMPDIR/api \
&& git -C $$TEMPDIR add api/ \
&& git -C $$TEMPDIR ls-files -s api/ > $@ \
&& rm -rf $$TEMPDIR
""",
tools = [":formatted_api"],
)

py_binary(
name = "fetch_normalized_changes",
srcs = ["fetch_normalized_changes.py"],
args = [
"--formatted=$(location :formatted_api)",
"--format_hashes=$(location :formatted_api_hashes)",
"--local_hashes=$(location :local_api_hashes)",
],
data = [
":formatted_api",
":formatted_api_hashes",
":local_api_hashes",
],
)

genrule(
name = "normalized_api_changes",
outs = ["normalized_api_changes.tar"],
cmd = """
$(location :fetch_normalized_changes) \
$@ \
--formatted=$(location :formatted_api) \
--format_hashes=$(location :formatted_api_hashes) \
--local_hashes=$(location :local_api_hashes) \
--local_changes=$(location :local_api_changes)
""",
tools = [
":fetch_normalized_changes",
":formatted_api",
":formatted_api_hashes",
":local_api_changes",
":local_api_hashes",
],
)

py_binary(
name = "proto_sync",
srcs = ["proto_sync.py"],
args = [
"--changed=$(location :normalized_api_changes)",
"--api_root=%s/api" % PATH,
"--formatted=$(location :formatted_api)",
],
data = [":formatted_api"],
data = [":normalized_api_changes"],
)

#
# These 2 targets must run locally to generate a summary of the state
# of the local filesystem API, preferably with `BAZEL_VOLATILE_DIRTY` set in
# the env to detect changes without waiting until the commit hash changes.
#
# Both of these are necessary, and combined will detect any change to the tracked
# API files as well as the addition and removal of new ones.
#

# Stamped, local build to get hashes of local api tree
genrule(
name = "local_api_hashes",
outs = ["api-hashes.txt"],
cmd = """
git -C %s ls-files -os ":!:api/%s" "api/%s" > $@
""" % (
PATH,
'" ":!:api/'.join(API_FILES_IGNORED),
'" "api/'.join(API_FILES),
),
stamp = True,
tags = ["no-remote"],
)

# Stamped, local build to get porcelain status of local api tree
genrule(
name = "local_api_changes",
outs = ["api-changes.txt"],
cmd = """
git -C %s status --porcelain ":!:api/%s" "api/%s" > $@
""" % (
PATH,
'" ":!:api/'.join(API_FILES_IGNORED),
'" "api/'.join(API_FILES),
),
stamp = True,
tags = ["no-remote"],
)
68 changes: 0 additions & 68 deletions tools/proto_format/active_protos_gen.py

This file was deleted.

79 changes: 79 additions & 0 deletions tools/proto_format/fetch_normalized_changes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python3

#
# Compare hashes for the local filesystem and the normalized API, any files.
#
# Return a tarball containing normalized versions of any changed or mismatched
# files.
#

import argparse
import io
import pathlib
import sys
import tarfile


def sync(outfile, formatted, local_hash_path, local_change_path, format_hash_path):
local_hashes = {}
for line in local_hash_path.read_text().splitlines():
parts = line.split()
if len(parts) == 1:
# Untracked file
continue
mode, fhash, __, path = parts
local_hashes[path] = dict(mode=mode, fhash=fhash)

format_hashes = {}
for line in format_hash_path.read_text().splitlines():
mode, fhash, __, path = line.split()
format_hashes[path] = dict(mode=mode, fhash=fhash)

local_changes = []
for line in local_change_path.read_text().splitlines():
change, path = line.split()
local_changes.append(path)

to_check = []
to_remove = []
for path, data in format_hashes.items():
should_check_update = (
path in local_changes or path not in local_hashes or data != local_hashes[path])
if should_check_update:
to_check.append(path)

for path in list(local_hashes) + local_changes:
if path not in format_hashes:
to_remove.append(path)

if not to_check and not to_remove:
with open(outfile, "w") as f:
f.write("")
sys.exit(0)

# Tarballs are only opened if there are updates to check
with tarfile.open(outfile, "w") as desttar:
with tarfile.open(formatted) as srctar:
for update in to_check:
member = srctar.getmember(f".{update[3:]}")
desttar.addfile(member, srctar.extractfile(member.name))
if to_remove:
data = "\n".join(to_remove).encode("utf-8")
tarinfo = tarfile.TarInfo("REMOVE")
tarinfo.size = len(data)
desttar.addfile(tarinfo, io.BytesIO(data))


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('outfile')
parser.add_argument('--formatted')
parser.add_argument('--local_hashes')
parser.add_argument('--local_changes')
parser.add_argument('--format_hashes')
args = parser.parse_args()
sync(
str(pathlib.Path(args.outfile).absolute()), str(pathlib.Path(args.formatted).absolute()),
pathlib.Path(args.local_hashes).absolute(),
pathlib.Path(args.local_changes).absolute(),
pathlib.Path(args.format_hashes).absolute())
Loading

0 comments on commit bd346d5

Please sign in to comment.