Skip to content

Commit

Permalink
Dont query remote cache but always use bytestream protocol (#17352)
Browse files Browse the repository at this point in the history
When using `--experimental_remote_build_event_upload=minimal`, instead of querying remote cache to decide whether convert local path of a file to `bytestream://`, it now always use `bytestream://` for BEP referenced files.

Closes #16999.

Closes #17025.

PiperOrigin-RevId: 502351401
Change-Id: Ic858367ffaf3c2a2c20db88ada85fbbe1d93aae8

Co-authored-by: Chi Wang <chiwang@google.com>
  • Loading branch information
ShreeM01 and coeuvre authored Jan 30, 2023
1 parent 0f9912b commit a951b94
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import build.bazel.remote.execution.v2.RequestMetadata;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.eventbus.Subscribe;
import com.google.common.util.concurrent.ListenableFuture;
Expand All @@ -47,7 +48,6 @@
import io.reactivex.rxjava3.schedulers.Schedulers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -107,10 +107,16 @@ class ByteStreamBuildEventArtifactUploader extends AbstractReferenceCounted
}

public void omitFile(Path file) {
Preconditions.checkState(
remoteBuildEventUploadMode != RemoteBuildEventUploadMode.MINIMAL,
"Cannot omit file in MINIMAL mode");
omittedFiles.add(file.asFragment());
}

public void omitTree(Path treeRoot) {
Preconditions.checkState(
remoteBuildEventUploadMode != RemoteBuildEventUploadMode.MINIMAL,
"Cannot omit tree in MINIMAL mode");
omittedTreeRoots.add(treeRoot.asFragment());
}

Expand Down Expand Up @@ -209,10 +215,6 @@ private static void processQueryResult(
}
}

private boolean shouldQuery(PathMetadata path) {
return path.getDigest() != null && !path.isRemote() && !path.isDirectory();
}

private boolean shouldUpload(PathMetadata path) {
boolean result =
path.getDigest() != null && !path.isRemote() && !path.isDirectory() && !path.isOmitted();
Expand All @@ -239,7 +241,8 @@ private Single<List<PathMetadata>> queryRemoteCache(
List<PathMetadata> filesToQuery = new ArrayList<>();
Set<Digest> digestsToQuery = new HashSet<>();
for (PathMetadata path : paths) {
if (shouldQuery(path)) {
// Query remote cache for files even if omitted from uploading
if (shouldUpload(path) || path.isOmitted()) {
filesToQuery.add(path);
digestsToQuery.add(path.getDigest());
} else {
Expand Down Expand Up @@ -329,16 +332,19 @@ private Single<PathConverter> upload(Set<Path> files) {
reporterUploadError(e);
return new PathMetadata(
file,
/*digest=*/ null,
/*directory=*/ false,
/*remote=*/ false,
/*omitted=*/ false);
/* digest= */ null,
/* directory= */ false,
/* remote= */ false,
/* omitted= */ false);
}
})
.collect(Collectors.toList())
.flatMap(paths -> queryRemoteCache(remoteCache, context, paths))
.flatMap(paths -> uploadLocalFiles(remoteCache, context, paths))
.map(paths -> new PathConverterImpl(remoteServerInstanceName, paths)),
.map(
paths ->
new PathConverterImpl(
remoteServerInstanceName, paths, remoteBuildEventUploadMode)),
RemoteCache::release);
}

Expand Down Expand Up @@ -377,17 +383,23 @@ private static class PathConverterImpl implements PathConverter {
private final Set<Path> skippedPaths;
private final Set<Path> localPaths;

PathConverterImpl(String remoteServerInstanceName, List<PathMetadata> uploads) {
PathConverterImpl(
String remoteServerInstanceName,
List<PathMetadata> uploads,
RemoteBuildEventUploadMode remoteBuildEventUploadMode) {
Preconditions.checkNotNull(uploads);
this.remoteServerInstanceName = remoteServerInstanceName;
pathToDigest = new HashMap<>(uploads.size());
pathToDigest = Maps.newHashMapWithExpectedSize(uploads.size());
ImmutableSet.Builder<Path> skippedPaths = ImmutableSet.builder();
ImmutableSet.Builder<Path> localPaths = ImmutableSet.builder();
for (PathMetadata pair : uploads) {
Path path = pair.getPath();
Digest digest = pair.getDigest();
if (digest != null) {
if (pair.isRemote()) {
// Always use bytestream:// in MINIMAL mode
if (remoteBuildEventUploadMode == RemoteBuildEventUploadMode.MINIMAL) {
pathToDigest.put(path, digest);
} else if (pair.isRemote()) {
pathToDigest.put(path, digest);
} else {
localPaths.add(path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,9 +302,8 @@ public String getTypeDescription() {
"If set to 'all', all local outputs referenced by BEP are uploaded to remote cache.\n"
+ "If set to 'minimal', local outputs referenced by BEP are not uploaded to the"
+ " remote cache, except for files that are important to the consumers of BEP (e.g."
+ " test logs and timing profile).\n"
+ "file:// scheme is used for the paths of local files and bytestream:// scheme is"
+ " used for the paths of (already) uploaded files.\n"
+ " test logs and timing profile). bytestream:// scheme is always used for the uri of"
+ " files even if they are missing from remote cache.\n"
+ "Default to 'all'.")
public RemoteBuildEventUploadMode remoteBuildEventUploadMode;

Expand Down
101 changes: 60 additions & 41 deletions src/test/shell/bazel/remote/remote_build_event_uploader_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,32 @@ else
declare -r EXE_EXT=""
fi

BEP_JSON=bep.json

function expect_bes_file_uploaded() {
local file=$1
if [[ $(cat $BEP_JSON) =~ ${file}\",\"uri\":\"bytestream://localhost:${worker_port}/blobs/([^/]*) ]]; then
if ! remote_cas_file_exist ${BASH_REMATCH[1]}; then
cat $BEP_JSON >> $TEST_log && append_remote_cas_files $TEST_log && fail "$file is not uploaded"
fi
else
cat $BEP_JSON > $TEST_log
fail "$file is not converted to bytestream://"
fi
}

function expect_bes_file_not_uploaded() {
local file=$1
if [[ $(cat $BEP_JSON) =~ ${file}\",\"uri\":\"bytestream://localhost:${worker_port}/blobs/([^/]*) ]]; then
if remote_cas_file_exist ${BASH_REMATCH[1]}; then
cat $BEP_JSON >> $TEST_log && append_remote_cas_files $TEST_log && fail "$file is uploaded"
fi
else
cat $BEP_JSON > $TEST_log
fail "$file is not converted to bytestream://"
fi
}

function test_upload_minimal_convert_paths_for_existed_blobs() {
mkdir -p a
cat > a/BUILD <<EOF
Expand All @@ -84,12 +110,11 @@ EOF
bazel build \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_log "a:foo.*bytestream://" || fail "paths for existed blobs should be converted"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_uploaded foo.txt
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_doesnt_upload_missing_blobs() {
Expand All @@ -106,12 +131,11 @@ EOF
bazel build \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_not_uploaded foo.txt
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_respect_no_upload_results() {
Expand All @@ -128,12 +152,11 @@ EOF
--remote_cache=grpc://localhost:${worker_port} \
--remote_upload_local_results=false \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_not_uploaded foo.txt
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_respect_no_upload_results_combined_cache() {
Expand All @@ -154,12 +177,11 @@ EOF
--incompatible_remote_results_ignore_disk \
--remote_upload_local_results=false \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "a:foo.*bytestream://" || fail "local files are uploaded"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_not_uploaded foo.txt
expect_bes_file_uploaded command.profile.gz
remote_cas_files="$(count_remote_cas_files)"
[[ "$remote_cas_files" == 1 ]] || fail "Expected 1 remote cas entries, not $remote_cas_files"
disk_cas_files="$(count_disk_cas_files $cache_dir)"
Expand Down Expand Up @@ -187,12 +209,11 @@ EOF
bazel build \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo-alias >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "a:foo.*bytestream://"
expect_log "command.profile.gz.*bytestream://"
expect_bes_file_not_uploaded foo.txt
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_trees_doesnt_upload_missing_blobs() {
Expand All @@ -204,8 +225,9 @@ def _gen_output_dir_impl(ctx):
outputs = [output_dir],
inputs = [],
command = """
mkdir -p $1/sub; \
index=0; while ((index<10)); do echo $index >$1/$index.txt; index=$(($index+1)); done
echo 0 > $1/0.txt
echo 1 > $1/1.txt
mkdir -p $1/sub
echo "Shuffle, duffle, muzzle, muff" > $1/sub/bar
""",
arguments = [output_dir.path],
Expand Down Expand Up @@ -233,13 +255,13 @@ EOF
bazel build \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "a:foo.*bytestream://" || fail "local tree files are uploaded"
expect_not_log "a/dir/.*bytestream://" || fail "local tree files are uploaded"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_not_uploaded dir/0.txt
expect_bes_file_not_uploaded dir/1.txt
expect_bes_file_not_uploaded dir/sub/bar
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_upload_testlogs() {
Expand All @@ -259,14 +281,13 @@ EOF
bazel test \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:test >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_not_log "test.sh.*bytestream://" || fail "test script is uploaded"
expect_log "test.log.*bytestream://" || fail "should upload test.log"
expect_log "test.xml.*bytestream://" || fail "should upload test.xml"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_not_uploaded test.sh
expect_bes_file_uploaded test.log
expect_bes_file_uploaded test.xml
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_upload_buildlogs() {
Expand All @@ -283,13 +304,12 @@ EOF
bazel build \
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || true

cat bep.json > $TEST_log
expect_log "stdout.*bytestream://" || fail "should upload stdout"
expect_log "stderr.*bytestream://" || fail "should upload stderr"
expect_log "command.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_uploaded stdout
expect_bes_file_uploaded stderr
expect_bes_file_uploaded command.profile.gz
}

function test_upload_minimal_upload_profile() {
Expand All @@ -306,11 +326,10 @@ EOF
--remote_executor=grpc://localhost:${worker_port} \
--experimental_remote_build_event_upload=minimal \
--profile=mycommand.profile.gz \
--build_event_json_file=bep.json \
--build_event_json_file=$BEP_JSON \
//a:foo >& $TEST_log || fail "Failed to build"

cat bep.json > $TEST_log
expect_log "mycommand.profile.gz.*bytestream://" || fail "should upload profile data"
expect_bes_file_uploaded "mycommand.profile.gz"
}

run_suite "Remote build event uploader tests"
9 changes: 9 additions & 0 deletions src/test/shell/bazel/remote/remote_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,12 @@ function count_remote_cas_files() {
echo 0
fi
}

function remote_cas_file_exist() {
local file=$1
[ -f "$cas_path/cas/${file:0:2}/$file" ]
}

function append_remote_cas_files() {
find "$cas_path/cas" -type f >> $1
}

0 comments on commit a951b94

Please sign in to comment.