diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java index 01ea75faba166f..d8ec53e7d22a5c 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java @@ -254,7 +254,7 @@ public ListenableFuture prefetchInputs() return immediateVoidFuture(); } - @Override +@Override public MetadataProvider getMetadataProvider() { return actionExecutionContext.getMetadataProvider(); } diff --git a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java index f964d876a7e0c6..a32b9efc7e59d8 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java +++ b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java @@ -494,6 +494,17 @@ public boolean usingLocalTestJobs() { + "test log. Otherwise, Bazel generates a test.xml as part of the test action.") public boolean splitXmlGeneration; + @Option( + name = "experimental_remote_cache_eviction_retries", + defaultValue = "0", + documentationCategory = OptionDocumentationCategory.REMOTE, + effectTags = {OptionEffectTag.EXECUTION}, + help = + "The maximum number of attempts to retry if the build encountered remote cache eviction" + + " error. A non-zero value will implicitly set" + + " --incompatible_remote_use_new_exit_code_for_lost_inputs to true.") + public int remoteRetryOnCacheEviction; + /** An enum for specifying different formats of test output. */ public enum TestOutputFormat { SUMMARY, // Provide summary output only. diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 979df0fab81636..bd0f63c03ee44a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -907,6 +907,7 @@ public void executorInit(CommandEnvironment env, BuildRequest request, ExecutorB actionContextProvider.setTempPathGenerator(tempPathGenerator); + ExecutionOptions executionOptions = env.getOptions().getOptions(ExecutionOptions.class); RemoteOptions remoteOptions = Preconditions.checkNotNull( env.getOptions().getOptions(RemoteOptions.class), "RemoteOptions"); @@ -929,7 +930,8 @@ public void executorInit(CommandEnvironment env, BuildRequest request, ExecutorB tempPathGenerator, patternsToDownload, outputPermissions, - remoteOptions.useNewExitCodeForLostInputs); + remoteOptions.useNewExitCodeForLostInputs + || (executionOptions != null && executionOptions.remoteRetryOnCacheEviction > 0)); env.getEventBus().register(actionInputFetcher); builder.setActionInputPrefetcher(actionInputFetcher); actionContextProvider.setActionInputFetcher(actionInputFetcher); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 0f7bb077577efa..1eba36e9f1d723 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -557,7 +557,8 @@ private SpawnResult handleError( catastrophe = true; } else if (remoteCacheFailed) { status = Status.REMOTE_CACHE_FAILED; - if (remoteOptions.useNewExitCodeForLostInputs) { + if (remoteOptions.useNewExitCodeForLostInputs + || executionOptions.remoteRetryOnCacheEviction > 0) { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_EVICTED; } else { detailedCode = FailureDetails.Spawn.Code.REMOTE_CACHE_FAILED; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java index 0fbd5a2d3f0327..b91d69e7384ac3 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java @@ -43,6 +43,7 @@ import com.google.devtools.build.lib.events.PrintingEventHandler; import com.google.devtools.build.lib.events.Reporter; import com.google.devtools.build.lib.events.StoredEventHandler; +import com.google.devtools.build.lib.exec.ExecutionOptions; import com.google.devtools.build.lib.profiler.MemoryProfiler; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.SilentCloseable; @@ -54,6 +55,7 @@ import com.google.devtools.build.lib.util.AnsiStrippingOutputStream; import com.google.devtools.build.lib.util.DebugLoggerConfigurator; import com.google.devtools.build.lib.util.DetailedExitCode; +import com.google.devtools.build.lib.util.ExitCode; import com.google.devtools.build.lib.util.InterruptedFailureDetails; import com.google.devtools.build.lib.util.LoggingUtil; import com.google.devtools.build.lib.util.Pair; @@ -230,18 +232,29 @@ public BlazeCommandResult exec( return createDetailedCommandResult( retrievedShutdownReason, FailureDetails.Command.Code.PREVIOUSLY_SHUTDOWN); } - BlazeCommandResult result = - execExclusively( - originalCommandLine, - invocationPolicy, - args, - outErr, - firstContactTimeMillis, - commandName, - command, - waitTimeInMs, - startupOptionsTaggedWithBazelRc, - commandExtensions); + BlazeCommandResult result; + int attempt = 0; + while (true) { + try { + result = + execExclusively( + originalCommandLine, + invocationPolicy, + args, + outErr, + firstContactTimeMillis, + commandName, + command, + waitTimeInMs, + startupOptionsTaggedWithBazelRc, + commandExtensions, + attempt); + break; + } catch (RemoteCacheEvictedException e) { + outErr.printErrLn("Found remote cache eviction error, retrying the build..."); + attempt += 1; + } + } if (result.shutdown()) { setShutdownReason( "Server shut down " @@ -289,7 +302,9 @@ private BlazeCommandResult execExclusively( BlazeCommand command, long waitTimeInMs, Optional>> startupOptionsTaggedWithBazelRc, - List commandExtensions) { + List commandExtensions, + int attempt) + throws RemoteCacheEvictedException { // Record the start time for the profiler. Do not put anything before this! long execStartTimeNanos = runtime.getClock().nanoTime(); @@ -631,7 +646,18 @@ private BlazeCommandResult execExclusively( } needToCallAfterCommand = false; - return runtime.afterCommand(env, result); + var newResult = runtime.afterCommand(env, result); + if (newResult.getExitCode().equals(ExitCode.REMOTE_CACHE_EVICTED)) { + var executionOptions = + Preconditions.checkNotNull(options.getOptions(ExecutionOptions.class)); + if (attempt < executionOptions.remoteRetryOnCacheEviction) { + throw new RemoteCacheEvictedException(); + } + } + + return newResult; + } catch (RemoteCacheEvictedException e) { + throw e; } catch (Throwable e) { logger.atSevere().withCause(e).log("Shutting down due to exception"); Crash crash = Crash.from(e); @@ -665,6 +691,8 @@ private BlazeCommandResult execExclusively( } } + private static class RemoteCacheEvictedException extends IOException {} + private static void replayEarlyExitEvents( OutErr outErr, BlazeOptionHandler optionHandler, diff --git a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java index 6828c9abc75b09..c6ea8418c1a46d 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/BuildWithoutTheBytesIntegrationTest.java @@ -528,9 +528,7 @@ public void remoteCacheEvictBlobs_whenPrefetchingInput_exitWithCode39() throws E // Assert: Exit code is 39 assertThat(error) .hasMessageThat() - .contains( - "Build without the Bytes does not work if your remote cache evicts blobs" - + " during builds"); + .contains("Failed to fetch blobs because they do not exist remotely"); assertThat(error).hasMessageThat().contains(String.format("%s/%s", hashCode, bytes.length)); assertThat(error.getDetailedExitCode().getExitCode().getNumericExitCode()).isEqualTo(39); } diff --git a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh index 33ab3bd475e9d4..7768f25b5e98e0 100755 --- a/src/test/shell/bazel/remote/build_without_the_bytes_test.sh +++ b/src/test/shell/bazel/remote/build_without_the_bytes_test.sh @@ -1627,4 +1627,63 @@ end_of_record" expect_log "$expected_result" } +function test_remote_cache_eviction_retries() { + mkdir -p a + + cat > a/BUILD <<'EOF' +genrule( + name = 'foo', + srcs = ['foo.in'], + outs = ['foo.out'], + cmd = 'cat $(SRCS) > $@', +) + +genrule( + name = 'bar', + srcs = ['foo.out', 'bar.in'], + outs = ['bar.out'], + cmd = 'cat $(SRCS) > $@', + tags = ['no-remote-exec'], +) +EOF + + echo foo > a/foo.in + echo bar > a/bar.in + + # Populate remote cache + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + //a:bar >& $TEST_log || fail "Failed to build" + + bazel clean + + # Clean build, foo.out isn't downloaded + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + //a:bar >& $TEST_log || fail "Failed to build" + + if [[ -f bazel-bin/a/foo.out ]]; then + fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded" + fi + + # Evict blobs from remote cache + stop_worker + start_worker + + echo "updated bar" > a/bar.in + + # Incremental build triggers remote cache eviction error but Bazel + # automatically retries the build and reruns the generating actions for + # missing blobs + bazel build \ + --remote_executor=grpc://localhost:${worker_port} \ + --remote_download_minimal \ + --experimental_remote_cache_eviction_retries=5 \ + //a:bar >& $TEST_log || fail "Failed to build" + + expect_log "Found remote cache eviction error, retrying the build..." +} + run_suite "Build without the Bytes tests"