From 914eff948be54cbcde847ae800a2e373e61a5c56 Mon Sep 17 00:00:00 2001 From: Jerry Marino Date: Wed, 9 Jun 2021 10:10:35 -0700 Subject: [PATCH] Implement global index store cache (#567) swift and clang write index data based on the status of what resides in `index-store-path`. When using a transient per-swift-library index, it writes O( M imports * N libs) indexer data and slows down compilation significantly: to the tune of 6GB+ index data in my testing. Bazel likes to use per `swift_library` data in order to remote cache them, which needs extra consideration to work with the design of swift and clang and preserve the performance This PR does 2 things to make index while building use the original model of the index-store so we don't have to patch clang and swift for this yet. When the feature, `swift.use_global_index_store`, is set: 1. it writes to a global index cache, `bazel-out/global_index_store` 2. it copies indexstore data (records and units) to where Bazel expected them for caching: e.g. the original location with `swift.index_while_building` I added a detailed description of this feature in the RFC to add it there https://github.com/lyft/index-import/pull/53. In practice, transitive imports will be indexed by deps and if they aren't cached then the fallback is Xcode wil index transitive files if we set those as deps on the unit files --- swift/internal/compiling.bzl | 15 +++ swift/internal/feature_names.bzl | 4 + swift/repositories.bzl | 21 ++++ tools/worker/BUILD | 30 +++++- tools/worker/swift_runner.cc | 168 +++++++++++++++++++++++++++---- tools/worker/swift_runner.h | 20 +++- 6 files changed, 235 insertions(+), 23 deletions(-) diff --git a/swift/internal/compiling.bzl b/swift/internal/compiling.bzl index 8cea13102..fba2971d4 100644 --- a/swift/internal/compiling.bzl +++ b/swift/internal/compiling.bzl @@ -58,6 +58,7 @@ load( "SWIFT_FEATURE_SUPPORTS_SYSTEM_MODULE_FLAG", "SWIFT_FEATURE_SYSTEM_MODULE", "SWIFT_FEATURE_USE_C_MODULES", + "SWIFT_FEATURE_USE_GLOBAL_INDEX_STORE", "SWIFT_FEATURE_USE_GLOBAL_MODULE_CACHE", "SWIFT_FEATURE_VFSOVERLAY", "SWIFT_FEATURE__NUM_THREADS_0_IN_SWIFTCOPTS", @@ -805,6 +806,14 @@ def compile_action_configs( ], features = [SWIFT_FEATURE_ENABLE_SKIP_FUNCTION_BODIES], ), + swift_toolchain_config.action_config( + actions = [swift_action_names.COMPILE], + configurators = [_global_index_store_configurator], + features = [ + SWIFT_FEATURE_INDEX_WHILE_BUILDING, + SWIFT_FEATURE_USE_GLOBAL_INDEX_STORE, + ], + ), # Configure index-while-building. swift_toolchain_config.action_config( @@ -1419,6 +1428,12 @@ def _index_while_building_configurator(prerequisites, args): if not _index_store_path_overridden(prerequisites.user_compile_flags): args.add("-index-store-path", prerequisites.indexstore_directory.path) +def _global_index_store_configurator(prerequisites, args): + """Adds flags for index-store generation to the command line.""" + out_dir = prerequisites.indexstore_directory.dirname.split("/")[0] + path = out_dir + "/_global_index_store" + args.add("-Xwrapped-swift=-global-index-store-import-path=" + path) + def _conditional_compilation_flag_configurator(prerequisites, args): """Adds (non-Clang) conditional compilation flags to the command line.""" all_defines = depset( diff --git a/swift/internal/feature_names.bzl b/swift/internal/feature_names.bzl index 639b516eb..318235d0c 100644 --- a/swift/internal/feature_names.bzl +++ b/swift/internal/feature_names.bzl @@ -99,11 +99,15 @@ SWIFT_FEATURE_ENABLE_TESTING = "swift.enable_testing" SWIFT_FEATURE_FULL_DEBUG_INFO = "swift.full_debug_info" # If enabled, the compilation action for a target will produce an index store. +# https://docs.google.com/document/d/1cH2sTpgSnJZCkZtJl1aY-rzy4uGPcrI-6RrUpdATO2Q/ SWIFT_FEATURE_INDEX_WHILE_BUILDING = "swift.index_while_building" # If enabled the compilation action will not produce indexes for system modules. SWIFT_FEATURE_DISABLE_SYSTEM_INDEX = "swift.disable_system_index" +# Index while building - using a global index store cache +SWIFT_FEATURE_USE_GLOBAL_INDEX_STORE = "swift.use_global_index_store" + # If enabled, compilation actions and module map generation will assume that the # header paths in module maps are relative to the current working directory # (i.e., the workspace root); if disabled, header paths in module maps are diff --git a/swift/repositories.bzl b/swift/repositories.bzl index 0f59e87c2..c27e0c466 100644 --- a/swift/repositories.bzl +++ b/swift/repositories.bzl @@ -108,6 +108,27 @@ def swift_rules_dependencies(): type = "zip", ) + # It relies on `index-import` to import indexes into Bazel's remote + # cache and allow using a global index internally in workers. + # Note: this is only loaded if swift.index_while_building_v2 is enabled + _maybe( + http_archive, + name = "build_bazel_rules_swift_index_import", + build_file_content = """\ +load("@bazel_skylib//rules:native_binary.bzl", "native_binary") + +native_binary( + name = "index_import", + src = "index-import", + out = "index-import", + visibility = ["//visibility:public"], +) +""", + canonical_id = "index-import-5.3.2.6", + urls = ["https://github.com/MobileNativeFoundation/index-import/releases/download/5.3.2.6/index-import.zip"], + sha256 = "61a58363f56c5fd84d4ebebe0d9b5dd90c74ae170405a7b9018e8cf698e679de", + ) + _maybe( swift_autoconfiguration, name = "build_bazel_rules_swift_local_config", diff --git a/tools/worker/BUILD b/tools/worker/BUILD index 6bda082c8..3cd73c71c 100644 --- a/tools/worker/BUILD +++ b/tools/worker/BUILD @@ -18,12 +18,18 @@ config_setting( }, ) +# Internal hinge for index while building V2 feature +config_setting( + name = "use_global_index_store", + values = { + "features": "swift.use_global_index_store", + }, +) + cc_library( name = "compile_with_worker", srcs = [ "compile_with_worker.cc", - "output_file_map.cc", - "output_file_map.h", "work_processor.cc", "work_processor.h", ], @@ -34,7 +40,6 @@ cc_library( "//tools/common:file_system", "//tools/common:path_utils", "//tools/common:temp_file", - "@com_github_nlohmann_json//:json", "@com_google_protobuf//:protobuf", ], ) @@ -50,13 +55,30 @@ cc_library( cc_library( name = "swift_runner", - srcs = ["swift_runner.cc"], + srcs = [ + "output_file_map.cc", + "output_file_map.h", + "swift_runner.cc", + ], hdrs = ["swift_runner.h"], + copts = select({ + ":use_global_index_store": [ + "-DINDEX_IMPORT_PATH=\\\"$(rootpath @build_bazel_rules_swift_index_import//:index_import)\\\"", + ], + "//conditions:default": [], + }), + data = select({ + ":use_global_index_store": [ + "@build_bazel_rules_swift_index_import//:index_import", + ], + "//conditions:default": [], + }), deps = [ "//tools/common:bazel_substitutions", "//tools/common:file_system", "//tools/common:process", "//tools/common:temp_file", + "@com_github_nlohmann_json//:json", ], ) diff --git a/tools/worker/swift_runner.cc b/tools/worker/swift_runner.cc index 0abf53f03..41068581c 100644 --- a/tools/worker/swift_runner.cc +++ b/tools/worker/swift_runner.cc @@ -20,6 +20,7 @@ #include "tools/common/file_system.h" #include "tools/common/process.h" #include "tools/common/temp_file.h" +#include "tools/worker/output_file_map.h" namespace { @@ -129,13 +130,87 @@ int SwiftRunner::Run(std::ostream *stderr_stream, bool stdout_to_stderr) { exit_code = RunSubProcess(rewriter_args, stderr_stream, stdout_to_stderr); } + auto enable_global_index_store = global_index_store_import_path_ != ""; + if (enable_global_index_store) { + OutputFileMap output_file_map; + output_file_map.ReadFromPath(output_file_map_path_); + + auto outputs = output_file_map.incremental_outputs(); + std::map::iterator it; + + std::vector ii_args; +// The index-import runfile path is passed as a define +#if defined(INDEX_IMPORT_PATH) + ii_args.push_back(INDEX_IMPORT_PATH); +#else + // Logical error + std::cerr << "Incorrectly compiled work_processor.cc"; + exit_code = EXIT_FAILURE; + return exit_code; +#endif + + for (it = outputs.begin(); it != outputs.end(); it++) { + // Need the actual output paths of the compiler - not bazel + auto output_path = it->first; + auto file_type = output_path.substr(output_path.find_last_of(".") + 1); + if (file_type == "o") { + ii_args.push_back("-import-output-file"); + ii_args.push_back(output_path); + } + } + + auto exec_root = GetCurrentDirectory(); + // Copy back from the global index store to bazel's index store + ii_args.push_back(exec_root + "/" + global_index_store_import_path_); + ii_args.push_back(exec_root + "/" + index_store_path_); + exit_code = + RunSubProcess(ii_args, stderr_stream, /*stdout_to_stderr=*/true); + } return exit_code; } +// Marker for end of iteration +class StreamIteratorEnd {}; + +// Basic iterator over an ifstream +class StreamIterator { + public: + StreamIterator(std::ifstream &file) : file_{file} { next(); } + + const std::string &operator*() const { return str_; } + + StreamIterator &operator++() { + next(); + return *this; + } + + bool operator!=(StreamIteratorEnd) const { return !!file_; } + + private: + void next() { std::getline(file_, str_); } + + std::ifstream &file_; + std::string str_; +}; + +class ArgsFile { + public: + ArgsFile(std::ifstream &file) : file_(file) {} + + StreamIterator begin() { return StreamIterator{file_}; } + + StreamIteratorEnd end() { return StreamIteratorEnd{}; } + + private: + std::ifstream &file_; +}; + bool SwiftRunner::ProcessPossibleResponseFile( const std::string &arg, std::function consumer) { auto path = arg.substr(1); std::ifstream original_file(path); + ArgsFile args_file(original_file); + // If we couldn't open it, maybe it's not a file; maybe it's just some other // argument that starts with "@" such as "@loader_path/..." if (!original_file.good()) { @@ -143,15 +218,17 @@ bool SwiftRunner::ProcessPossibleResponseFile( return false; } + // Read the file to a vector to prevent double I/O + auto args = ParseArguments(args_file); + // If we're forcing response files, process and send the arguments from this // file directly to the consumer; they'll all get written to the same response // file at the end of processing all the arguments. if (force_response_file_) { - std::string arg_from_file; - while (std::getline(original_file, arg_from_file)) { + for (auto it = args.begin(); it != args.end(); ++it) { // Arguments in response files might be quoted/escaped, so we need to // unescape them ourselves. - ProcessArgument(Unescape(arg_from_file), consumer); + ProcessArgument(it, Unescape(*it), consumer); } return true; } @@ -161,12 +238,10 @@ bool SwiftRunner::ProcessPossibleResponseFile( bool changed = false; std::string arg_from_file; std::vector new_args; - - while (std::getline(original_file, arg_from_file)) { - changed |= - ProcessArgument(arg_from_file, [&](const std::string &processed_arg) { - new_args.push_back(processed_arg); - }); + for (auto it = args.begin(); it != args.end(); ++it) { + changed |= ProcessArgument(it, *it, [&](const std::string &processed_arg) { + new_args.push_back(processed_arg); + }); } if (changed) { @@ -182,10 +257,11 @@ bool SwiftRunner::ProcessPossibleResponseFile( return changed; } +template bool SwiftRunner::ProcessArgument( - const std::string &arg, std::function consumer) { + Iterator &itr, const std::string &arg, + std::function consumer) { bool changed = false; - if (arg[0] == '@') { changed = ProcessPossibleResponseFile(arg, consumer); } else { @@ -198,8 +274,8 @@ bool SwiftRunner::ProcessArgument( consumer(GetCurrentDirectory() + "=."); changed = true; } else if (new_arg == "-coverage-prefix-pwd-is-dot") { - // Get the actual current working directory (the workspace root), which we - // didn't know at analysis time. + // Get the actual current working directory (the workspace root), which + // we didn't know at analysis time. consumer("-coverage-prefix-map"); consumer(GetCurrentDirectory() + "=."); changed = true; @@ -213,7 +289,8 @@ bool SwiftRunner::ProcessArgument( temp_directories_.push_back(std::move(module_cache_dir)); changed = true; } else if (StripPrefix("-generated-header-rewriter=", new_arg)) { - generated_header_rewriter_path_ = new_arg; + changed = true; + } else if (StripPrefix("-global-index-store-import-path=", new_arg)) { changed = true; } else { // TODO(allevato): Report that an unknown wrapper arg was found and give @@ -221,6 +298,28 @@ bool SwiftRunner::ProcessArgument( changed = true; } } else { + // Process default arguments + if (arg == "-index-store-path") { + consumer("-index-store-path"); + ++itr; + + // If there was a global index store set, pass that to swiftc. + // Otherwise, pass the users. We later copy index data onto the users. + if (global_index_store_import_path_ != "") { + new_arg = global_index_store_import_path_; + } else { + new_arg = index_store_path_; + } + changed = true; + } else if (arg == "-output-file-map") { + // Save the output file map to the value proceeding + // `-output-file-map` + consumer("-output-file-map"); + ++itr; + new_arg = output_file_map_path_; + changed = true; + } + // Apply any other text substitutions needed in the argument (i.e., for // Apple toolchains). // @@ -236,6 +335,36 @@ bool SwiftRunner::ProcessArgument( return changed; } +template +std::vector SwiftRunner::ParseArguments(Iterator itr) { + std::vector out_args; + for (auto it = itr.begin(); it != itr.end(); ++it) { + auto arg = *it; + out_args.push_back(arg); + + if (StripPrefix("-Xwrapped-swift=", arg)) { + if (StripPrefix("-global-index-store-import-path=", arg)) { + global_index_store_import_path_ = arg; + } else if (StripPrefix("-generated-header-rewriter=", arg)) { + generated_header_rewriter_path_ = arg; + } + } else { + if (arg == "-output-file-map") { + ++it; + arg = *it; + output_file_map_path_ = arg; + out_args.push_back(arg); + } else if (arg == "-index-store-path") { + ++it; + arg = *it; + index_store_path_ = arg; + out_args.push_back(arg); + } + } + } + return out_args; +} + std::vector SwiftRunner::ProcessArguments( const std::vector &args) { std::vector new_args; @@ -248,16 +377,19 @@ std::vector SwiftRunner::ProcessArguments( #endif // The tool is assumed to be the first argument. Push it directly. - auto it = args.begin(); + auto parsed_args = ParseArguments(args); + + auto it = parsed_args.begin(); new_args.push_back(*it++); // If we're forcing response files, push the remaining processed args onto a // different vector that we write out below. If not, push them directly onto // the vector being returned. auto &args_destination = force_response_file_ ? response_file_args : new_args; - while (it != args.end()) { - ProcessArgument( - *it, [&](const std::string &arg) { args_destination.push_back(arg); }); + while (it != parsed_args.end()) { + ProcessArgument(it, *it, [&](const std::string &arg) { + args_destination.push_back(arg); + }); ++it; } diff --git a/tools/worker/swift_runner.h b/tools/worker/swift_runner.h index e662b614c..ad5389576 100644 --- a/tools/worker/swift_runner.h +++ b/tools/worker/swift_runner.h @@ -97,9 +97,15 @@ class SwiftRunner { // // This method has file system side effects, creating temporary files and // directories as needed for a particular substitution. - bool ProcessArgument(const std::string &arg, + template + bool ProcessArgument(Iterator &itr, const std::string &arg, std::function consumer); + // Parses arguments to ivars and returns a vector of strings from the iterator. + // This method doesn't actually mutate any of the arguments. + template + std::vector ParseArguments(Iterator itr); + // Applies substitutions to the given command line arguments, returning the // results in a new vector. std::vector ProcessArguments( @@ -128,6 +134,18 @@ class SwiftRunner { // The path to the generated header rewriter tool, if one is being used for // this compilation. std::string generated_header_rewriter_path_; + + // The path of the output map file + std::string output_file_map_path_; + + // The index store path argument passed to the runner + std::string index_store_path_; + + // The path of the global index store when using + // swift.use_global_index_store. When set, this is passed to `swiftc` as the + // `-index-store-path`. After running `swiftc` `index-import` copies relevant + // index outputs into the `index_store_path` to integrate outputs with Bazel. + std::string global_index_store_import_path_; }; #endif // BUILD_BAZEL_RULES_SWIFT_TOOLS_WORKER_SWIFT_RUNNER_H_