Skip to content

Speed up (up to 7x!) building large system libraries #20577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 31, 2023
2 changes: 1 addition & 1 deletion emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3644,7 +3644,7 @@ def consume_arg_file():
elif check_flag('--jcache'):
logger.error('jcache is no longer supported')
elif check_arg('--cache'):
config.CACHE = os.path.normpath(consume_arg())
config.CACHE = os.path.abspath(consume_arg())
cache.setup()
# Ensure child processes share the same cache (e.g. when using emcc to compile system
# libraries)
Expand Down
12 changes: 9 additions & 3 deletions tools/building.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,15 @@ def get_command_with_possible_response_file(cmd):
# One of None, 0 or 1. (None: do default decision, 0: force disable, 1: force enable)
force_response_files = os.getenv('EM_FORCE_RESPONSE_FILES')

# 8k is a bit of an arbitrary limit, but a reasonable one
# for max command line size before we use a response file
if (len(shared.shlex_join(cmd)) <= 8192 and force_response_files != '1') or force_response_files == '0':
# Different OSes have different limits. The most restrictive is usually the
# Windows one, which is set at 8191 characters. We could just use that, but it leads to
# problems when invoking shell wrappers (e.g. emcc.bat), which, in turn,
# pass arguments to some longer command like `(full path to Clang) ...args`.
# In that scenario, even if the initial command line is short enough, the
# subprocess can still run into the Command Line Too Long error.
# Reduce the limit by ~1K for now to be on the safe side, but we might need to
# adjust this in the future if it turns out not to be enough.
if (len(shared.shlex_join(cmd)) <= 7000 and force_response_files != '1') or force_response_files == '0':
return cmd

logger.debug('using response file for %s' % cmd[0])
Expand Down
2 changes: 1 addition & 1 deletion tools/ports/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def build_port(src_dir, output_path, port_name, includes=[], flags=[], cxxflags=
commands.append(cmd)
objects.append(obj)

system_libs.run_build_commands(commands)
system_libs.run_build_commands(commands, num_inputs=len(srcs))
system_libs.create_lib(output_path, objects)

return output_path
Expand Down
5 changes: 3 additions & 2 deletions tools/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ def cap_max_workers_in_pool(max_workers):

def run_multiple_processes(commands,
env=None,
route_stdout_to_temp_files_suffix=None):
route_stdout_to_temp_files_suffix=None,
cwd=None):
"""Runs multiple subprocess commands.

route_stdout_to_temp_files_suffix : string
Expand Down Expand Up @@ -205,7 +206,7 @@ def get_finished_process():
if DEBUG:
logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
print_compiler_stage(commands[i])
proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env)
proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
processes[i] = proc
if route_stdout_to_temp_files_suffix:
std_outs.append((i, stdout.name))
Expand Down
69 changes: 47 additions & 22 deletions tools/system_libs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# found in the LICENSE file.

import re
from time import time
from .toolchain_profiler import ToolchainProfiler

import itertools
Expand Down Expand Up @@ -82,13 +83,14 @@ def clean_env():
return safe_env


def run_build_commands(commands, num_inputs, build_dir=None):
  """Run a set of library build commands and report timing.

  commands: list of argv lists to execute (possibly batched, so
    len(commands) may be smaller than the number of source files).
  num_inputs: number of source files being compiled, used for logging
    since batching means one command can cover many inputs.
  build_dir: optional working directory for the subprocesses; commands
    may use paths relative to it to keep command lines short.
  """
  # Before running a set of build commands make sure the common sysroot
  # headers are installed. This prevents each sub-process from attempting
  # to setup the sysroot itself.
  ensure_sysroot()
  start_time = time()
  shared.run_multiple_processes(commands, env=clean_env(), cwd=build_dir)
  logger.info(f'compiled {num_inputs} inputs in {time() - start_time:.2f}s')


def objectfile_sort_key(filename):
Expand Down Expand Up @@ -483,33 +485,22 @@ def build_objects(self, build_dir):
By default, this builds all the source files returned by `self.get_files()`,
with the `cflags` returned by `self.get_cflags()`.
"""
batches = {}
commands = []
objects = []
objects = set()
cflags = self.get_cflags()
if self.deterministic_paths:
source_dir = utils.path_from_root()
cflags += [f'-ffile-prefix-map={source_dir}=/emsdk/emscripten',
'-fdebug-compilation-dir=/emsdk/emscripten']
case_insensitive = is_case_insensitive(build_dir)
for src in self.get_files():
object_basename = shared.unsuffixed_basename(src)
# Resolve duplicates by appending unique.
# This is needed on case insensitve filesystem to handle,
# for example, _exit.o and _Exit.o.
if case_insensitive:
object_basename = object_basename.lower()
o = os.path.join(build_dir, object_basename + '.o')
object_uuid = 0
# Find a unique basename
while o in objects:
object_uuid += 1
o = os.path.join(build_dir, f'{object_basename}__{object_uuid}.o')
ext = shared.suffix(src)
if ext in ('.s', '.S', '.c'):
cmd = [shared.EMCC]
cmd = shared.EMCC
else:
cmd = [shared.EMXX]

cmd = shared.EMXX
cmd = [cmd, '-c']
if ext == '.s':
# .s files are processed directly by the assembler. In this case we can't pass
# pre-processor flags such as `-I` and `-D` but we still want core flags such as
Expand All @@ -518,9 +509,43 @@ def build_objects(self, build_dir):
else:
cmd += cflags
cmd = self.customize_build_cmd(cmd, src)
commands.append(cmd + ['-c', src, '-o', o])
objects.append(o)
run_build_commands(commands)

object_basename = shared.unsuffixed_basename(src)
if case_insensitive:
object_basename = object_basename.lower()
o = os.path.join(build_dir, object_basename + '.o')
if o in objects:
# If we have seen a file with the same name before, we are on a case-insensitive
# filesystem and need a separate command to compile this file with a
# custom unique output object filename, as batch compile doesn't allow
# such customization.
#
# This is needed to handle, for example, _exit.o and _Exit.o.
object_uuid = 0
# Find a unique basename
while o in objects:
object_uuid += 1
o = os.path.join(build_dir, f'{object_basename}__{object_uuid}.o')
commands.append(cmd + [src, '-o', o])
else:
# Use relative paths to reduce the length of the command line.
# This lets us avoid switching to a response file as often.
src = os.path.relpath(src, build_dir)
batches.setdefault(tuple(cmd), []).append(src)
objects.add(o)

# Choose a chunk size that is large enough to avoid spawning too many
# subprocesses, but not so large that it causes task starvation.
# For now the heuristic is to split inputs by 2x number of cores.
chunk_size = max(1, len(objects) // (2 * shared.get_num_cores()))
# Convert batches to commands.
for cmd, srcs in batches.items():
cmd = list(cmd)
for i in range(0, len(srcs), chunk_size):
chunk_srcs = srcs[i:i + chunk_size]
commands.append(building.get_command_with_possible_response_file(cmd + chunk_srcs))

run_build_commands(commands, num_inputs=len(objects), build_dir=build_dir)
return objects

def customize_build_cmd(self, cmd, _filename):
Expand Down