Skip to content

Speed up (up to 7x!) building large system libraries #20577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 31, 2023
2 changes: 1 addition & 1 deletion emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3644,7 +3644,7 @@ def consume_arg_file():
elif check_flag('--jcache'):
logger.error('jcache is no longer supported')
elif check_arg('--cache'):
config.CACHE = os.path.normpath(consume_arg())
config.CACHE = os.path.abspath(consume_arg())
cache.setup()
# Ensure child processes share the same cache (e.g. when using emcc to compile system
# libraries)
Expand Down
12 changes: 9 additions & 3 deletions tools/building.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,9 +294,15 @@ def get_command_with_possible_response_file(cmd):
# One of None, 0 or 1. (None: do default decision, 0: force disable, 1: force enable)
force_response_files = os.getenv('EM_FORCE_RESPONSE_FILES')

# 8k is a bit of an arbitrary limit, but a reasonable one
# for max command line size before we use a response file
if (len(shared.shlex_join(cmd)) <= 8192 and force_response_files != '1') or force_response_files == '0':
# Different OSes have different limits. The most restrictive is usually the
# Windows one, which is set at 8191 characters. We could just use that, but it leads to
# problems when invoking shell wrappers (e.g. emcc.bat), which, in turn,
# pass arguments to some longer command like `(full path to Clang) ...args`.
# In that scenario, even if the initial command line is short enough, the
# subprocess can still run into the Command Line Too Long error.
# Reduce the limit by ~1K for now to be on the safe side, but we might need to
# adjust this in the future if it turns out not to be enough.
if (len(shared.shlex_join(cmd)) <= 7000 and force_response_files != '1') or force_response_files == '0':
return cmd

logger.debug('using response file for %s' % cmd[0])
Expand Down
2 changes: 1 addition & 1 deletion tools/ports/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def build_port(src_dir, output_path, port_name, includes=[], flags=[], cxxflags=
commands.append(cmd)
objects.append(obj)

system_libs.run_build_commands(commands)
system_libs.run_build_commands(commands, num_inputs=len(srcs))
system_libs.create_lib(output_path, objects)

return output_path
Expand Down
5 changes: 3 additions & 2 deletions tools/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ def cap_max_workers_in_pool(max_workers):

def run_multiple_processes(commands,
env=None,
route_stdout_to_temp_files_suffix=None):
route_stdout_to_temp_files_suffix=None,
cwd=None):
"""Runs multiple subprocess commands.

route_stdout_to_temp_files_suffix : string
Expand Down Expand Up @@ -205,7 +206,7 @@ def get_finished_process():
if DEBUG:
logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i])))
print_compiler_stage(commands[i])
proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env)
proc = subprocess.Popen(commands[i], stdout=stdout, stderr=None, env=env, cwd=cwd)
processes[i] = proc
if route_stdout_to_temp_files_suffix:
std_outs.append((i, stdout.name))
Expand Down
69 changes: 47 additions & 22 deletions tools/system_libs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# found in the LICENSE file.

import re
from time import time
from .toolchain_profiler import ToolchainProfiler

import itertools
Expand Down Expand Up @@ -82,13 +83,14 @@ def clean_env():
return safe_env


def run_build_commands(commands, num_inputs, build_dir=None):
  """Run a set of library build commands and report timing.

  commands: list of argv lists to execute (possibly batched, so
    len(commands) may be smaller than the number of source files).
  num_inputs: number of source files being compiled, used for logging
    since batching means one command can cover many inputs.
  build_dir: optional working directory for the subprocesses; commands
    may use paths relative to it to keep command lines short.
  """
  # Before running a set of build commands make sure the common sysroot
  # headers are installed. This prevents each sub-process from attempting
  # to setup the sysroot itself.
  ensure_sysroot()
  start_time = time()
  shared.run_multiple_processes(commands, env=clean_env(), cwd=build_dir)
  logger.info(f'compiled {num_inputs} inputs in {time() - start_time:.2f}s')


def objectfile_sort_key(filename):
Expand Down Expand Up @@ -483,33 +485,22 @@ def build_objects(self, build_dir):
By default, this builds all the source files returned by `self.get_files()`,
with the `cflags` returned by `self.get_cflags()`.
"""
batches = {}
commands = []
objects = []
objects = set()
cflags = self.get_cflags()
if self.deterministic_paths:
source_dir = utils.path_from_root()
cflags += [f'-ffile-prefix-map={source_dir}=/emsdk/emscripten',
'-fdebug-compilation-dir=/emsdk/emscripten']
case_insensitive = is_case_insensitive(build_dir)
for src in self.get_files():
object_basename = shared.unsuffixed_basename(src)
# Resolve duplicates by appending unique.
# This is needed on case insensitve filesystem to handle,
# for example, _exit.o and _Exit.o.
if case_insensitive:
object_basename = object_basename.lower()
o = os.path.join(build_dir, object_basename + '.o')
object_uuid = 0
# Find a unique basename
while o in objects:
object_uuid += 1
o = os.path.join(build_dir, f'{object_basename}__{object_uuid}.o')
ext = shared.suffix(src)
if ext in ('.s', '.S', '.c'):
cmd = [shared.EMCC]
cmd = shared.EMCC
else:
cmd = [shared.EMXX]

cmd = shared.EMXX
cmd = [cmd, '-c']
if ext == '.s':
# .s files are processed directly by the assembler. In this case we can't pass
# pre-processor flags such as `-I` and `-D` but we still want core flags such as
Expand All @@ -518,9 +509,43 @@ def build_objects(self, build_dir):
else:
cmd += cflags
cmd = self.customize_build_cmd(cmd, src)
commands.append(cmd + ['-c', src, '-o', o])
objects.append(o)
run_build_commands(commands)

object_basename = shared.unsuffixed_basename(src)
if case_insensitive:
object_basename = object_basename.lower()
o = os.path.join(build_dir, object_basename + '.o')
if o in objects:
# If we have seen a file with the same name before, we are on a case-insensitive
# filesystem and need a separate command to compile this file with a
# custom unique output object filename, as batch compile doesn't allow
# such customization.
#
# This is needed to handle, for example, _exit.o and _Exit.o.
object_uuid = 0
# Find a unique basename
while o in objects:
object_uuid += 1
o = os.path.join(build_dir, f'{object_basename}__{object_uuid}.o')
commands.append(cmd + [src, '-o', o])
else:
# Use relative paths to reduce the length of the command line.
# This lets us avoid switching to a response file as often.
src = os.path.relpath(src, build_dir)
batches.setdefault(tuple(cmd), []).append(src)
objects.add(o)

# Choose a chunk size that is large enough to avoid spawning too many
# subprocesses, but not so large that it causes task starvation.
# For now the heuristic is to split inputs by 2x number of cores.
chunk_size = max(1, len(objects) // (2 * shared.get_num_cores()))
# Convert batches to commands.
for cmd, srcs in batches.items():
cmd = list(cmd)
for i in range(0, len(srcs), chunk_size):
chunk_srcs = srcs[i:i + chunk_size]
commands.append(building.get_command_with_possible_response_file(cmd + chunk_srcs))

run_build_commands(commands, num_inputs=len(objects), build_dir=build_dir)
return objects

def customize_build_cmd(self, cmd, _filename):
Expand Down