Skip to content

Commit ed29f03

Browse files
committed
Cache symbol lists used by LLD_REPORT_UNDEFINED. NFC
This means that the JS libraries only need to be processed when there is a cache miss. The cost of processing the JS libraries is about 300ms on my machine, which is about 30% of the link time for hello world. When there is a cache hit this cost is reduced to 3ms. This change is in preparation for switching this mode on by default. See: #16003
1 parent dce48ab commit ed29f03

File tree

1 file changed

+54
-3
lines changed

1 file changed

+54
-3
lines changed

emcc.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from tools.toolchain_profiler import ToolchainProfiler
2424

2525
import base64
26+
import glob
27+
import hashlib
2628
import json
2729
import logging
2830
import os
@@ -39,7 +41,7 @@
3941

4042

4143
import emscripten
42-
from tools import shared, system_libs, utils, ports
44+
from tools import shared, system_libs, utils, ports, filelock
4345
from tools import colored_logger, diagnostics, building
4446
from tools.shared import unsuffixed, unsuffixed_basename, WINDOWS, safe_copy
4547
from tools.shared import run_process, read_and_preprocess, exit_with_error, DEBUG
@@ -500,8 +502,7 @@ def ensure_archive_index(archive_file):
500502
run_process([shared.LLVM_RANLIB, archive_file])
501503

502504

503-
@ToolchainProfiler.profile_block('JS symbol generation')
504-
def get_all_js_syms():
505+
def generate_js_symbols():
505506
# Runs the js compiler to generate a list of all symbols available in the JS
506507
# libraries. This must be done separately for each linker invokation since the
507508
# list of symbols depends on what settings are used.
@@ -516,6 +517,56 @@ def get_all_js_syms():
516517
if shared.is_c_symbol(name):
517518
name = shared.demangle_c_symbol_name(name)
518519
library_syms.add(name)
520+
return library_syms
521+
522+
523+
@ToolchainProfiler.profile_block('JS symbol generation')
def get_all_js_syms():
  """Return the set of symbol names provided by the JS libraries.

  The result is stored in the cache, keyed by a hash of the current settings
  and the contents of every JS library file, so `generate_js_symbols` only
  runs on a cache miss.

  Returns:
    A set of symbol names. When `settings.BOOTSTRAPPING_STRUCT_INFO` or
    `config.FROZEN_CACHE` is set the cache is bypassed and the result of
    `generate_js_symbols` is returned directly.
  """
  # Avoid using the cache when generating struct info since this step is
  # performed while the cache is locked.
  if settings.BOOTSTRAPPING_STRUCT_INFO or config.FROZEN_CACHE:
    return generate_js_symbols()

  # We define a cache hit as when the settings and the contents of the JS
  # libraries (`src/library*.js` plus everything in `JS_LIBRARIES`) are
  # identical.
  input_files = [json.dumps(settings.dict(), sort_keys=True, indent=2)]
  for jslib in sorted(glob.glob(utils.path_from_root('src') + '/library*.js')):
    input_files.append(read_file(jslib))
  for jslib in settings.JS_LIBRARIES:
    if not os.path.isabs(jslib):
      jslib = utils.path_from_root('src', jslib)
    input_files.append(read_file(jslib))
  content = '\n'.join(input_files)
  content_hash = hashlib.sha1(content.encode('utf-8')).hexdigest()

  def build_symbol_list(filename):
    """Only called when there is no existing symbol list for a given content hash.
    """
    # Sort so that the cached file contents are deterministic.
    write_file(filename, '\n'.join(sorted(generate_js_symbols())) + '\n')

  # We need to use a separate lock here for symbol lists because, unlike with
  # system libraries, it's normal for these files to get pruned as part of
  # normal operation.  This means that a file could otherwise be deleted between
  # the `cache.get()` and the `read_file`.
  # NOTE(review): the lock path is resolved with a single `get_path` call; the
  # previous code fed the already-resolved path back through `get_path`.
  with filelock.FileLock(cache.get_path('symbol_lists.lock')):
    filename = cache.get(f'symbol_lists/{content_hash}.txt', build_symbol_list)
    # Return a set, matching the uncached path above, and skip blank lines so
    # that an empty symbol list does not yield a bogus '' symbol.
    library_syms = {sym for sym in read_file(filename).splitlines() if sym}

    # Limit the overall size of the cache to 100 entries.
    # This code will get test coverage once we make LLD_REPORT_UNDEFINED the
    # default since under those circumstances a full test run of `other` or
    # `core` generates ~1000 unique symbol lists.
    cache_limit = 100
    root = cache.get_path('symbol_lists')
    entries = [os.path.join(root, name) for name in os.listdir(root)]
    if len(entries) > cache_limit:
      # Oldest first, so everything before the last `cache_limit` entries is
      # what gets evicted.
      entries.sort(key=os.path.getmtime)
      for stale in entries[:-cache_limit]:
        delete_file(stale)

  return library_syms
521572

0 commit comments

Comments
 (0)