Skip to content

Commit 922deca

Browse files
committed
Enable LLD_REPORT_UNDEFINED by default
This makes undefined symbol errors more precise by including the name of the object that references the undefined symbol. It also paves the way (in my mind anyway) for finally fixing reverse dependencies in a scalable way. See #15982. That PR uses an alternative script for the pre-processing of dependencies but also fundamentally relies on processing JS libraries both before and after linking. The cost is about 300ms per link operation due to double processing of the JS libraries. This cost is fixed for most projects (since most projects don't add a lot of JS libraries over time in the way that they add native code objects). I imagine even in the most pathological cases JS library usage will be dwarfed by native object file usage, so even in those cases the native linking will likely always dominate the link time. If the 300ms extra link time causes issues, for example with cmake or autoconf, which do a lot of linking of small programs, we could consider hashing the config settings and caching the result of the processing based on them.
1 parent 7c9b97a commit 922deca

File tree

8 files changed

+52
-49
lines changed

8 files changed

+52
-49
lines changed

ChangeLog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ See docs/process.md for more on how version tagging works.
2020

2121
3.1.28 (in development)
2222
-----------------------
23+
- `LLD_REPORT_UNDEFINED` is now enabled by default. This makes undefined symbol
24+
errors more precise by including the name of the object that references the
25+
undefined symbol. The old behaviour (of allowing all undefined symbols at
26+
wasm-ld time and reporting them later when processing JS library files) is
27+
still available using `-sLLD_REPORT_UNDEFINED=0`. (#16003)
2328
- musl libc updated from v1.2.2 to v1.2.3. (#18270)
2429
- The default emscripten config file no longer contains `EMSCRIPTEN_ROOT`. This
2530
setting has long been completely ignored by emscripten itself. For

emcc.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,6 +1827,7 @@ def phase_linker_setup(options, state, newargs):
18271827
if 'EXPORTED_FUNCTIONS' in user_settings:
18281828
if '_main' not in settings.USER_EXPORTED_FUNCTIONS:
18291829
settings.EXPECT_MAIN = 0
1830+
settings.IGNORE_MISSING_MAIN = 1
18301831
else:
18311832
assert not settings.EXPORTED_FUNCTIONS
18321833
settings.EXPORTED_FUNCTIONS = ['_main']
@@ -1855,10 +1856,7 @@ def phase_linker_setup(options, state, newargs):
18551856
if not settings.PURE_WASI and '-nostdlib' not in newargs and '-nodefaultlibs' not in newargs:
18561857
default_setting('STACK_OVERFLOW_CHECK', max(settings.ASSERTIONS, settings.STACK_OVERFLOW_CHECK))
18571858

1858-
if settings.LLD_REPORT_UNDEFINED or settings.STANDALONE_WASM:
1859-
# Reporting undefined symbols at wasm-ld time requires us to know if we have a `main` function
1860-
# or not, as does standalone wasm mode.
1861-
# TODO(sbc): Remove this once this becomes the default
1859+
if settings.STANDALONE_WASM:
18621860
settings.IGNORE_MISSING_MAIN = 0
18631861

18641862
# For users that opt out of WARN_ON_UNDEFINED_SYMBOLS we assume they also

src/library.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,17 +1258,19 @@ mergeInto(LibraryManager.library, {
12581258
// built with SUPPORT_LONGJMP=1, the object file contains references of not
12591259
// longjmp but _emscripten_throw_longjmp, which is called from
12601260
// emscripten_longjmp.
1261-
_emscripten_throw_longjmp: function() { error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)'); },
12621261
get _emscripten_throw_longjmp__deps() {
12631262
return this.longjmp__deps;
12641263
},
12651264
#endif
1265+
_emscripten_throw_longjmp: function() {
1266+
error('longjmp support was disabled (SUPPORT_LONGJMP=0), but it is required by the code (either set SUPPORT_LONGJMP=1, or remove uses of it in the project)');
1267+
},
12661268
// will never be emitted, as the dep errors at compile time
12671269
longjmp: function(env, value) {
1268-
abort('longjmp not supported');
1270+
abort('longjmp not supported (build with -s SUPPORT_LONGJMP)');
12691271
},
1270-
setjmp: function(env, value) {
1271-
abort('setjmp not supported');
1272+
setjmp: function(env) {
1273+
abort('setjmp not supported (build with -s SUPPORT_LONGJMP)');
12721274
},
12731275
#endif
12741276

src/settings.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,12 +1917,13 @@ var USE_OFFSET_CONVERTER = false;
19171917
// This is enabled automatically when using -g4 with sanitizers.
19181918
var LOAD_SOURCE_MAP = false;
19191919

1920-
// If set to 1, the JS compiler is run before wasm-ld so that the linker can
1921-
// report undefined symbols within the binary. Without this option the linker
1922-
// doesn't know which symbols might be defined in JS so reporting of undefined
1923-
// symbols is delayed until the JS compiler is run.
1920+
// If set to 0, delay undefined symbol report until after wasm-ld runs. This
1921+
// avoids running the JS compiler prior to wasm-ld, but reduces the amount
1922+
// of information in the undefined symbol message (Since JS compiler cannot
1923+
// report the name of the object file that contains the reference to the
1924+
// undefined symbol).
19241925
// [link]
1925-
var LLD_REPORT_UNDEFINED = false;
1926+
var LLD_REPORT_UNDEFINED = true;
19261927

19271928
// Default to c++ mode even when run as `emcc` rather than `em++`.
19281929
// When this is disabled `em++` is required when compiling and linking C++

test/common.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,8 @@ def expect_fail(self, cmd, expect_traceback=False, **args):
10431043
self.assertContained('Traceback', proc.stderr)
10441044
elif not WINDOWS or 'Access is denied' not in proc.stderr:
10451045
self.assertNotContained('Traceback', proc.stderr)
1046+
if EMTEST_VERBOSE:
1047+
sys.stderr.write(proc.stderr)
10461048
return proc.stderr
10471049

10481050
# exercise dynamic linker.

test/test_core.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4142,8 +4142,8 @@ def test_dylink_basics_no_modify(self):
41424142
self.do_basic_dylink_test()
41434143

41444144
@needs_dylink
4145-
def test_dylink_basics_lld_report_undefined(self):
4146-
self.set_setting('LLD_REPORT_UNDEFINED')
4145+
def test_dylink_basics_no_lld_report_undefined(self):
4146+
self.set_setting('LLD_REPORT_UNDEFINED', 0)
41474147
self.do_basic_dylink_test()
41484148

41494149
@needs_dylink
@@ -5143,9 +5143,6 @@ def test_dylink_rtti(self):
51435143
# in the another module.
51445144
# Each module will define its own copy of certain COMDAT symbols such as
51455145
# each class's typeinfo, but at runtime they should both use the same one.
5146-
# Use LLD_REPORT_UNDEFINED to test that it works as expected with weak/COMDAT
5147-
# symbols.
5148-
self.set_setting('LLD_REPORT_UNDEFINED')
51495146
header = '''
51505147
#include <cstddef>
51515148
@@ -6154,7 +6151,6 @@ def test_unistd_io(self):
61546151
'nodefs': (['NODEFS']),
61556152
})
61566153
def test_unistd_misc(self, fs):
6157-
self.set_setting('LLD_REPORT_UNDEFINED')
61586154
self.emcc_args += ['-D' + fs]
61596155
if fs == 'NODEFS':
61606156
self.require_node()
@@ -9407,9 +9403,8 @@ def test_undefined_main(self):
94079403
# In standalone we don't support implicitly building without main. The user has to explicitly
94089404
# opt out (see below).
94099405
err = self.expect_fail([EMCC, test_file('core/test_ctors_no_main.cpp')] + self.get_emcc_args())
9410-
self.assertContained('error: undefined symbol: main/__main_argc_argv (referenced by top-level compiled C/C++ code)', err)
9411-
self.assertContained('warning: To build in STANDALONE_WASM mode without a main(), use emcc --no-entry', err)
9412-
elif not self.get_setting('LLD_REPORT_UNDEFINED') and not self.get_setting('STRICT'):
9406+
self.assertContained('undefined symbol: main', err)
9407+
elif not self.get_setting('STRICT'):
94139408
# Traditionally in emscripten we allow main to be implicitly undefined. This allows programs
94149409
# with a main and libraries without a main to be compiled identically.
94159410
# However we are trying to move away from that model to a more explicit opt-out model. See:
@@ -9427,6 +9422,9 @@ def test_undefined_main(self):
94279422
self.do_core_test('test_ctors_no_main.cpp')
94289423
self.clear_setting('EXPORTED_FUNCTIONS')
94299424

9425+
# Marked as impure since the WASI reactor modules (modules without main)
9426+
# are not yet supported by the wasm engines we test against.
9427+
@also_with_standalone_wasm(impure=True)
94309428
def test_undefined_main_explict(self):
94319429
# If we pass --no-entry this test should compile without issue
94329430
self.emcc_args.append('--no-entry')
@@ -9699,7 +9697,6 @@ def setUp(self):
96999697
settings={'ALLOW_MEMORY_GROWTH': 1})
97009698

97019699
# Experimental modes (not tested by CI)
9702-
lld = make_run('lld', emcc_args=[], settings={'LLD_REPORT_UNDEFINED': 1})
97039700
minimal0 = make_run('minimal0', emcc_args=['-g'], settings={'MINIMAL_RUNTIME': 1})
97049701

97059702
# TestCoreBase is just a shape for the specific subclasses, we don't test it itself

test/test_other.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,8 +2160,8 @@ def test_undefined_symbols(self, action):
21602160
print(proc.stderr)
21612161
if value or action is None:
21622162
# The default is that we error in undefined symbols
2163-
self.assertContained('error: undefined symbol: something', proc.stderr)
2164-
self.assertContained('error: undefined symbol: elsey', proc.stderr)
2163+
self.assertContained('undefined symbol: something', proc.stderr)
2164+
self.assertContained('undefined symbol: elsey', proc.stderr)
21652165
check_success = False
21662166
elif action == 'ERROR' and not value:
21672167
# Error disables, should only warn
@@ -3541,7 +3541,7 @@ def test_js_lib_missing_sig(self):
35413541
def test_js_lib_quoted_key(self):
35423542
create_file('lib.js', r'''
35433543
mergeInto(LibraryManager.library, {
3544-
__internal_data:{
3544+
__internal_data:{
35453545
'<' : 0,
35463546
'white space' : 1
35473547
},
@@ -6584,7 +6584,7 @@ def test_no_warn_exported_jslibfunc(self):
65846584
err = self.expect_fail([EMCC, test_file('hello_world.c'),
65856585
'-sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE=alGetError',
65866586
'-sEXPORTED_FUNCTIONS=_main,_alGet'])
6587-
self.assertContained('undefined exported symbol: "_alGet"', err)
6587+
self.assertContained('error: undefined exported symbol: "_alGet" [-Wundefined] [-Werror]', err)
65886588

65896589
def test_musl_syscalls(self):
65906590
self.run_process([EMCC, test_file('hello_world.c')])
@@ -8354,7 +8354,7 @@ def test_full_js_library(self):
83548354
def test_full_js_library_undefined(self):
83558355
create_file('main.c', 'void foo(); int main() { foo(); return 0; }')
83568356
err = self.expect_fail([EMCC, 'main.c', '-sSTRICT_JS', '-sINCLUDE_FULL_LIBRARY'])
8357-
self.assertContained('error: undefined symbol: foo', err)
8357+
self.assertContained('undefined symbol: foo', err)
83588358

83598359
def test_full_js_library_except(self):
83608360
self.set_setting('INCLUDE_FULL_LIBRARY', 1)
@@ -9010,19 +9010,20 @@ def test_js_preprocess(self):
90109010

90119011
err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js'], stderr=PIPE).stderr
90129012
self.assertContained('JSLIB: none of the above', err)
9013-
self.assertEqual(err.count('JSLIB'), 1)
9013+
self.assertNotContained('JSLIB: MAIN_MODULE', err)
9014+
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)
90149015

90159016
err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE'], stderr=PIPE).stderr
90169017
self.assertContained('JSLIB: MAIN_MODULE=1', err)
9017-
self.assertEqual(err.count('JSLIB'), 1)
9018+
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)
90189019

90199020
err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sMAIN_MODULE=2'], stderr=PIPE).stderr
90209021
self.assertContained('JSLIB: MAIN_MODULE=2', err)
9021-
self.assertEqual(err.count('JSLIB'), 1)
9022+
self.assertNotContained('JSLIB: EXIT_RUNTIME', err)
90229023

90239024
err = self.run_process([EMCC, test_file('hello_world.c'), '--js-library', 'lib.js', '-sEXIT_RUNTIME'], stderr=PIPE).stderr
90249025
self.assertContained('JSLIB: EXIT_RUNTIME', err)
9025-
self.assertEqual(err.count('JSLIB'), 1)
9026+
self.assertNotContained('JSLIB: MAIN_MODULE', err)
90269027

90279028
def test_html_preprocess(self):
90289029
src_file = test_file('module/test_stdin.c')
@@ -9195,7 +9196,7 @@ def test_dash_s_list_parsing(self):
91959196
# stray slash
91969197
('EXPORTED_FUNCTIONS=["_a", "_b",\\ "_c", "_d"]', 'undefined exported symbol: "\\\\ "_c"'),
91979198
# missing comma
9198-
('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'undefined exported symbol: "_b" "_c"'),
9199+
('EXPORTED_FUNCTIONS=["_a", "_b" "_c", "_d"]', 'emcc: error: undefined exported symbol: "_b" "_c" [-Wundefined] [-Werror]'),
91999200
]:
92009201
print(export_arg)
92019202
proc = self.run_process([EMCC, 'src.c', '-s', export_arg], stdout=PIPE, stderr=PIPE, check=not expected)
@@ -10880,20 +10881,20 @@ def test_signature_mismatch(self):
1088010881
self.expect_fail([EMCC, '-Wl,--fatal-warnings', 'a.c', 'b.c'])
1088110882
self.expect_fail([EMCC, '-sSTRICT', 'a.c', 'b.c'])
1088210883

10884+
# TODO(sbc): Remove these tests once we remove the LLD_REPORT_UNDEFINED setting.
1088310885
def test_lld_report_undefined(self):
1088410886
create_file('main.c', 'void foo(); int main() { foo(); return 0; }')
10885-
stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED', 'main.c'])
10886-
self.assertContained('wasm-ld: error:', stderr)
10887-
self.assertContained('main_0.o: undefined symbol: foo', stderr)
10887+
stderr = self.expect_fail([EMCC, '-sLLD_REPORT_UNDEFINED=0', 'main.c'])
10888+
self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', stderr)
1088810889

1088910890
def test_lld_report_undefined_reverse_deps(self):
10890-
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sREVERSE_DEPS=all', test_file('hello_world.c')])
10891+
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sREVERSE_DEPS=all', test_file('hello_world.c')])
1089110892

1089210893
def test_lld_report_undefined_exceptions(self):
10893-
self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED', '-fwasm-exceptions', test_file('hello_libcxx.cpp')])
10894+
self.run_process([EMXX, '-sLLD_REPORT_UNDEFINED=0', '-fwasm-exceptions', test_file('hello_libcxx.cpp')])
1089410895

1089510896
def test_lld_report_undefined_main_module(self):
10896-
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED', '-sMAIN_MODULE=2', test_file('hello_world.c')])
10897+
self.run_process([EMCC, '-sLLD_REPORT_UNDEFINED=0', '-sMAIN_MODULE=2', test_file('hello_world.c')])
1089710898

1089810899
# Verifies that warning messages that Closure outputs are recorded to console
1089910900
def test_closure_warnings(self):
@@ -11031,14 +11032,12 @@ def test_linker_version(self):
1103111032
def test_chained_js_error_diagnostics(self):
1103211033
err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c'), '--js-library', test_file('test_chained_js_error_diagnostics.js')])
1103311034
self.assertContained("error: undefined symbol: nonexistent_function (referenced by bar__deps: ['nonexistent_function'], referenced by foo__deps: ['bar'], referenced by top-level compiled C/C++ code)", err)
11034-
# Check that we don't recommend LLD_REPORT_UNDEFINED for chained dependencies.
11035-
self.assertNotContained('LLD_REPORT_UNDEFINED', err)
1103611035

11037-
# Test without chaining. In this case we don't include the JS library at all resulting in `foo`
11038-
# being undefined in the native code and in this case we recommend LLD_REPORT_UNDEFINED.
11036+
# Test without chaining. In this case we don't include the JS library at
11037+
# all resulting in `foo` being undefined in the native code.
1103911038
err = self.expect_fail([EMCC, test_file('test_chained_js_error_diagnostics.c')])
11040-
self.assertContained('error: undefined symbol: foo (referenced by top-level compiled C/C++ code)', err)
11041-
self.assertContained('Link with `-sLLD_REPORT_UNDEFINED` to get more information on undefined symbols', err)
11039+
self.assertContained('undefined symbol: foo', err)
11040+
self.assertNotContained('referenced by top-level compiled C/C++ code', err)
1104211041

1104311042
def test_xclang_flag(self):
1104411043
create_file('foo.h', ' ')
@@ -11476,7 +11475,7 @@ def test_split_main_module(self):
1147611475

1147711476
self.run_process([EMCC, side_src, '-sSIDE_MODULE', '-g', '-o', 'libhello.wasm'])
1147811477

11479-
self.emcc_args += ['-g']
11478+
self.emcc_args += ['-g', 'libhello.wasm']
1148011479
self.emcc_args += ['-sMAIN_MODULE=2']
1148111480
self.emcc_args += ['-sEXPORTED_FUNCTIONS=_printf']
1148211481
self.emcc_args += ['-sSPLIT_MODULE', '-Wno-experimental']
@@ -11840,7 +11839,7 @@ def test_no_main_with_PROXY_TO_PTHREAD(self):
1184011839
void foo() {}
1184111840
''')
1184211841
err = self.expect_fail([EMCC, 'lib.cpp', '-pthread', '-sPROXY_TO_PTHREAD'])
11843-
self.assertContained('error: PROXY_TO_PTHREAD proxies main() for you, but no main exists', err)
11842+
self.assertContained('crt1_proxy_main.o: undefined symbol: main', err)
1184411843

1184511844
def test_archive_bad_extension(self):
1184611845
# Regression test for https://github.com/emscripten-core/emscripten/issues/14012
@@ -11882,7 +11881,7 @@ def test_unimplemented_syscalls(self, args):
1188211881
cmd = [EMCC, 'main.c', '-sASSERTIONS'] + args
1188311882
if args:
1188411883
err = self.expect_fail(cmd)
11885-
self.assertContained('error: undefined symbol: __syscall_mincore', err)
11884+
self.assertContained('undefined symbol: __syscall_mincore', err)
1188611885
else:
1188711886
self.run_process(cmd)
1188811887
err = self.run_js('a.out.js')

tools/gen_struct_info.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ def inspect_headers(headers, cflags):
269269
'-nostdlib',
270270
compiler_rt,
271271
'-sBOOTSTRAPPING_STRUCT_INFO',
272-
'-sLLD_REPORT_UNDEFINED',
273272
'-sSTRICT',
274273
'-sASSERTIONS=0',
275274
# Use SINGLE_FILE so there is only a single

0 commit comments

Comments
 (0)