Skip to content

When compiling, use a single clang command for all input sources. NFC #20713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 65 additions & 46 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1332,9 +1332,6 @@ def run(args):
logger.debug('stopping after compile phase')
for flag in state.link_flags:
diagnostics.warning('unused-command-line-argument', "argument unused during compilation: '%s'" % flag[1])
for f in linker_inputs:
diagnostics.warning('unused-command-line-argument', "%s: linker input file unused because linking not done" % f[1])

return 0

# We have now passed the compile phase, allow reading/writing of all settings.
Expand Down Expand Up @@ -1627,8 +1624,6 @@ def phase_setup(options, state, newargs):
elif '-M' in newargs or '-MM' in newargs:
options.default_object_extension = '.mout' # not bitcode, not js; but just dependency rule of the input file

if options.output_file and len(input_files) > 1:
exit_with_error('cannot specify -o with -c/-S/-E/-M and multiple source files')
else:
for arg in state.orig_args:
if any(arg.startswith(f) for f in COMPILE_ONLY_FLAGS):
Expand Down Expand Up @@ -3030,6 +3025,19 @@ def get_full_import_name(name):
return target, wasm_target


def get_clang_output_extension(state):
if '-emit-llvm' in state.orig_args:
if state.has_dash_S:
return '.ll'
else:
return '.bc'

if state.has_dash_S:
return '.s'
else:
return '.o'


@ToolchainProfiler.profile_block('compile inputs')
def phase_compile_inputs(options, state, newargs, input_files):
def is_link_flag(flag):
Expand Down Expand Up @@ -3064,40 +3072,62 @@ def get_language_mode(args):
language_mode = get_language_mode(newargs)
use_cxx = 'c++' in language_mode or run_via_emxx

def get_clang_command(src_file):
return compiler + get_cflags(state.orig_args, use_cxx) + compile_args + [src_file]
def get_clang_command():
return compiler + get_cflags(state.orig_args, use_cxx) + compile_args

def get_clang_command_preprocessed(src_file):
return compiler + get_clang_flags(state.orig_args) + compile_args + [src_file]
def get_clang_command_preprocessed():
return compiler + get_clang_flags(state.orig_args) + compile_args

def get_clang_command_asm(src_file):
return compiler + get_target_flags() + compile_args + [src_file]
def get_clang_command_asm():
return compiler + get_target_flags() + compile_args

# preprocessor-only (-E) support
if state.mode == Mode.PREPROCESS_ONLY:
for input_file in [x[1] for x in input_files]:
cmd = get_clang_command(input_file)
if options.output_file:
cmd += ['-o', options.output_file]
# Do not compile, but just output the result from preprocessing stage or
# output the dependency rule. Warning: clang and gcc behave differently
# with -MF! (clang seems to not recognize it)
logger.debug(('just preprocessor ' if state.has_dash_E else 'just dependencies: ') + ' '.join(cmd))
shared.check_call(cmd)
inputs = [i[1] for i in input_files]
cmd = get_clang_command() + inputs
if options.output_file:
cmd += ['-o', options.output_file]
# Do not compile, but just output the result from preprocessing stage or
# output the dependency rule. Warning: clang and gcc behave differently
# with -MF! (clang seems to not recognize it)
logger.debug(('just preprocessor ' if state.has_dash_E else 'just dependencies: ') + ' '.join(cmd))
shared.check_call(cmd)
return []

# Precompiled headers support
if state.mode == Mode.PCH:
headers = [header for _, header in input_files]
for header in headers:
inputs = [i[1] for i in input_files]
for header in inputs:
if not shared.suffix(header) in HEADER_ENDINGS:
exit_with_error(f'cannot mix precompiled headers with non-header inputs: {headers} : {header}')
cmd = get_clang_command(header)
if options.output_file:
cmd += ['-o', options.output_file]
logger.debug(f"running (for precompiled headers): {cmd[0]} {' '.join(cmd[1:])}")
shared.check_call(cmd)
return []
exit_with_error(f'cannot mix precompiled headers with non-header inputs: {inputs} : {header}')
cmd = get_clang_command() + inputs
if options.output_file:
cmd += ['-o', options.output_file]
logger.debug(f"running (for precompiled headers): {cmd[0]} {' '.join(cmd[1:])}")
shared.check_call(cmd)
return []

if state.mode == Mode.COMPILE_ONLY:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this limited to compile-only mode? Wouldn't it work for compile-and-link too?

Copy link
Collaborator Author

@sbc100 sbc100 Nov 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It compile-and-link mode it more complicated because we don't want to leave any object files in the current working directory. If we could find a way to extent this later to cover all cases that would be awesome but for now we want control over the output filenames in that case (just like the clang driver internally does).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we could just pass the args to Clang so e.g. emcc foo.c bar.c -o output.js becomes clang foo.c bar.c -o output.wasm but I guess it's not that simple?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the problem is that we don't really on clang as out linker-driver, we do it outselves. We run wam-ld and then a bunch of post-link tools from emcc.py.

If we could convert to using clang as the linker driver (i.e. have clang run wasm-ld for us) that would a really good step in the right direction.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other words, when linking we currently manage the names and locations of the temporary object files, and not clang. Maybe we could improve on this in the future.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Leaving as a future improvement sgtm.

inputs = [i[1] for i in input_files]
if all(get_file_suffix(i) in ASSEMBLY_ENDINGS for i in inputs):
cmd = get_clang_command_asm() + inputs
else:
cmd = get_clang_command() + inputs
if options.output_file:
cmd += ['-o', options.output_file]
if get_file_suffix(options.output_file) == '.bc' and not settings.LTO and '-emit-llvm' not in state.orig_args:
diagnostics.warning('emcc', '.bc output file suffix used without -flto or -emit-llvm. Consider using .o extension since emcc will output an object file, not a bitcode file')
shared.check_call(cmd)
if not options.output_file:
# Rename object files to match --default-obj-ext
# TODO: Remove '--default-obj-ext' to reduce this complexity
ext = get_clang_output_extension(state)
if options.default_object_extension != ext:
for i in inputs:
output = unsuffixed_basename(i) + ext
new_output = unsuffixed_basename(i) + options.default_object_extension
move_file(output, new_output)
return []

linker_inputs = []
seen_names = {}
Expand All @@ -3108,32 +3138,21 @@ def uniquename(name):
return unsuffixed(name) + '_' + seen_names[name] + shared.suffix(name)

def get_object_filename(input_file):
if state.mode == Mode.COMPILE_ONLY:
# In compile-only mode we don't use any temp file. The object files
# are written directly to their final output locations.
if options.output_file:
assert len(input_files) == 1
if get_file_suffix(options.output_file) == '.bc' and not settings.LTO and '-emit-llvm' not in state.orig_args:
diagnostics.warning('emcc', '.bc output file suffix used without -flto or -emit-llvm. Consider using .o extension since emcc will output an object file, not a bitcode file')
return options.output_file
else:
return unsuffixed_basename(input_file) + options.default_object_extension
else:
return in_temp(unsuffixed(uniquename(input_file)) + options.default_object_extension)
return in_temp(unsuffixed(uniquename(input_file)) + options.default_object_extension)

def compile_source_file(i, input_file):
logger.debug(f'compiling source file: {input_file}')
output_file = get_object_filename(input_file)
if state.mode not in (Mode.COMPILE_ONLY, Mode.PREPROCESS_ONLY):
linker_inputs.append((i, output_file))
linker_inputs.append((i, output_file))
if get_file_suffix(input_file) in ASSEMBLY_ENDINGS:
cmd = get_clang_command_asm(input_file)
cmd = get_clang_command_asm()
elif get_file_suffix(input_file) in PREPROCESSED_ENDINGS:
cmd = get_clang_command_preprocessed(input_file)
cmd = get_clang_command_preprocessed()
else:
cmd = get_clang_command(input_file)
cmd = get_clang_command()
if get_file_suffix(input_file) in ['.pcm']:
cmd = [c for c in cmd if not c.startswith('-fprebuilt-module-path=')]
cmd += [input_file]
if not state.has_dash_c:
cmd += ['-c']
cmd += ['-o', output_file]
Expand Down
7 changes: 4 additions & 3 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def test_multiple_sources(self):
self.clear()
err = self.expect_fail(cmd + ['-o', 'out.o'])

self.assertContained('cannot specify -o with -c/-S/-E/-M and multiple source files', err)
self.assertContained('clang: error: cannot specify -o when generating multiple output files', err)
self.assertNotExists('twopart_main.o')
self.assertNotExists('twopart_side.o')
self.assertNotExists(test_file('twopart_main.o'))
Expand Down Expand Up @@ -5280,10 +5280,11 @@ def test_dashS_stdout(self):
self.assertEqual(os.listdir('.'), [])
self.assertContained('hello_world.c', stdout)

def test_emit_llvm(self):
def test_emit_llvm_asm(self):
self.run_process([EMCC, test_file('hello_world.c'), '-S', '-emit-llvm'])
self.assertIsLLVMAsm('hello_world.ll')

def test_emit_llvm(self):
self.run_process([EMCC, test_file('hello_world.c'), '-c', '-emit-llvm'])
self.assertTrue(building.is_bitcode('hello_world.bc'))

Expand Down Expand Up @@ -11414,7 +11415,7 @@ def test_linker_flags_unused(self):
def test_linker_input_unused(self):
self.run_process([EMXX, '-c', test_file('hello_world.cpp')])
err = self.run_process([EMCC, 'hello_world.o', '-c', '-o', 'out.o'], stderr=PIPE).stderr
self.assertContained("warning: hello_world.o: linker input file unused because linking not done [-Wunused-command-line-argument", err)
self.assertContained("clang: warning: hello_world.o: 'linker' input unused [-Wunused-command-line-argument]", err)
# In this case the compiler does not produce any output file.
self.assertNotExists('out.o')

Expand Down