Commit 19957ab
No system lib multiprocessing (#13493)
* Drop using python multiprocessing pool.
* Change llvm_nm_multiple() to use run_multiple_processes()
* flake
* fix stdout pipe
* flake
* Profile block string run_multiple_processes
* Remove unnecessary EMCC_SKIP_SANITY_CHECK set
* Remove EMCC_CCACHE related code.
* Restore bitcode linking.
* cleanup
* Rewrite bitcode linking to avoid python multiprocessing pool.
* Flake
1 parent f1bf33c commit 19957ab
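
The helper this commit leans on, shared.run_multiple_processes(), lives in tools/shared.py and is not visible in the hunks below. Judging only from the call sites (pipe_stdout, check, and cwd keyword arguments), it runs a list of command lines concurrently with plain subprocess.Popen instead of a multiprocessing.Pool. A minimal sketch of that shape — the names and details here are assumptions, not the actual implementation:

import os
import subprocess

def run_multiple_processes(commands, cwd=None, pipe_stdout=False, check=True):
  # Hypothetical sketch: bound concurrency by the core count, as the old
  # pool did via EMCC_CORES.
  max_workers = max(1, int(os.environ.get('EMCC_CORES', os.cpu_count() or 1)))
  results = [None] * len(commands)
  running = []  # (index into commands, Popen handle)

  def wait_for_one():
    # Block on the oldest running process; results are stored by index,
    # so completion order does not matter.
    idx, proc = running.pop(0)
    out, _ = proc.communicate()
    if check and proc.returncode != 0:
      raise RuntimeError('command failed: %s' % ' '.join(commands[idx]))
    results[idx] = out

  for i, cmd in enumerate(commands):
    if len(running) >= max_workers:
      wait_for_one()
    running.append((i, subprocess.Popen(
        cmd, cwd=cwd,
        stdout=subprocess.PIPE if pipe_stdout else None,
        universal_newlines=True)))
  while running:
    wait_for_one()
  return results

A design of this shape avoids the pool's spawn/teardown cost and its EMCC_POOL_CWD / EMCC_CORES=1 child-environment workarounds (both deleted in tools/building.py below), at the price of managing completion by hand.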

File tree

7 files changed: +194 -227 lines


ChangeLog.md

Lines changed: 3 additions & 0 deletions
@@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works.
 
 Current Trunk
 -------------
+- Removed use of the Python multiprocessing library because of stability issues.
+  Added a new environment variable EM_PYTHON_MULTIPROCESSING=1 that can be set
+  to revert back to using Python multiprocessing. (#13493)
 - Binaryen now always inlines single-use functions. This should reduce code size
   and improve performance (#13744).
 - Fix generating of symbol files with `--emit-symbol-map` for JS targets.
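
The EM_PYTHON_MULTIPROCESSING=1 escape hatch suggests the new subprocess-based path and the legacy pool path coexist behind a runtime check. A plausible shape for that gate — the two branch helpers are hypothetical names for illustration, not emscripten's actual API:

import os

def run_multiple_processes(commands, **kwargs):
  # EM_PYTHON_MULTIPROCESSING=1 opts back in to the Python multiprocessing
  # pool; any other value (or unset) takes the plain-subprocess path.
  if int(os.environ.get('EM_PYTHON_MULTIPROCESSING', '0')):
    return run_with_multiprocessing_pool(commands, **kwargs)  # hypothetical legacy path
  return run_with_subprocesses(commands, **kwargs)  # hypothetical new path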

embuilder.py

Lines changed: 12 additions & 1 deletion
@@ -15,6 +15,7 @@
 import argparse
 import logging
 import sys
+import time
 
 from tools import shared
 from tools import system_libs
@@ -112,6 +113,9 @@ def build_port(port_name):
 
 def main():
   global force
+
+  all_build_start_time = time.time()
+
   parser = argparse.ArgumentParser(description=__doc__,
                                    formatter_class=argparse.RawDescriptionHelpFormatter,
                                    epilog=get_help())
@@ -166,6 +170,7 @@ def main():
   print('Building targets: %s' % ' '.join(tasks))
   for what in tasks:
     logger.info('building and verifying ' + what)
+    start_time = time.time()
     if what in SYSTEM_LIBRARIES:
       library = SYSTEM_LIBRARIES[what]
       if force:
@@ -260,7 +265,13 @@ def main():
       logger.error('unfamiliar build target: ' + what)
       return 1
 
-    logger.info('...success')
+    time_taken = time.time() - start_time
+    logger.info('...success. Took %s(%.2fs)' % (('%02d:%02d mins ' % (time_taken // 60, time_taken % 60) if time_taken >= 60 else ''), time_taken))
+
+  if len(tasks) > 1:
+    all_build_time_taken = time.time() - all_build_start_time
+    logger.info('Built %d targets in %s(%.2fs)' % (len(tasks), ('%02d:%02d mins ' % (all_build_time_taken // 60, all_build_time_taken % 60) if all_build_time_taken >= 60 else ''), all_build_time_taken))
+
   return 0
 
 
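The inline timing expression added above is easier to read pulled out into a helper. A small illustrative rewrite — format_duration is a hypothetical name, not part of this commit:

def format_duration(seconds):
  # '02:05 mins (125.00s)' for builds over a minute, '(4.20s)' otherwise,
  # matching the logger.info() format strings above.
  mins = '%02d:%02d mins ' % (seconds // 60, seconds % 60) if seconds >= 60 else ''
  return '%s(%.2fs)' % (mins, seconds)

assert format_duration(4.2) == '(4.20s)'
assert format_duration(125.0) == '02:05 mins (125.00s)'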
tests/runner.py

Lines changed: 1 addition & 1 deletion
@@ -791,7 +791,7 @@ def get_library(self, name, generated_libs, configure=['sh', './configure'],
                   configure_args=[], make=['make'], make_args=None,
                   env_init={}, cache_name_extra='', native=False):
     if make_args is None:
-      make_args = ['-j', str(building.get_num_cores())]
+      make_args = ['-j', str(shared.get_num_cores())]
 
     build_dir = self.get_build_dir()
     output_dir = self.get_dir()
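
get_num_cores() moves from tools/building.py to tools/shared.py here. Its old body (removed below) read EMCC_CORES with multiprocessing.cpu_count() as the fallback; presumably the relocated version keeps the same contract without importing multiprocessing, along these lines — a sketch, not the verbatim moved code:

import os

def get_num_cores():
  # EMCC_CORES still overrides detection; os.cpu_count() stands in for
  # multiprocessing.cpu_count() now that the multiprocessing import is gone.
  return int(os.environ.get('EMCC_CORES', os.cpu_count() or 1))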

tools/building.py

Lines changed: 62 additions & 153 deletions
@@ -3,18 +3,16 @@
 # University of Illinois/NCSA Open Source License. Both these licenses can be
 # found in the LICENSE file.
 
-import atexit
 import json
 import logging
-import multiprocessing
 import os
 import re
 import shlex
 import shutil
 import subprocess
 import sys
 import tempfile
-from subprocess import STDOUT, PIPE
+from subprocess import PIPE
 
 from . import diagnostics
 from . import response_file
@@ -36,7 +34,6 @@
 logger = logging.getLogger('building')
 
 # Building
-multiprocessing_pool = None
 binaryen_checked = False
 
 EXPECTED_BINARYEN_VERSION = 100
@@ -77,55 +74,46 @@ def warn_if_duplicate_entries(archive_contents, archive_filename):
     diagnostics.warning('emcc', msg)
 
 
-# This function creates a temporary directory specified by the 'dir' field in
-# the returned dictionary. Caller is responsible for cleaning up those files
-# after done.
-def extract_archive_contents(archive_file):
-  lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
-  # ignore empty lines
-  contents = [l for l in lines if len(l)]
-  if len(contents) == 0:
-    logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
-    return {
-      'returncode': 0,
-      'dir': None,
-      'files': []
-    }
-
-  # `ar` files can only contains filenames. Just to be sure, verify that each
-  # file has only as filename component and is not absolute
-  for f in contents:
-    assert not os.path.dirname(f)
-    assert not os.path.isabs(f)
-
-  warn_if_duplicate_entries(contents, archive_file)
-
-  # create temp dir
-  temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')
-
-  # extract file in temp dir
-  proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
-  abs_contents = [os.path.join(temp_dir, c) for c in contents]
+# Extracts the given list of archive files and outputs their contents
+def extract_archive_contents(archive_files):
+  archive_results = shared.run_multiple_processes([[LLVM_AR, 't', a] for a in archive_files], pipe_stdout=True)
 
-  # check that all files were created
-  missing_contents = [x for x in abs_contents if not os.path.exists(x)]
-  if missing_contents:
-    exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout))
-
-  return {
-    'returncode': proc.returncode,
-    'dir': temp_dir,
-    'files': abs_contents
-  }
+  unpack_temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')
 
+  def clean_at_exit():
+    try_delete(unpack_temp_dir)
+  shared.atexit.register(clean_at_exit)
 
-def g_multiprocessing_initializer(*args):
-  for item in args:
-    (key, value) = item.split('=', 1)
-    if key == 'EMCC_POOL_CWD':
-      os.chdir(value)
-    else:
-      os.environ[key] = value
+  archive_contents = []
+
+  for i in range(len(archive_results)):
+    a = archive_results[i]
+    contents = [l for l in a.splitlines() if len(l)]
+    if len(contents) == 0:
+      logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % a)
+
+    # `ar` files can only contain filenames. Just to be sure, verify that each
+    # file has only a filename component and is not absolute
+    for f in contents:
+      assert not os.path.dirname(f)
+      assert not os.path.isabs(f)
+
+    warn_if_duplicate_entries(contents, a)
+
+    archive_contents += [{
+      'archive_name': archive_files[i],
+      'o_files': [os.path.join(unpack_temp_dir, c) for c in contents]
+    }]
+
+  shared.run_multiple_processes([[LLVM_AR, 'xo', a] for a in archive_files], cwd=unpack_temp_dir)
+
+  # check that all files were created
+  for a in archive_contents:
+    missing_contents = [x for x in a['o_files'] if not os.path.exists(x)]
+    if missing_contents:
+      exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + a['archive_name'] + '!')
+
+  return archive_contents
 
 
 def unique_ordered(values):
@@ -152,74 +140,6 @@ def clear():
   _is_ar_cache.clear()
 
 
-def get_num_cores():
-  return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count()))
-
-
-# Multiprocessing pools are very slow to build up and tear down, and having
-# several pools throughout the application has a problem of overallocating
-# child processes. Therefore maintain a single centralized pool that is shared
-# between all pooled task invocations.
-def get_multiprocessing_pool():
-  global multiprocessing_pool
-  if not multiprocessing_pool:
-    cores = get_num_cores()
-
-    # If running with one core only, create a mock instance of a pool that does not
-    # actually spawn any new subprocesses. Very useful for internal debugging.
-    if cores == 1:
-      class FakeMultiprocessor(object):
-        def map(self, func, tasks, *args, **kwargs):
-          results = []
-          for t in tasks:
-            results += [func(t)]
-          return results
-
-        def map_async(self, func, tasks, *args, **kwargs):
-          class Result:
-            def __init__(self, func, tasks):
-              self.func = func
-              self.tasks = tasks
-
-            def get(self, timeout):
-              results = []
-              for t in tasks:
-                results += [func(t)]
-              return results
-
-          return Result(func, tasks)
-
-      multiprocessing_pool = FakeMultiprocessor()
-    else:
-      child_env = [
-        # Multiprocessing pool children must have their current working
-        # directory set to a safe path that is guaranteed not to die in
-        # between of executing commands, or otherwise the pool children will
-        # have trouble spawning subprocesses of their own.
-        'EMCC_POOL_CWD=' + path_from_root(),
-        # Multiprocessing pool children can't spawn their own linear number of
-        # children, that could cause a quadratic amount of spawned processes.
-        'EMCC_CORES=1'
-      ]
-      multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)
-
-    def close_multiprocessing_pool():
-      global multiprocessing_pool
-      try:
-        # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate
-        # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal.
-        multiprocessing_pool.terminate()
-        multiprocessing_pool.join()
-        multiprocessing_pool = None
-      except OSError as e:
-        # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through
-        if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5):
-          raise
-    atexit.register(close_multiprocessing_pool)
-
-  return multiprocessing_pool
-
-
 # .. but for Popen, we cannot have doublequotes, so provide functionality to
 # remove them when needed.
 def remove_quotes(arg):
@@ -291,11 +211,19 @@ def llvm_nm_multiple(files):
   # We can issue multiple files in a single llvm-nm calls, but only if those
   # files are all .o or .bc files. Because of llvm-nm output format, we cannot
   # llvm-nm multiple .a files in one call, but those must be individually checked.
-  if len(llvm_nm_files) > 1:
-    llvm_nm_files = [f for f in files if f.endswith('.o') or f.endswith('.bc')]
 
-  if len(llvm_nm_files) > 0:
-    cmd = [LLVM_NM] + llvm_nm_files
+  o_files = [f for f in llvm_nm_files if os.path.splitext(f)[1].lower() in ['.o', '.obj', '.bc']]
+  a_files = [f for f in llvm_nm_files if f not in o_files]
+
+  # Issue parallel calls for .a files
+  if len(a_files) > 0:
+    results = shared.run_multiple_processes([[LLVM_NM, a] for a in a_files], pipe_stdout=True, check=False)
+    for i in range(len(results)):
+      nm_cache[a_files[i]] = parse_symbols(results[i])
+
+  # Issue a single batch call for multiple .o files
+  if len(o_files) > 0:
+    cmd = [LLVM_NM] + o_files
     cmd = get_command_with_possible_response_file(cmd)
     results = run_process(cmd, stdout=PIPE, stderr=PIPE, check=False)
 
@@ -319,11 +247,11 @@ def llvm_nm_multiple(files):
     # so loop over the report to extract the results
     # for each individual file.
 
-    filename = llvm_nm_files[0]
+    filename = o_files[0]
 
     # When we dispatched more than one file, we must manually parse
     # the file result delimiters (like shown structured above)
-    if len(llvm_nm_files) > 1:
+    if len(o_files) > 1:
       file_start = 0
       i = 0
 
@@ -340,18 +268,11 @@ def llvm_nm_multiple(files):
 
         nm_cache[filename] = parse_symbols(results[file_start:])
     else:
-      # We only dispatched a single file, we can just parse that directly
-      # to the output.
+      # We only dispatched a single file, so all of the output can be parsed
+      # as belonging to that file.
       nm_cache[filename] = parse_symbols(results)
 
-  # Any .a files that have multiple .o files will have hard time parsing. Scan those
-  # sequentially to confirm. TODO: Move this to use run_multiple_processes()
-  # when available.
-  for f in files:
-    if f not in nm_cache:
-      nm_cache[f] = llvm_nm(f)
-
-  return [nm_cache[f] for f in files]
+  return [nm_cache[f] if f in nm_cache else ObjectFileInfo(1, '') for f in files]
 
 
 def llvm_nm(file):
@@ -373,25 +294,13 @@ def read_link_inputs(files):
       object_names.append(absolute_path_f)
 
   # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
-  pool = get_multiprocessing_pool()
-  object_names_in_archives = pool.map(extract_archive_contents, archive_names)
-
-  def clean_temporary_archive_contents_directory(directory):
-    def clean_at_exit():
-      try_delete(directory)
-    if directory:
-      atexit.register(clean_at_exit)
-
-  for n in range(len(archive_names)):
-    if object_names_in_archives[n]['returncode'] != 0:
-      raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!')
-    ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
-    clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir'])
-
-  for o in object_names_in_archives:
-    for f in o['files']:
-      if f not in nm_cache:
-        object_names.append(f)
+  archive_contents = extract_archive_contents(archive_names)
+
+  for a in archive_contents:
+    ar_contents[os.path.abspath(a['archive_name'])] = a['o_files']
+    for o in a['o_files']:
+      if o not in nm_cache:
+        object_names.append(o)
 
   # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
   # The results are not used here directly, but populated to llvm-nm cache structure.

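When llvm_nm_multiple() batches several .o files into one llvm-nm invocation, the combined report separates each file's symbols with a 'path:' header line, which the code above walks manually. A standalone sketch of that delimiter format and one way to split on it — split_nm_output is illustrative, not the function emscripten uses:

def split_nm_output(output, filenames):
  # llvm-nm run on several inputs prints, e.g.:
  #
  #   a.o:
  #   00000012 T foo
  #
  #   b.o:
  #   00000034 T bar
  #
  # Map each input file to its own chunk of the combined report.
  wanted = set(filenames)
  chunks = {}
  current = None
  for line in output.splitlines():
    if line.endswith(':') and line[:-1] in wanted:
      current = line[:-1]
      chunks[current] = []
    elif current is not None:
      chunks[current].append(line)
  return {f: '\n'.join(lines) for f, lines in chunks.items()}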