# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.

- import atexit
import json
import logging
- import multiprocessing
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
- from subprocess import STDOUT, PIPE
+ from subprocess import PIPE

from . import diagnostics
from . import response_file
logger = logging.getLogger('building')

# Building
- multiprocessing_pool = None
binaryen_checked = False

EXPECTED_BINARYEN_VERSION = 100
@@ -77,55 +74,46 @@ def warn_if_duplicate_entries(archive_contents, archive_filename):
    diagnostics.warning('emcc', msg)


- # This function creates a temporary directory specified by the 'dir' field in
- # the returned dictionary. Caller is responsible for cleaning up those files
- # after done.
- def extract_archive_contents(archive_file):
-   lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
-   # ignore empty lines
-   contents = [l for l in lines if len(l)]
-   if len(contents) == 0:
-     logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
-     return {
-       'returncode': 0,
-       'dir': None,
-       'files': []
-     }
-
-   # `ar` files can only contains filenames. Just to be sure, verify that each
-   # file has only as filename component and is not absolute
-   for f in contents:
-     assert not os.path.dirname(f)
-     assert not os.path.isabs(f)
-
-   warn_if_duplicate_entries(contents, archive_file)
-
-   # create temp dir
-   temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')
-
-   # extract file in temp dir
-   proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
-   abs_contents = [os.path.join(temp_dir, c) for c in contents]
+ # Extracts the given list of archive files and returns their contents
+ def extract_archive_contents(archive_files):
+   archive_results = shared.run_multiple_processes([[LLVM_AR, 't', a] for a in archive_files], pipe_stdout=True)

-   # check that all files were created
-   missing_contents = [x for x in abs_contents if not os.path.exists(x)]
-   if missing_contents:
-     exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout))
-
-   return {
-     'returncode': proc.returncode,
-     'dir': temp_dir,
-     'files': abs_contents
-   }
+   unpack_temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')

+   def clean_at_exit():
+     try_delete(unpack_temp_dir)
+   shared.atexit.register(clean_at_exit)

- def g_multiprocessing_initializer(*args):
-   for item in args:
-     (key, value) = item.split('=', 1)
-     if key == 'EMCC_POOL_CWD':
-       os.chdir(value)
-     else:
-       os.environ[key] = value
+   archive_contents = []
+
+   for i in range(len(archive_results)):
+     a = archive_results[i]
+     contents = [l for l in a.splitlines() if len(l)]
+     if len(contents) == 0:
+       logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_files[i])
+
+     # `ar` files can only contain filenames. Just to be sure, verify that each
+     # file has only a filename component and is not absolute
+     for f in contents:
+       assert not os.path.dirname(f)
+       assert not os.path.isabs(f)
+
+     warn_if_duplicate_entries(contents, archive_files[i])
+
+     archive_contents += [{
+       'archive_name': archive_files[i],
+       'o_files': [os.path.join(unpack_temp_dir, c) for c in contents]
+     }]
+
+   shared.run_multiple_processes([[LLVM_AR, 'xo', a] for a in archive_files], cwd=unpack_temp_dir)
+
+   # check that all files were created
+   for a in archive_contents:
+     missing_contents = [x for x in a['o_files'] if not os.path.exists(x)]
+     if missing_contents:
+       exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + a['archive_name'] + '!')
+
+   return archive_contents
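Note: the new code above leans on `shared.run_multiple_processes()` to fan the `llvm-ar` invocations out in parallel. That helper is not part of this diff; the sketch below is a hypothetical stand-in inferred purely from the call sites here (the `pipe_stdout`, `check` and `cwd` parameters), not the actual implementation in shared.py.

```python
# Hypothetical sketch of run_multiple_processes(), inferred from the call
# sites in this diff; the real helper in shared.py may differ.
import subprocess
from multiprocessing.pool import ThreadPool

def run_multiple_processes(commands, pipe_stdout=False, check=True, cwd=None):
  def run_one(cmd):
    # Threads suffice here: the heavy lifting happens in the child processes.
    proc = subprocess.run(cmd, cwd=cwd, universal_newlines=True,
                          stdout=subprocess.PIPE if pipe_stdout else None)
    if check and proc.returncode != 0:
      raise RuntimeError('command failed: ' + ' '.join(cmd))
    return proc.stdout if pipe_stdout else proc.returncode

  with ThreadPool() as pool:
    # Results come back in the same order as the input commands.
    return pool.map(run_one, commands)
```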


def unique_ordered(values):
@@ -152,74 +140,6 @@ def clear():
  _is_ar_cache.clear()


- def get_num_cores():
-   return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count()))
-
-
- # Multiprocessing pools are very slow to build up and tear down, and having
- # several pools throughout the application has a problem of overallocating
- # child processes. Therefore maintain a single centralized pool that is shared
- # between all pooled task invocations.
- def get_multiprocessing_pool():
-   global multiprocessing_pool
-   if not multiprocessing_pool:
-     cores = get_num_cores()
-
-     # If running with one core only, create a mock instance of a pool that does not
-     # actually spawn any new subprocesses. Very useful for internal debugging.
-     if cores == 1:
-       class FakeMultiprocessor(object):
-         def map(self, func, tasks, *args, **kwargs):
-           results = []
-           for t in tasks:
-             results += [func(t)]
-           return results
-
-         def map_async(self, func, tasks, *args, **kwargs):
-           class Result:
-             def __init__(self, func, tasks):
-               self.func = func
-               self.tasks = tasks
-
-             def get(self, timeout):
-               results = []
-               for t in tasks:
-                 results += [func(t)]
-               return results
-
-           return Result(func, tasks)
-
-       multiprocessing_pool = FakeMultiprocessor()
-     else:
-       child_env = [
-         # Multiprocessing pool children must have their current working
-         # directory set to a safe path that is guaranteed not to die in
-         # between of executing commands, or otherwise the pool children will
-         # have trouble spawning subprocesses of their own.
-         'EMCC_POOL_CWD=' + path_from_root(),
-         # Multiprocessing pool children can't spawn their own linear number of
-         # children, that could cause a quadratic amount of spawned processes.
-         'EMCC_CORES=1'
-       ]
-       multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)
-
-       def close_multiprocessing_pool():
-         global multiprocessing_pool
-         try:
-           # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate
-           # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal.
-           multiprocessing_pool.terminate()
-           multiprocessing_pool.join()
-           multiprocessing_pool = None
-         except OSError as e:
-           # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through
-           if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5):
-             raise
-       atexit.register(close_multiprocessing_pool)
-
-   return multiprocessing_pool
-
-

# .. but for Popen, we cannot have doublequotes, so provide functionality to
# remove them when needed.
def remove_quotes(arg):
@@ -291,11 +211,19 @@ def llvm_nm_multiple(files):
  # We can issue multiple files in a single llvm-nm call, but only if those
  # files are all .o or .bc files. Because of the llvm-nm output format, we
  # cannot llvm-nm multiple .a files in one call; those must be checked individually.
-   if len(llvm_nm_files) > 1:
-     llvm_nm_files = [f for f in files if f.endswith('.o') or f.endswith('.bc')]

-   if len(llvm_nm_files) > 0:
-     cmd = [LLVM_NM] + llvm_nm_files
+   o_files = [f for f in llvm_nm_files if os.path.splitext(f)[1].lower() in ['.o', '.obj', '.bc']]
+   a_files = [f for f in llvm_nm_files if f not in o_files]
+
+   # Issue parallel calls for .a files
+   if len(a_files) > 0:
+     results = shared.run_multiple_processes([[LLVM_NM, a] for a in a_files], pipe_stdout=True, check=False)
+     for i in range(len(results)):
+       nm_cache[a_files[i]] = parse_symbols(results[i])
+
+   # Issue a single batch call for multiple .o files
+   if len(o_files) > 0:
+     cmd = [LLVM_NM] + o_files
    cmd = get_command_with_possible_response_file(cmd)
    results = run_process(cmd, stdout=PIPE, stderr=PIPE, check=False)
@@ -319,11 +247,11 @@ def llvm_nm_multiple(files):
    # so loop over the report to extract the results
    # for each individual file.

-     filename = llvm_nm_files[0]
+     filename = o_files[0]

    # When we dispatched more than one file, we must manually parse
    # the file result delimiters (as shown structured above)
-     if len(llvm_nm_files) > 1:
+     if len(o_files) > 1:
      file_start = 0
      i = 0
@@ -340,18 +268,11 @@ def llvm_nm_multiple(files):

      nm_cache[filename] = parse_symbols(results[file_start:])
    else:
-       # We only dispatched a single file, we can just parse that directly
-       # to the output.
+       # We only dispatched a single file, so we can parse all of the result
+       # directly to that file.
      nm_cache[filename] = parse_symbols(results)

-   # Any .a files that have multiple .o files will have hard time parsing. Scan those
-   # sequentially to confirm. TODO: Move this to use run_multiple_processes()
-   # when available.
-   for f in files:
-     if f not in nm_cache:
-       nm_cache[f] = llvm_nm(f)
-
-   return [nm_cache[f] for f in files]
+   return [nm_cache[f] if f in nm_cache else ObjectFileInfo(1, '') for f in files]
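For reference, batching several object files into one llvm-nm invocation works because the report introduces each file's symbols with a `filename:` header line, which the loop above splits on. A naive illustration of that splitting follows; `split_nm_report` is a hypothetical helper for exposition, not code from this patch (the real parsing above feeds each slice to `parse_symbols()` instead).

```python
# Batched llvm-nm output groups symbols under "filename:" headers, roughly:
#
#   a.o:
#   ---------------- T foo
#   b.o:
#   ---------------- T bar
#
def split_nm_report(report):
  per_file = {}
  current = None
  for line in report.splitlines():
    if line.endswith(':') and ' ' not in line:  # crude delimiter detection
      current = line[:-1]
      per_file[current] = []
    elif current is not None and line.strip():
      per_file[current].append(line)
  return per_file
```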


def llvm_nm(file):
@@ -373,25 +294,13 @@ def read_link_inputs(files):
      object_names.append(absolute_path_f)

  # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
-   pool = get_multiprocessing_pool()
-   object_names_in_archives = pool.map(extract_archive_contents, archive_names)
-
-   def clean_temporary_archive_contents_directory(directory):
-     def clean_at_exit():
-       try_delete(directory)
-     if directory:
-       atexit.register(clean_at_exit)
-
-   for n in range(len(archive_names)):
-     if object_names_in_archives[n]['returncode'] != 0:
-       raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!')
-     ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
-     clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir'])
-
-   for o in object_names_in_archives:
-     for f in o['files']:
-       if f not in nm_cache:
-         object_names.append(f)
+   archive_contents = extract_archive_contents(archive_names)
+
+   for a in archive_contents:
+     ar_contents[os.path.abspath(a['archive_name'])] = a['o_files']
+     for o in a['o_files']:
+       if o not in nm_cache:
+         object_names.append(o)

  # Next, extract symbols from all object files (either standalone or inside archives we just extracted).
  # The results are not used here directly, but populate the llvm-nm cache structure.
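For reference, this is the shape `extract_archive_contents()` now returns, as consumed by `read_link_inputs()` above (the paths are illustrative; `o_files` point into the shared unpack temp dir):

```python
# One dict per input archive.
archive_contents = [{
  'archive_name': 'libfoo.a',
  'o_files': ['/tmp/emscripten_temp_ab12cd_archive_contents/foo.o',
              '/tmp/emscripten_temp_ab12cd_archive_contents/bar.o'],
}]
```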