@@ -182,15 +182,27 @@ async def _compile(
182
182
183
183
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
    """Compile the trampoline and one stencil source per executor case.

    The generated executor-cases file is scanned for blocks of the form
    ``case OPNAME: {`` ... ``}`` (indented by eight spaces). For every
    block found, a copy of the template C file is written into a
    temporary directory with the text ``CASE`` replaced by that block,
    and the copy is handed to ``self._compile``. All compilations are
    scheduled on a single task group.

    Returns:
        A dict keyed by task name (``"trampoline"`` or the opname) whose
        values are the results of the corresponding ``self._compile``
        calls.
    """
    # Read with an explicit encoding so the result does not depend on the
    # locale's default encoding of the build machine.
    generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text(encoding="utf-8")
    # Each match is a (full case text, opname) pair; DOTALL lets ".*?" span
    # the multi-line case body up to the first closing brace at the same
    # eight-space indentation.
    cases_and_opnames = sorted(
        re.findall(
            r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
        )
    )
    tasks = []
    with tempfile.TemporaryDirectory() as tempdir:
        work = pathlib.Path(tempdir).resolve()
        async with asyncio.TaskGroup() as group:
            coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
            tasks.append(group.create_task(coro, name="trampoline"))
            template = TOOLS_JIT_TEMPLATE_C.read_text(encoding="utf-8")
            for case, opname in cases_and_opnames:
                # Write out a copy of the template with *only* this case
                # inserted. This is about twice as fast as #include'ing all
                # of executor_cases.c.h each time we compile (since the C
                # compiler wastes a bunch of time parsing the dead code for
                # all of the other cases):
                c = work / f"{opname}.c"
                c.write_text(template.replace("CASE", case), encoding="utf-8")
                coro = self._compile(opname, c, work)
                tasks.append(group.create_task(coro, name=opname))
    # The task group has exited, so every task has finished and result()
    # returns immediately.
    return {task.get_name(): task.result() for task in tasks}
196
208
0 commit comments