Skip to content

Commit 5118592

Browse files
authored Aug 14, 2024
pythonGH-113464: Speed up JIT builds (pythonGH-122839)
1 parent 6ae942f commit 5118592

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed
 

‎Tools/jit/_targets.py

+15 -3
Original file line number | Diff line number | Diff line change
@@ -182,15 +182,27 @@ async def _compile(
182182

183183
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
184184
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
185-
opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
185+
cases_and_opnames = sorted(
186+
re.findall(
187+
r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
188+
)
189+
)
186190
tasks = []
187191
with tempfile.TemporaryDirectory() as tempdir:
188192
work = pathlib.Path(tempdir).resolve()
189193
async with asyncio.TaskGroup() as group:
190194
coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
191195
tasks.append(group.create_task(coro, name="trampoline"))
192-
for opname in opnames:
193-
coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
196+
template = TOOLS_JIT_TEMPLATE_C.read_text()
197+
for case, opname in cases_and_opnames:
198+
# Write out a copy of the template with *only* this case
199+
# inserted. This is about twice as fast as #include'ing all
200+
# of executor_cases.c.h each time we compile (since the C
201+
# compiler wastes a bunch of time parsing the dead code for
202+
# all of the other cases):
203+
c = work / f"{opname}.c"
204+
c.write_text(template.replace("CASE", case))
205+
coro = self._compile(opname, c, work)
194206
tasks.append(group.create_task(coro, name=opname))
195207
return {task.get_name(): task.result() for task in tasks}
196208

‎Tools/jit/template.c

+4 -2
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,8 @@ do { \
8484
#undef WITHIN_STACK_BOUNDS
8585
#define WITHIN_STACK_BOUNDS() 1
8686

87+
#define TIER_TWO 2
88+
8789
_Py_CODEUNIT *
8890
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
8991
{
@@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState
107109
OPT_STAT_INC(uops_executed);
108110
UOP_STAT_INC(uopcode, execution_count);
109111

110-
// The actual instruction definitions (only one will be used):
111112
switch (uopcode) {
112-
#include "executor_cases.c.h"
113+
// The actual instruction definition gets inserted here:
114+
CASE
113115
default:
114116
Py_UNREACHABLE();
115117
}

0 commit comments

Comments
 (0)