Skip to content

Commit 4768ece

Browse files
feat: execute jitted code in BB_BRANCH (python#65)
* refactor: return BB metadata from BB creation function * feat: add execute jitted code in BB_BRANCH * nit: disable JIT debug * nit: Removed debugging comment in ceval_macros.h * nit: Removed indirection warning in jit.py --------- Co-authored-by: Jules <julia.poo.poo.poo@gmail.com>
1 parent 2d04e2e commit 4768ece

File tree

5 files changed

+112
-47
lines changed

5 files changed

+112
-47
lines changed

Include/internal/pycore_code.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
281281

282282
PyAPI_FUNC(_Py_CODEUNIT *) _PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
283283
_Py_CODEUNIT *);
284-
extern _Py_CODEUNIT *_PyTier2_GenerateNextBB(
284+
extern _PyTier2BBMetadata *_PyTier2_GenerateNextBB(
285285
struct _PyInterpreterFrame *frame,
286286
uint16_t bb_id_tagged,
287287
_Py_CODEUNIT *curr_executing_instr,

Python/bytecodes.c

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ dummy_func(
8888
inst(RESUME, (--)) {
8989
if (cframe.use_tracing == 0) {
9090
next_instr = _PyCode_Tier2Warmup(frame, next_instr);
91+
DISPATCH();
9192
}
9293
// GO_TO_INSTRUCTION(RESUME_QUICK);
9394
assert(frame == cframe.current_frame);
@@ -3294,57 +3295,80 @@ dummy_func(
32943295
// Tier 2 instructions
32953296
// Type propagator assumes this doesn't affect type context
32963297
inst(BB_BRANCH, (unused/10 --)) {
3297-
_Py_CODEUNIT *t2_nextinstr = NULL;
32983298
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
3299+
_PyTier2BBMetadata *meta = NULL;
32993300
_Py_CODEUNIT *tier1_fallback = NULL;
33003301
if (BB_TEST_IS_SUCCESSOR(frame)) {
3301-
// Rewrite self
3302-
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
33033302
// Generate consequent.
3304-
t2_nextinstr = _PyTier2_GenerateNextBB(
3303+
meta = _PyTier2_GenerateNextBB(
33053304
frame, cache->bb_id_tagged, next_instr - 1,
33063305
0, &tier1_fallback, frame->bb_test);
3307-
if (t2_nextinstr == NULL) {
3306+
if (meta == NULL) {
33083307
// Fall back to tier 1.
33093308
next_instr = tier1_fallback;
33103309
DISPATCH();
33113310
}
3311+
// Rewrite self
3312+
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
3313+
memcpy(cache->consequent_trace, &meta->machine_code, sizeof(uint64_t));
33123314
}
33133315
else {
3314-
// Rewrite self
3315-
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
33163316
// Generate alternative.
3317-
t2_nextinstr = _PyTier2_GenerateNextBB(
3317+
meta = _PyTier2_GenerateNextBB(
33183318
frame, cache->bb_id_tagged, next_instr - 1,
33193319
oparg, &tier1_fallback, frame->bb_test);
3320-
if (t2_nextinstr == NULL) {
3320+
if (meta == NULL) {
33213321
// Fall back to tier 1.
3322-
next_instr = tier1_fallback + oparg;
3322+
next_instr = tier1_fallback;
33233323
DISPATCH();
33243324
}
3325+
// Rewrite self
3326+
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
3327+
memcpy(cache->alternative_trace, &meta->machine_code, sizeof(uint64_t));
33253328
}
3326-
Py_ssize_t forward_jump = t2_nextinstr - next_instr;
3329+
Py_ssize_t forward_jump = meta->tier2_start - next_instr;
33273330
assert((uint16_t)forward_jump == forward_jump);
33283331
cache->successor_jumpby = (uint16_t)forward_jump;
3329-
next_instr = t2_nextinstr;
3330-
DISPATCH();
3332+
next_instr = meta->tier2_start;
3333+
// Could not generate machine code, fall back to tier 2 instructions.
3334+
if (meta->machine_code == NULL) {
3335+
DISPATCH();
3336+
}
3337+
// The following code is partially adapted from Brandt Bucher's https://github.com/brandtbucher/cpython/blob/justin/Python/bytecodes.c#L2175
3338+
_PyJITReturnCode status = ((_PyJITFunction)(meta->machine_code))(tstate, frame, stack_pointer, next_instr);
3339+
frame = cframe.current_frame;
3340+
next_instr = frame->prev_instr;
3341+
stack_pointer = _PyFrame_GetStackPointer(frame);
3342+
switch (status) {
3343+
case _JUSTIN_RETURN_DEOPT:
3344+
NEXTOPARG();
3345+
opcode = _PyOpcode_Deopt[opcode];
3346+
DISPATCH_GOTO();
3347+
case _JUSTIN_RETURN_OK:
3348+
DISPATCH();
3349+
case _JUSTIN_RETURN_GOTO_ERROR:
3350+
goto error;
3351+
}
3352+
//Py_UNREACHABLE();
33313353
}
33323354

33333355
inst(BB_BRANCH_IF_FLAG_UNSET, (unused/10 --)) {
33343356
if (!BB_TEST_IS_SUCCESSOR(frame)) {
33353357
_Py_CODEUNIT *curr = next_instr - 1;
3336-
_Py_CODEUNIT *t2_nextinstr = NULL;
3358+
_PyTier2BBMetadata *meta = NULL;
33373359
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
33383360
_Py_CODEUNIT *tier1_fallback = NULL;
33393361

3340-
t2_nextinstr = _PyTier2_GenerateNextBB(
3362+
meta = _PyTier2_GenerateNextBB(
33413363
frame, cache->bb_id_tagged, next_instr - 1,
33423364
oparg, &tier1_fallback, frame->bb_test);
3343-
if (t2_nextinstr == NULL) {
3365+
if (meta == NULL) {
33443366
// Fall back to tier 1.
33453367
next_instr = tier1_fallback;
33463368
}
3347-
next_instr = t2_nextinstr;
3369+
else {
3370+
next_instr = meta->tier2_start;
3371+
}
33483372

33493373
// Rewrite self
33503374
_PyTier2_RewriteForwardJump(curr, next_instr);
@@ -3368,18 +3392,21 @@ dummy_func(
33683392
inst(BB_BRANCH_IF_FLAG_SET, (unused/10 --)) {
33693393
if (BB_TEST_IS_SUCCESSOR(frame)) {
33703394
_Py_CODEUNIT *curr = next_instr - 1;
3395+
_PyTier2BBMetadata *meta = NULL;
33713396
_Py_CODEUNIT *t2_nextinstr = NULL;
33723397
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
33733398
_Py_CODEUNIT *tier1_fallback = NULL;
3374-
t2_nextinstr = _PyTier2_GenerateNextBB(
3399+
meta = _PyTier2_GenerateNextBB(
33753400
frame, cache->bb_id_tagged, next_instr - 1,
33763401
// v We generate from the tier1 consequent BB, so offset (oparg) is 0.
33773402
0, &tier1_fallback, frame->bb_test);
3378-
if (t2_nextinstr == NULL) {
3403+
if (meta == NULL) {
33793404
// Fall back to tier 1.
33803405
next_instr = tier1_fallback;
33813406
}
3382-
next_instr = t2_nextinstr;
3407+
else {
3408+
next_instr = meta->tier2_start;
3409+
}
33833410

33843411
// Rewrite self
33853412
_PyTier2_RewriteForwardJump(curr, next_instr);

Python/generated_cases.c.h

Lines changed: 48 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/jit.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ _PyJITFunction
107107
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets)
108108
{
109109
assert(size > 0);
110+
assert(n_jump_targets > 0);
110111
if (!stencils_loaded) {
111112
stencils_loaded = 1;
112113
for (size_t i = 0; i < Py_ARRAY_LENGTH(stencils); i++) {
@@ -137,6 +138,7 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
137138
}
138139
unsigned char *head = memory;
139140
uintptr_t patches[] = GET_PATCHES();
141+
uintptr_t patches_entrypoint[] = GET_PATCHES();
140142
//// First, the trampoline:
141143
//const Stencil *stencil = &trampoline_stencil;
142144
//patches[HOLE_base] = (uintptr_t)head;
@@ -147,14 +149,15 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
147149
int seen_jump_targets = 0;
148150
// Allocate all the entry point (trampoline) stencils,
149151
unsigned char *entry_points = alloc(trampoline_stencil.nbytes * n_jump_targets);
152+
unsigned char *first_entry_point = entry_points;
150153
for (int i = 0; i < size; i++) {
151154
// For each jump target, create an entry trampoline.
152155
if (i == jump_target_trace_offsets[seen_jump_targets]) {
153156
seen_jump_targets++;
154157
const Stencil *trampoline = &trampoline_stencil;
155-
patches[HOLE_base] = (uintptr_t)entry_points;
156-
patches[HOLE_continue] = (uintptr_t)head;
157-
copy_and_patch(entry_points, trampoline, patches);
158+
patches_entrypoint[HOLE_base] = (uintptr_t)entry_points;
159+
patches_entrypoint[HOLE_continue] = (uintptr_t)head;
160+
copy_and_patch(entry_points, trampoline, patches_entrypoint);
158161
entry_points += trampoline->nbytes;
159162
}
160163
_Py_CODEUNIT *instruction = trace[i];
@@ -171,5 +174,10 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
171174
// Wow, done already?
172175
assert(memory + nbytes == head);
173176
assert(seen_jump_targets == n_jump_targets);
174-
return (_PyJITFunction)entry_points;
177+
assert(first_entry_point + trampoline_stencil.nbytes * n_jump_targets == entry_points);
178+
#ifdef Py_DEBUG
179+
_PyJITFunction temp = (_PyJITFunction)first_entry_point;
180+
assert(temp);
181+
#endif
182+
return (_PyJITFunction)first_entry_point;
175183
}

Python/tier2.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,7 @@ allocate_bb_metadata(PyCodeObject *co, _Py_CODEUNIT *tier2_start,
10401040

10411041
}
10421042

1043+
metadata->machine_code = NULL;
10431044
metadata->tier2_start = tier2_start;
10441045
metadata->tier1_end = tier1_end;
10451046
metadata->type_context = type_context;
@@ -2927,9 +2928,9 @@ _PyTier2_GenerateNextBBMeta(
29272928
* @param jumpby How many instructions to jump by before we start scanning what to generate.
29282929
* @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
29292930
* @param bb_flag Whether to generate consequent or alternative BB.
2930-
* @return The next tier 2 instruction to execute.
2931+
* @return The BB's metadata
29312932
*/
2932-
_Py_CODEUNIT *
2933+
_PyTier2BBMetadata *
29332934
_PyTier2_GenerateNextBB(
29342935
_PyInterpreterFrame *frame,
29352936
uint16_t bb_id_tagged,
@@ -2948,7 +2949,7 @@ _PyTier2_GenerateNextBB(
29482949
if (metadata == NULL) {
29492950
return NULL;
29502951
}
2951-
return metadata->tier2_start;
2952+
return metadata;
29522953
}
29532954

29542955
/**

0 commit comments

Comments
 (0)