Skip to content

Commit

Permalink
feat: execute jitted code in BB_BRANCH (python#65)
Browse files Browse the repository at this point in the history
* refactor: return BB metadata from BB creation function

* feat: add execute jitted code in BB_BRANCH

* nit: disable JIT debug

* nit: Removed debugging comment in ceval_macros.h

* nit: Removed indirection warning in jit.py

---------

Co-authored-by: Jules <julia.poo.poo.poo@gmail.com>
  • Loading branch information
Fidget-Spinner and JuliaPoo authored Jul 13, 2023
1 parent 2d04e2e commit 4768ece
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 47 deletions.
2 changes: 1 addition & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);

PyAPI_FUNC(_Py_CODEUNIT *) _PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
_Py_CODEUNIT *);
extern _Py_CODEUNIT *_PyTier2_GenerateNextBB(
extern _PyTier2BBMetadata *_PyTier2_GenerateNextBB(
struct _PyInterpreterFrame *frame,
uint16_t bb_id_tagged,
_Py_CODEUNIT *curr_executing_instr,
Expand Down
67 changes: 47 additions & 20 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ dummy_func(
inst(RESUME, (--)) {
if (cframe.use_tracing == 0) {
next_instr = _PyCode_Tier2Warmup(frame, next_instr);
DISPATCH();
}
// GO_TO_INSTRUCTION(RESUME_QUICK);
assert(frame == cframe.current_frame);
Expand Down Expand Up @@ -3294,57 +3295,80 @@ dummy_func(
// Tier 2 instructions
// Type propagator assumes this doesn't affect type context
inst(BB_BRANCH, (unused/10 --)) {
_Py_CODEUNIT *t2_nextinstr = NULL;
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
_PyTier2BBMetadata *meta = NULL;
_Py_CODEUNIT *tier1_fallback = NULL;
if (BB_TEST_IS_SUCCESSOR(frame)) {
// Rewrite self
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
// Generate consequent.
t2_nextinstr = _PyTier2_GenerateNextBB(
meta = _PyTier2_GenerateNextBB(
frame, cache->bb_id_tagged, next_instr - 1,
0, &tier1_fallback, frame->bb_test);
if (t2_nextinstr == NULL) {
if (meta == NULL) {
// Fall back to tier 1.
next_instr = tier1_fallback;
DISPATCH();
}
// Rewrite self
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
memcpy(cache->consequent_trace, &meta->machine_code, sizeof(uint64_t));
}
else {
// Rewrite self
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
// Generate alternative.
t2_nextinstr = _PyTier2_GenerateNextBB(
meta = _PyTier2_GenerateNextBB(
frame, cache->bb_id_tagged, next_instr - 1,
oparg, &tier1_fallback, frame->bb_test);
if (t2_nextinstr == NULL) {
if (meta == NULL) {
// Fall back to tier 1.
next_instr = tier1_fallback + oparg;
next_instr = tier1_fallback;
DISPATCH();
}
// Rewrite self
_py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
memcpy(cache->alternative_trace, &meta->machine_code, sizeof(uint64_t));
}
Py_ssize_t forward_jump = t2_nextinstr - next_instr;
Py_ssize_t forward_jump = meta->tier2_start - next_instr;
assert((uint16_t)forward_jump == forward_jump);
cache->successor_jumpby = (uint16_t)forward_jump;
next_instr = t2_nextinstr;
DISPATCH();
next_instr = meta->tier2_start;
// Could not generate machine code, fall back to tier 2 instructions.
if (meta->machine_code == NULL) {
DISPATCH();
}
// The following code is partially adapted from Brandt Bucher's https://github.com/brandtbucher/cpython/blob/justin/Python/bytecodes.c#L2175
_PyJITReturnCode status = ((_PyJITFunction)(meta->machine_code))(tstate, frame, stack_pointer, next_instr);
frame = cframe.current_frame;
next_instr = frame->prev_instr;
stack_pointer = _PyFrame_GetStackPointer(frame);
switch (status) {
case _JUSTIN_RETURN_DEOPT:
NEXTOPARG();
opcode = _PyOpcode_Deopt[opcode];
DISPATCH_GOTO();
case _JUSTIN_RETURN_OK:
DISPATCH();
case _JUSTIN_RETURN_GOTO_ERROR:
goto error;
}
//Py_UNREACHABLE();
}

inst(BB_BRANCH_IF_FLAG_UNSET, (unused/10 --)) {
if (!BB_TEST_IS_SUCCESSOR(frame)) {
_Py_CODEUNIT *curr = next_instr - 1;
_Py_CODEUNIT *t2_nextinstr = NULL;
_PyTier2BBMetadata *meta = NULL;
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
_Py_CODEUNIT *tier1_fallback = NULL;

t2_nextinstr = _PyTier2_GenerateNextBB(
meta = _PyTier2_GenerateNextBB(
frame, cache->bb_id_tagged, next_instr - 1,
oparg, &tier1_fallback, frame->bb_test);
if (t2_nextinstr == NULL) {
if (meta == NULL) {
// Fall back to tier 1.
next_instr = tier1_fallback;
}
next_instr = t2_nextinstr;
else {
next_instr = meta->tier2_start;
}

// Rewrite self
_PyTier2_RewriteForwardJump(curr, next_instr);
Expand All @@ -3368,18 +3392,21 @@ dummy_func(
inst(BB_BRANCH_IF_FLAG_SET, (unused/10 --)) {
if (BB_TEST_IS_SUCCESSOR(frame)) {
_Py_CODEUNIT *curr = next_instr - 1;
_PyTier2BBMetadata *meta = NULL;
_Py_CODEUNIT *t2_nextinstr = NULL;
_PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
_Py_CODEUNIT *tier1_fallback = NULL;
t2_nextinstr = _PyTier2_GenerateNextBB(
meta = _PyTier2_GenerateNextBB(
frame, cache->bb_id_tagged, next_instr - 1,
// v We generate from the tier1 consequent BB, so offset (oparg) is 0.
0, &tier1_fallback, frame->bb_test);
if (t2_nextinstr == NULL) {
if (meta == NULL) {
// Fall back to tier 1.
next_instr = tier1_fallback;
}
next_instr = t2_nextinstr;
else {
next_instr = meta->tier2_start;
}

// Rewrite self
_PyTier2_RewriteForwardJump(curr, next_instr);
Expand Down
67 changes: 48 additions & 19 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 12 additions & 4 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ _PyJITFunction
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets)
{
assert(size > 0);
assert(n_jump_targets > 0);
if (!stencils_loaded) {
stencils_loaded = 1;
for (size_t i = 0; i < Py_ARRAY_LENGTH(stencils); i++) {
Expand Down Expand Up @@ -137,6 +138,7 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
}
unsigned char *head = memory;
uintptr_t patches[] = GET_PATCHES();
uintptr_t patches_entrypoint[] = GET_PATCHES();
//// First, the trampoline:
//const Stencil *stencil = &trampoline_stencil;
//patches[HOLE_base] = (uintptr_t)head;
Expand All @@ -147,14 +149,15 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
int seen_jump_targets = 0;
// Allocate all the entry point (trampoline) stencils,
unsigned char *entry_points = alloc(trampoline_stencil.nbytes * n_jump_targets);
unsigned char *first_entry_point = entry_points;
for (int i = 0; i < size; i++) {
// For each jump target, create an entry trampoline.
if (i == jump_target_trace_offsets[seen_jump_targets]) {
seen_jump_targets++;
const Stencil *trampoline = &trampoline_stencil;
patches[HOLE_base] = (uintptr_t)entry_points;
patches[HOLE_continue] = (uintptr_t)head;
copy_and_patch(entry_points, trampoline, patches);
patches_entrypoint[HOLE_base] = (uintptr_t)entry_points;
patches_entrypoint[HOLE_continue] = (uintptr_t)head;
copy_and_patch(entry_points, trampoline, patches_entrypoint);
entry_points += trampoline->nbytes;
}
_Py_CODEUNIT *instruction = trace[i];
Expand All @@ -171,5 +174,10 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
// Wow, done already?
assert(memory + nbytes == head);
assert(seen_jump_targets == n_jump_targets);
return (_PyJITFunction)entry_points;
assert(first_entry_point + trampoline_stencil.nbytes * n_jump_targets == entry_points);
#ifdef Py_DEBUG
_PyJITFunction temp = (_PyJITFunction)first_entry_point;
assert(temp);
#endif
return (_PyJITFunction)first_entry_point;
}
7 changes: 4 additions & 3 deletions Python/tier2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ allocate_bb_metadata(PyCodeObject *co, _Py_CODEUNIT *tier2_start,

}

metadata->machine_code = NULL;
metadata->tier2_start = tier2_start;
metadata->tier1_end = tier1_end;
metadata->type_context = type_context;
Expand Down Expand Up @@ -2927,9 +2928,9 @@ _PyTier2_GenerateNextBBMeta(
* @param jumpby How many instructions to jump by before we start scanning what to generate.
* @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
* @param bb_flag Whether to generate consequent or alternative BB.
* @return The next tier 2 instruction to execute.
* @return The BB's metadata, or NULL if generation failed.
*/
_Py_CODEUNIT *
_PyTier2BBMetadata *
_PyTier2_GenerateNextBB(
_PyInterpreterFrame *frame,
uint16_t bb_id_tagged,
Expand All @@ -2948,7 +2949,7 @@ _PyTier2_GenerateNextBB(
if (metadata == NULL) {
return NULL;
}
return metadata->tier2_start;
return metadata;
}

/**
Expand Down

0 comments on commit 4768ece

Please sign in to comment.