diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index d79887ef533762..b28055b0d16df8 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -281,7 +281,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
 PyAPI_FUNC(_Py_CODEUNIT *) _PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
     _Py_CODEUNIT *);
-extern _Py_CODEUNIT *_PyTier2_GenerateNextBB(
+extern _PyTier2BBMetadata *_PyTier2_GenerateNextBB(
     struct _PyInterpreterFrame *frame, uint16_t bb_id_tagged,
     _Py_CODEUNIT *curr_executing_instr,
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index c97f35a4b42fb6..8bee9dcd1e02f2 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -88,6 +88,7 @@ dummy_func(
         inst(RESUME, (--)) {
             if (cframe.use_tracing == 0) {
                 next_instr = _PyCode_Tier2Warmup(frame, next_instr);
+                DISPATCH();
             }
             // GO_TO_INSTRUCTION(RESUME_QUICK);
             assert(frame == cframe.current_frame);
@@ -3294,57 +3295,80 @@ dummy_func(
         // Tier 2 instructions
         // Type propagator assumes this doesn't affect type context
         inst(BB_BRANCH, (unused/10 --)) {
-            _Py_CODEUNIT *t2_nextinstr = NULL;
             _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
+            _PyTier2BBMetadata *meta = NULL;
             _Py_CODEUNIT *tier1_fallback = NULL;
             if (BB_TEST_IS_SUCCESSOR(frame)) {
-                // Rewrite self
-                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
                 // Generate consequent.
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     0, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                     DISPATCH();
                 }
+                // Rewrite self
+                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
+                memcpy(cache->consequent_trace, &meta->machine_code, sizeof(uint64_t));
             }
             else {
-                // Rewrite self
-                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
                 // Generate alternative.
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     oparg, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
-                    next_instr = tier1_fallback + oparg;
+                    next_instr = tier1_fallback;
                     DISPATCH();
                 }
+                // Rewrite self
+                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
+                memcpy(cache->alternative_trace, &meta->machine_code, sizeof(uint64_t));
             }
-            Py_ssize_t forward_jump = t2_nextinstr - next_instr;
+            Py_ssize_t forward_jump = meta->tier2_start - next_instr;
             assert((uint16_t)forward_jump == forward_jump);
             cache->successor_jumpby = (uint16_t)forward_jump;
-            next_instr = t2_nextinstr;
-            DISPATCH();
+            next_instr = meta->tier2_start;
+            // Could not generate machine code, fall back to tier 2 instructions.
+            if (meta->machine_code == NULL) {
+                DISPATCH();
+            }
+            // The following code is partially adapted from Brandt Bucher's https://github.com/brandtbucher/cpython/blob/justin/Python/bytecodes.c#L2175
+            _PyJITReturnCode status = ((_PyJITFunction)(meta->machine_code))(tstate, frame, stack_pointer, next_instr);
+            frame = cframe.current_frame;
+            next_instr = frame->prev_instr;
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            switch (status) {
+            case _JUSTIN_RETURN_DEOPT:
+                NEXTOPARG();
+                opcode = _PyOpcode_Deopt[opcode];
+                DISPATCH_GOTO();
+            case _JUSTIN_RETURN_OK:
+                DISPATCH();
+            case _JUSTIN_RETURN_GOTO_ERROR:
+                goto error;
+            }
+            //Py_UNREACHABLE();
         }

         inst(BB_BRANCH_IF_FLAG_UNSET, (unused/10 --)) {
             if (!BB_TEST_IS_SUCCESSOR(frame)) {
                 _Py_CODEUNIT *curr = next_instr - 1;
-                _Py_CODEUNIT *t2_nextinstr = NULL;
+                _PyTier2BBMetadata *meta = NULL;
                 _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
                 _Py_CODEUNIT *tier1_fallback = NULL;
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     oparg, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                 }
-                next_instr = t2_nextinstr;
+                else {
+                    next_instr = meta->tier2_start;
+                }

                 // Rewrite self
                 _PyTier2_RewriteForwardJump(curr, next_instr);
@@ -3368,18 +3392,21 @@ dummy_func(
         inst(BB_BRANCH_IF_FLAG_SET, (unused/10 --)) {
             if (BB_TEST_IS_SUCCESSOR(frame)) {
                 _Py_CODEUNIT *curr = next_instr - 1;
+                _PyTier2BBMetadata *meta = NULL;
                 _Py_CODEUNIT *t2_nextinstr = NULL;
                 _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
                 _Py_CODEUNIT *tier1_fallback = NULL;
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     // v We generate from the tier1 consequent BB, so offset (oparg) is 0.
                     0, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                 }
-                next_instr = t2_nextinstr;
+                else {
+                    next_instr = meta->tier2_start;
+                }

                 // Rewrite self
                 _PyTier2_RewriteForwardJump(curr, next_instr);
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e92d7a889f30bb..32d15b9cb824de 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -64,6 +64,7 @@
         TARGET(RESUME) {
             if (cframe.use_tracing == 0) {
                 next_instr = _PyCode_Tier2Warmup(frame, next_instr);
+                DISPATCH();
             }
             // GO_TO_INSTRUCTION(RESUME_QUICK);
             assert(frame == cframe.current_frame);
@@ -4272,57 +4273,82 @@
         }

         TARGET(BB_BRANCH) {
-            _Py_CODEUNIT *t2_nextinstr = NULL;
             _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
+            _PyTier2BBMetadata *meta = NULL;
             _Py_CODEUNIT *tier1_fallback = NULL;
             if (BB_TEST_IS_SUCCESSOR(frame)) {
-                // Rewrite self
-                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
                 // Generate consequent.
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     0, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                     DISPATCH();
                 }
+                // Rewrite self
+                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_UNSET);
+                memcpy(cache->consequent_trace, &meta->machine_code, sizeof(uint64_t));
             }
             else {
-                // Rewrite self
-                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
                 // Generate alternative.
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     oparg, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
-                    next_instr = tier1_fallback + oparg;
+                    next_instr = tier1_fallback;
                     DISPATCH();
                 }
+                // Rewrite self
+                _py_set_opcode(next_instr - 1, BB_BRANCH_IF_FLAG_SET);
+                memcpy(cache->alternative_trace, &meta->machine_code, sizeof(uint64_t));
             }
-            Py_ssize_t forward_jump = t2_nextinstr - next_instr;
+            Py_ssize_t forward_jump = meta->tier2_start - next_instr;
             assert((uint16_t)forward_jump == forward_jump);
             cache->successor_jumpby = (uint16_t)forward_jump;
-            next_instr = t2_nextinstr;
+            next_instr = meta->tier2_start;
+            // Could not generate machine code, fall back to tier 2 instructions.
+            if (meta->machine_code == NULL) {
+                DISPATCH();
+            }
+            // The following code is partially adapted from Brandt Bucher's https://github.com/brandtbucher/cpython/blob/justin/Python/bytecodes.c#L2175
+            _PyJITReturnCode status = ((_PyJITFunction)(uintptr_t)(meta->machine_code))(tstate, frame, stack_pointer, next_instr);
+            frame = cframe.current_frame;
+            next_instr = frame->prev_instr;
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            switch (status) {
+            case _JUSTIN_RETURN_DEOPT:
+                NEXTOPARG();
+                opcode = _PyOpcode_Deopt[opcode];
+                DISPATCH_GOTO();
+            case _JUSTIN_RETURN_OK:
+                DISPATCH();
+            case _JUSTIN_RETURN_GOTO_ERROR:
+                goto error;
+            }
+            //Py_UNREACHABLE();
+            next_instr += 10;
             DISPATCH();
         }

         TARGET(BB_BRANCH_IF_FLAG_UNSET) {
             if (!BB_TEST_IS_SUCCESSOR(frame)) {
                 _Py_CODEUNIT *curr = next_instr - 1;
-                _Py_CODEUNIT *t2_nextinstr = NULL;
+                _PyTier2BBMetadata *meta = NULL;
                 _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
                 _Py_CODEUNIT *tier1_fallback = NULL;
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     oparg, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                 }
-                next_instr = t2_nextinstr;
+                else {
+                    next_instr = meta->tier2_start;
+                }

                 // Rewrite self
                 _PyTier2_RewriteForwardJump(curr, next_instr);
@@ -4346,18 +4372,21 @@
         TARGET(BB_BRANCH_IF_FLAG_SET) {
             if (BB_TEST_IS_SUCCESSOR(frame)) {
                 _Py_CODEUNIT *curr = next_instr - 1;
+                _PyTier2BBMetadata *meta = NULL;
                 _Py_CODEUNIT *t2_nextinstr = NULL;
                 _PyBBBranchCache *cache = (_PyBBBranchCache *)next_instr;
                 _Py_CODEUNIT *tier1_fallback = NULL;
-                t2_nextinstr = _PyTier2_GenerateNextBB(
+                meta = _PyTier2_GenerateNextBB(
                     frame, cache->bb_id_tagged, next_instr - 1,
                     // v We generate from the tier1 consequent BB, so offset (oparg) is 0.
                     0, &tier1_fallback, frame->bb_test);
-                if (t2_nextinstr == NULL) {
+                if (meta == NULL) {
                     // Fall back to tier 1.
                     next_instr = tier1_fallback;
                 }
-                next_instr = t2_nextinstr;
+                else {
+                    next_instr = meta->tier2_start;
+                }

                 // Rewrite self
                 _PyTier2_RewriteForwardJump(curr, next_instr);
diff --git a/Python/jit.c b/Python/jit.c
index 4952c1974cc2ab..f89c56e6ba2cc5 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -107,6 +107,7 @@ _PyJITFunction
 _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets)
 {
     assert(size > 0);
+    assert(n_jump_targets > 0);
     if (!stencils_loaded) {
         stencils_loaded = 1;
         for (size_t i = 0; i < Py_ARRAY_LENGTH(stencils); i++) {
@@ -137,6 +138,7 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
     }
     unsigned char *head = memory;
     uintptr_t patches[] = GET_PATCHES();
+    uintptr_t patches_entrypoint[] = GET_PATCHES();
     //// First, the trampoline:
     //const Stencil *stencil = &trampoline_stencil;
     //patches[HOLE_base] = (uintptr_t)head;
@@ -147,14 +149,15 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
     int seen_jump_targets = 0;
     // Allocate all the entry point (trampoline) stencils,
     unsigned char *entry_points = alloc(trampoline_stencil.nbytes * n_jump_targets);
+    unsigned char *first_entry_point = entry_points;
     for (int i = 0; i < size; i++) {
         // For each jump target, create an entry trampoline.
         if (i == jump_target_trace_offsets[seen_jump_targets]) {
             seen_jump_targets++;
             const Stencil *trampoline = &trampoline_stencil;
-            patches[HOLE_base] = (uintptr_t)entry_points;
-            patches[HOLE_continue] = (uintptr_t)head;
-            copy_and_patch(entry_points, trampoline, patches);
+            patches_entrypoint[HOLE_base] = (uintptr_t)entry_points;
+            patches_entrypoint[HOLE_continue] = (uintptr_t)head;
+            copy_and_patch(entry_points, trampoline, patches_entrypoint);
             entry_points += trampoline->nbytes;
         }
         _Py_CODEUNIT *instruction = trace[i];
@@ -171,5 +174,10 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offse
     // Wow, done already?
     assert(memory + nbytes == head);
     assert(seen_jump_targets == n_jump_targets);
-    return (_PyJITFunction)entry_points;
+    assert(first_entry_point + trampoline_stencil.nbytes * n_jump_targets == entry_points);
+#ifdef Py_DEBUG
+    _PyJITFunction temp = (_PyJITFunction)first_entry_point;
+    assert(temp);
+#endif
+    return (_PyJITFunction)first_entry_point;
 }
diff --git a/Python/tier2.c b/Python/tier2.c
index de93b8ca7dcdfa..f422ef6ee987e6 100644
--- a/Python/tier2.c
+++ b/Python/tier2.c
@@ -1040,6 +1040,7 @@ allocate_bb_metadata(PyCodeObject *co, _Py_CODEUNIT *tier2_start,
     }

+    metadata->machine_code = NULL;
     metadata->tier2_start = tier2_start;
     metadata->tier1_end = tier1_end;
     metadata->type_context = type_context;
@@ -2927,9 +2928,9 @@ _PyTier2_GenerateNextBBMeta(
  * @param jumpby How many instructions to jump by before we start scanning what to generate.
  * @param tier1_fallback Signals the tier 1 instruction to fall back to should generation fail.
  * @param bb_flag Whether to generate consequent or alternative BB.
- * @return The next tier 2 instruction to execute.
+ * @return The BB's metadata.
 */
-_Py_CODEUNIT *
+_PyTier2BBMetadata *
 _PyTier2_GenerateNextBB(
     _PyInterpreterFrame *frame, uint16_t bb_id_tagged,
     _Py_CODEUNIT *curr_executing_instr,
@@ -2948,7 +2949,7 @@ _PyTier2_GenerateNextBB(
     if (metadata == NULL) {
         return NULL;
     }
-    return metadata->tier2_start;
+    return metadata;
 }

 /**
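The sketch below is a minimal, standalone illustration of the control flow BB_BRANCH follows once _PyTier2_GenerateNextBB() returns basic-block metadata instead of a bare instruction pointer: keep interpreting the tier 2 bytecode at tier2_start when no machine code exists, otherwise call the compiled trace and act on its return code. Only the field names tier2_start/machine_code and the three return outcomes mirror the diff above; every other type and name here (BBMetadata, JitFunction, dispatch_bb, fake_trace) is a hypothetical stand-in for illustration, not CPython API.

#include <stddef.h>
#include <stdio.h>

/* Stand-in for _PyJITReturnCode; the three outcomes mirror the switch in BB_BRANCH. */
typedef enum {
    RETURN_DEOPT,       /* _JUSTIN_RETURN_DEOPT: re-dispatch the deoptimized opcode */
    RETURN_OK,          /* _JUSTIN_RETURN_OK: trace finished, resume normal dispatch */
    RETURN_GOTO_ERROR   /* _JUSTIN_RETURN_GOTO_ERROR: trace raised, jump to the error label */
} JitReturnCode;

/* Stand-in for _PyJITFunction (the real one also takes tstate, frame,
 * stack_pointer and next_instr). */
typedef JitReturnCode (*JitFunction)(void);

/* Stand-in for _PyTier2BBMetadata, reduced to the two fields BB_BRANCH consumes. */
typedef struct {
    const char *tier2_start;   /* where the tier 2 bytecode for this basic block begins */
    JitFunction machine_code;  /* NULL when no machine code could be generated */
} BBMetadata;

/* Mirrors the tail of BB_BRANCH: prefer the JIT-compiled trace if one exists,
 * otherwise fall back to interpreting the tier 2 instructions at tier2_start. */
static void
dispatch_bb(const BBMetadata *meta)
{
    if (meta->machine_code == NULL) {
        printf("no machine code: interpret tier 2 bytecode at %s\n", meta->tier2_start);
        return;
    }
    switch (meta->machine_code()) {
        case RETURN_DEOPT:
            printf("deopt: re-dispatch the unspecialized opcode\n");
            break;
        case RETURN_OK:
            printf("trace completed: DISPATCH()\n");
            break;
        case RETURN_GOTO_ERROR:
            printf("trace raised: goto error\n");
            break;
    }
}

/* Pretend trace that always completes successfully. */
static JitReturnCode
fake_trace(void)
{
    return RETURN_OK;
}

int
main(void)
{
    BBMetadata interpreted = { "bb_0", NULL };
    BBMetadata jitted = { "bb_1", fake_trace };
    dispatch_bb(&interpreted);
    dispatch_bb(&jitted);
    return 0;
}

Relatedly, the jit.c change that returns first_entry_point rather than the advanced entry_points cursor makes the returned _PyJITFunction point at the first of the n_jump_targets trampolines that were laid out, instead of one-past-the-end of that allocation.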