Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-106149: move jump target resolution from optimizer to assembler #106150

Merged
merged 2 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions Include/internal/pycore_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extern int _PyAST_Optimize(
int ff_features);

typedef struct {
int h_offset;
int h_label;
int h_startdepth;
int h_preserve_lasti;
} _PyCompile_ExceptHandlerInfo;
Expand All @@ -38,6 +38,10 @@ typedef struct {
int i_oparg;
_PyCompilerSrcLocation i_loc;
_PyCompile_ExceptHandlerInfo i_except_handler_info;

/* Used by the assembler */
int i_target;
int i_offset;
} _PyCompile_Instruction;

typedef struct {
Expand Down Expand Up @@ -85,8 +89,6 @@ int _PyCompile_EnsureArrayLargeEnough(

int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);

int _PyCompile_InstrSize(int opcode, int oparg);

/* Access compiler internals for unit testing */

PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
Expand Down
2 changes: 0 additions & 2 deletions Include/internal/pycore_flowgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ typedef struct _PyCfgBasicblock_ {
int b_predecessors;
/* depth of stack upon entry of block, computed by stackdepth() */
int b_startdepth;
/* instruction offset for block, computed by assemble_jump_offsets() */
int b_offset;
/* Basic block is an exception handler that preserves lasti */
unsigned b_preserve_lasti : 1;
/* Used by compiler passes to mark whether they have visited a basic block. */
Expand Down
111 changes: 101 additions & 10 deletions Python/assemble.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "pycore_code.h" // write_location_entry_start()
#include "pycore_compile.h"
#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
#include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE
#include "opcode_metadata.h" // IS_PSEUDO_INSTR


Expand Down Expand Up @@ -34,6 +35,18 @@ same_location(location a, location b)
a.end_col_offset == b.end_col_offset;
}

static int
instr_size(instruction *instr)
{
int opcode = instr->i_opcode;
int oparg = instr->i_oparg;
assert(!IS_PSEUDO_INSTR(opcode));
assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
int caches = _PyOpcode_Caches[opcode];
return extended_args + 1 + caches;
}

struct assembler {
PyObject *a_bytecode; /* bytes containing bytecode */
int a_offset; /* offset into bytecode */
Expand Down Expand Up @@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)

static int
assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
int handler_offset,
_PyCompile_ExceptHandlerInfo *handler)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
Expand All @@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
}
int size = end-start;
assert(end > start);
int target = handler->h_offset;
int target = handler_offset;
int depth = handler->h_startdepth - 1;
if (handler->h_preserve_lasti > 0) {
depth -= 1;
Expand All @@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
{
int ioffset = 0;
_PyCompile_ExceptHandlerInfo handler;
handler.h_offset = -1;
handler.h_label = -1;
handler.h_startdepth = -1;
handler.h_preserve_lasti = -1;
int start = -1;
for (int i = 0; i < instrs->s_used; i++) {
instruction *instr = &instrs->s_instrs[i];
if (instr->i_except_handler_info.h_offset != handler.h_offset) {
if (handler.h_offset >= 0) {
if (instr->i_except_handler_info.h_label != handler.h_label) {
if (handler.h_label >= 0) {
int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
RETURN_IF_ERROR(
assemble_emit_exception_table_entry(a, start, ioffset, &handler));
assemble_emit_exception_table_entry(a, start, ioffset,
handler_offset,
&handler));
}
start = ioffset;
handler = instr->i_except_handler_info;
}
ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
ioffset += instr_size(instr);
}
if (handler.h_offset >= 0) {
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
if (handler.h_label >= 0) {
int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset,
handler_offset,
&handler));
}
return SUCCESS;
}
Expand Down Expand Up @@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
loc = instr->i_loc;
size = 0;
}
size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
size += instr_size(instr);
}
RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
return SUCCESS;
Expand Down Expand Up @@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
_Py_CODEUNIT *code;

int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
int size = instr_size(instr);
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
if (len > PY_SSIZE_T_MAX / 2) {
return ERROR;
Expand Down Expand Up @@ -585,12 +605,83 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
return co;
}

static int
resolve_jump_offsets(instr_sequence *instrs)
{
/* Compute the size of each instruction and fixup jump args.
* Replace instruction index with position in bytecode.
*/

for (int i = 0; i < instrs->s_used; i++) {
instruction *instr = &instrs->s_instrs[i];
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
instr->i_target = instr->i_oparg;
}
}

int extended_arg_recompile;

do {
int totsize = 0;
for (int i = 0; i < instrs->s_used; i++) {
instruction *instr = &instrs->s_instrs[i];
instr->i_offset = totsize;
int isize = instr_size(instr);
totsize += isize;
}
extended_arg_recompile = 0;

int offset = 0;
for (int i = 0; i < instrs->s_used; i++) {
instruction *instr = &instrs->s_instrs[i];
int isize = instr_size(instr);
/* jump offsets are computed relative to
* the instruction pointer after fetching
* the jump instruction.
*/
offset += isize;
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
instruction *target = &instrs->s_instrs[instr->i_target];
instr->i_oparg = target->i_offset;
if (instr->i_oparg < offset) {
assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
instr->i_oparg = offset - instr->i_oparg;
}
else {
assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
instr->i_oparg = instr->i_oparg - offset;
}
if (instr_size(instr) != isize) {
extended_arg_recompile = 1;
}
}
}
/* XXX: This is an awful hack that could hurt performance, but
on the bright side it should work until we come up
with a better solution.

The issue is that in the first loop instr_size() is
called, and it requires i_oparg be set appropriately.
There is a bootstrap problem because i_oparg is
calculated in the second loop above.

So we loop until we stop seeing new EXTENDED_ARGs.
The only EXTENDED_ARGs that could be popping up are
ones in jump instructions. So this should converge
fairly quickly.
*/
} while (extended_arg_recompile);
return SUCCESS;
}

PyCodeObject *
_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
PyObject *consts, int maxdepth, instr_sequence *instrs,
int nlocalsplus, int code_flags, PyObject *filename)
{
if (resolve_jump_offsets(instrs) < 0) {
return NULL;
}
PyCodeObject *co = NULL;

struct assembler a;
Expand Down
17 changes: 5 additions & 12 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,6 @@ enum {
};


int
_PyCompile_InstrSize(int opcode, int oparg)
{
assert(!IS_PSEUDO_INSTR(opcode));
assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
int caches = _PyOpcode_Caches[opcode];
return extended_args + 1 + caches;
}

typedef _PyCompile_Instruction instruction;
typedef _PyCompile_InstructionSequence instr_sequence;

Expand Down Expand Up @@ -7717,17 +7707,20 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
for (int i = 0; i < b->b_iused; i++) {
cfg_instr *instr = &b->b_instr[i];
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
instr->i_oparg = instr->i_target->b_label.id;
}
RETURN_IF_ERROR(
instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));

_PyCompile_ExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
if (instr->i_except != NULL) {
hi->h_offset = instr->i_except->b_offset;
hi->h_label = instr->i_except->b_label.id;
hi->h_startdepth = instr->i_except->b_startdepth;
hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
}
else {
hi->h_offset = -1;
hi->h_label = -1;
}
}
}
Expand Down
80 changes: 2 additions & 78 deletions Python/flowgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,22 +166,6 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
return SUCCESS;
}

static int
instr_size(cfg_instr *instruction)
{
return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
}

static int
blocksize(basicblock *b)
{
int size = 0;
for (int i = 0; i < b->b_iused; i++) {
size += instr_size(&b->b_instr[i]);
}
return size;
}

/* For debugging purposes only */
#if 0
static void
Expand Down Expand Up @@ -212,9 +196,9 @@ static void
dump_basicblock(const basicblock *b)
{
const char *b_return = basicblock_returns(b) ? "return " : "";
fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n",
fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, %s\n",
b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
b->b_startdepth, b->b_offset, b_return);
b->b_startdepth, b_return);
if (b->b_instr) {
int i;
for (i = 0; i < b->b_iused; i++) {
Expand Down Expand Up @@ -480,71 +464,11 @@ normalize_jumps(_PyCfgBuilder *g)
return SUCCESS;
}

static void
resolve_jump_offsets(basicblock *entryblock)
{
int bsize, totsize, extended_arg_recompile;

/* Compute the size of each block and fixup jump args.
Replace block pointer with position in bytecode. */
do {
totsize = 0;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
bsize = blocksize(b);
b->b_offset = totsize;
totsize += bsize;
}
extended_arg_recompile = 0;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
bsize = b->b_offset;
for (int i = 0; i < b->b_iused; i++) {
cfg_instr *instr = &b->b_instr[i];
int isize = instr_size(instr);
/* jump offsets are computed relative to
* the instruction pointer after fetching
* the jump instruction.
*/
bsize += isize;
if (is_jump(instr)) {
instr->i_oparg = instr->i_target->b_offset;
if (instr->i_oparg < bsize) {
assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
instr->i_oparg = bsize - instr->i_oparg;
}
else {
assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
instr->i_oparg -= bsize;
}
if (instr_size(instr) != isize) {
extended_arg_recompile = 1;
}
}
}
}

/* XXX: This is an awful hack that could hurt performance, but
on the bright side it should work until we come up
with a better solution.

The issue is that in the first loop blocksize() is called
which calls instr_size() which requires i_oparg be set
appropriately. There is a bootstrap problem because
i_oparg is calculated in the second loop above.

So we loop until we stop seeing new EXTENDED_ARGs.
The only EXTENDED_ARGs that could be popping up are
ones in jump instructions. So this should converge
fairly quickly.
*/
} while (extended_arg_recompile);
}

int
_PyCfg_ResolveJumps(_PyCfgBuilder *g)
{
RETURN_IF_ERROR(normalize_jumps(g));
assert(no_redundant_jumps(g));
resolve_jump_offsets(g->g_entryblock);
return SUCCESS;
}

Expand Down