Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-89987: Shrink the BINARY_SUBSCR caches #103022

Merged
merged 6 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,18 @@ struct _typeobject {
* It should should be treated as an opaque blob
* by code other than the specializer and interpreter. */
struct _specialization_cache {
// In order to avoid bloating the bytecode with lots of inline caches, the
// members of this structure have a somewhat unique contract. They are set
// by the specialization machinery, and are invalidated by PyType_Modified.
// The rules for using them are as follows:
// - If getitem is non-NULL, then it is the same Python function that
// PyType_Lookup(cls, "__getitem__") would return.
// - If getitem is NULL, then getitem_version is meaningless.
// - If getitem->func_version == getitem_version, then getitem can be called
// with two positional arguments and no keyword arguments, and has neither
// *args nor **kwargs (as required by BINARY_SUBSCR_GETITEM):
PyObject *getitem;
uint32_t getitem_version;
};

/* The *real* layout of a type object when allocated on the heap */
Expand Down
2 changes: 0 additions & 2 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ typedef struct {

typedef struct {
uint16_t counter;
uint16_t type_version[2];
uint16_t func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,9 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.12a6 3519 (Modify SEND instruction)
# Python 3.12a6 3520 (Remove PREP_RERAISE_STAR, add CALL_INTRINSIC_2)
# Python 3.12a7 3521 (Shrink the LOAD_GLOBAL caches)
# Python 3.12a7 3522 (Removed JUMP_IF_FALSE_OR_POP/JUMP_IF_TRUE_OR_POP)
# Python 3.12a7 3523 (Convert COMPARE_AND_BRANCH back to COMPARE_OP)
# Python 3.12a7 3524 (Shrink the BINARY_SUBSCR caches)

# Python 3.13 will start with 3550

Expand All @@ -452,7 +454,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3523).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3524).to_bytes(2, 'little') + b'\r\n'

_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

Expand Down
2 changes: 0 additions & 2 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,6 @@ def pseudo_op(name, op, real_ops):
},
"BINARY_SUBSCR": {
"counter": 1,
"type_version": 2,
"func_version": 1,
},
"FOR_ITER": {
"counter": 1,
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_dis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ def test_binary_specialize(self):
1 2 LOAD_NAME 0 (a)
4 LOAD_CONST 0 (0)
6 %s
16 RETURN_VALUE
10 RETURN_VALUE
"""
co_list = compile('a[0]', "<list>", "eval")
self.code_quicken(lambda: exec(co_list, {}, {'a': [0]}))
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,7 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1P' # Specializer cache
'1PI' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Reduce the number of inline :opcode:`CACHE` entries for
:opcode:`BINARY_SUBSCR`.
10 changes: 10 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,11 @@ PyType_Modified(PyTypeObject *type)

type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
}

static void
Expand Down Expand Up @@ -563,6 +568,11 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
clear:
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
}

static int
Expand Down
57 changes: 28 additions & 29 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 11 additions & 9 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ dummy_func(
BINARY_SUBSCR_TUPLE_INT,
};

inst(BINARY_SUBSCR, (unused/4, container, sub -- res)) {
inst(BINARY_SUBSCR, (unused/1, container, sub -- res)) {
#if ENABLE_SPECIALIZATION
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
Expand Down Expand Up @@ -339,7 +339,7 @@ dummy_func(
ERROR_IF(err, error);
}

inst(BINARY_SUBSCR_LIST_INT, (unused/4, list, sub -- res)) {
inst(BINARY_SUBSCR_LIST_INT, (unused/1, list, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR);
Expand All @@ -356,7 +356,7 @@ dummy_func(
Py_DECREF(list);
}

inst(BINARY_SUBSCR_TUPLE_INT, (unused/4, tuple, sub -- res)) {
inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR);
Expand All @@ -373,7 +373,7 @@ dummy_func(
Py_DECREF(tuple);
}

inst(BINARY_SUBSCR_DICT, (unused/4, dict, sub -- res)) {
inst(BINARY_SUBSCR_DICT, (unused/1, dict, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
Expand All @@ -389,14 +389,16 @@ dummy_func(
DECREF_INPUTS();
}

inst(BINARY_SUBSCR_GETITEM, (unused/1, type_version/2, func_version/1, container, sub -- unused)) {
inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- unused)) {
PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *cached = ht->_spec_cache.getitem;
DEOPT_IF(cached == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != func_version, BINARY_SUBSCR);
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
Expand Down
Loading