Skip to content

Commit 05a8bc1

Browse files
authored
bpo-46841: Use inline caching for attribute accesses (GH-31640)
1 parent 65b92cc commit 05a8bc1

File tree

10 files changed: 292 additions and 270 deletions.

Include/internal/pycore_code.h

Lines changed: 112 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,8 @@ typedef struct {
2020
uint32_t version;
2121
} _PyAdaptiveEntry;
2222

23-
2423
typedef struct {
25-
uint32_t tp_version;
26-
uint32_t dk_version;
27-
} _PyAttrCache;
28-
29-
typedef struct {
30-
/* Borrowed ref in LOAD_METHOD */
24+
/* Borrowed ref */
3125
PyObject *obj;
3226
} _PyObjectCache;
3327

@@ -51,7 +45,6 @@ typedef struct {
5145
typedef union {
5246
_PyEntryZero zero;
5347
_PyAdaptiveEntry adaptive;
54-
_PyAttrCache attr;
5548
_PyObjectCache obj;
5649
_PyCallCache call;
5750
} SpecializedCacheEntry;
@@ -65,8 +58,7 @@ typedef union {
6558
typedef struct {
6659
_Py_CODEUNIT counter;
6760
_Py_CODEUNIT index;
68-
_Py_CODEUNIT module_keys_version;
69-
_Py_CODEUNIT _m1;
61+
_Py_CODEUNIT module_keys_version[2];
7062
_Py_CODEUNIT builtin_keys_version;
7163
} _PyLoadGlobalCache;
7264

@@ -94,13 +86,32 @@ typedef struct {
9486

9587
typedef struct {
9688
_Py_CODEUNIT counter;
97-
_Py_CODEUNIT type_version;
98-
_Py_CODEUNIT _t1;
89+
_Py_CODEUNIT type_version[2];
9990
_Py_CODEUNIT func_version;
10091
} _PyBinarySubscrCache;
10192

10293
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
10394

95+
typedef struct {
96+
_Py_CODEUNIT counter;
97+
_Py_CODEUNIT version[2];
98+
_Py_CODEUNIT index;
99+
} _PyAttrCache;
100+
101+
#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache)
102+
103+
#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
104+
105+
typedef struct {
106+
_Py_CODEUNIT counter;
107+
_Py_CODEUNIT type_version[2];
108+
_Py_CODEUNIT dict_offset;
109+
_Py_CODEUNIT keys_version[2];
110+
_Py_CODEUNIT descr[4];
111+
} _PyLoadMethodCache;
112+
113+
#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)
114+
104115
/* Maximum size of code to quicken, in code units. */
105116
#define MAX_SIZE_TO_QUICKEN 5000
106117

@@ -328,10 +339,13 @@ cache_backoff(_PyAdaptiveEntry *entry) {
328339

329340
/* Specialization functions */
330341

331-
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
332-
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
342+
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
343+
PyObject *name);
344+
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
345+
PyObject *name);
333346
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
334-
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
347+
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
348+
PyObject *name);
335349
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
336350
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
337351
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
@@ -416,34 +430,107 @@ extern PyObject* _Py_GetSpecializationStats(void);
416430
#ifdef WORDS_BIGENDIAN
417431

418432
static inline void
419-
write32(uint16_t *p, uint32_t val)
433+
write_u32(uint16_t *p, uint32_t val)
420434
{
421-
p[0] = val >> 16;
422-
p[1] = (uint16_t)val;
435+
p[0] = (uint16_t)(val >> 16);
436+
p[1] = (uint16_t)(val >> 0);
437+
}
438+
439+
static inline void
440+
write_u64(uint16_t *p, uint64_t val)
441+
{
442+
p[0] = (uint16_t)(val >> 48);
443+
p[1] = (uint16_t)(val >> 32);
444+
p[2] = (uint16_t)(val >> 16);
445+
p[3] = (uint16_t)(val >> 0);
423446
}
424447

425448
static inline uint32_t
426-
read32(uint16_t *p)
449+
read_u32(uint16_t *p)
450+
{
451+
uint32_t val = 0;
452+
val |= (uint32_t)p[0] << 16;
453+
val |= (uint32_t)p[1] << 0;
454+
return val;
455+
}
456+
457+
static inline uint64_t
458+
read_u64(uint16_t *p)
427459
{
428-
return (p[0] << 16) | p[1];
460+
uint64_t val = 0;
461+
val |= (uint64_t)p[0] << 48;
462+
val |= (uint64_t)p[1] << 32;
463+
val |= (uint64_t)p[2] << 16;
464+
val |= (uint64_t)p[3] << 0;
465+
return val;
429466
}
430467

431468
#else
432469

433470
static inline void
434-
write32(uint16_t *p, uint32_t val)
471+
write_u32(uint16_t *p, uint32_t val)
472+
{
473+
p[0] = (uint16_t)(val >> 0);
474+
p[1] = (uint16_t)(val >> 16);
475+
}
476+
477+
static inline void
478+
write_u64(uint16_t *p, uint64_t val)
435479
{
436-
p[0] = (uint16_t)val;
437-
p[1] = val >> 16;
480+
p[0] = (uint16_t)(val >> 0);
481+
p[1] = (uint16_t)(val >> 16);
482+
p[2] = (uint16_t)(val >> 32);
483+
p[3] = (uint16_t)(val >> 48);
438484
}
439485

440486
static inline uint32_t
441-
read32(uint16_t *p)
487+
read_u32(uint16_t *p)
488+
{
489+
uint32_t val = 0;
490+
val |= (uint32_t)p[0] << 0;
491+
val |= (uint32_t)p[1] << 16;
492+
return val;
493+
}
494+
495+
static inline uint64_t
496+
read_u64(uint16_t *p)
497+
{
498+
uint64_t val = 0;
499+
val |= (uint64_t)p[0] << 0;
500+
val |= (uint64_t)p[1] << 16;
501+
val |= (uint64_t)p[2] << 32;
502+
val |= (uint64_t)p[3] << 48;
503+
return val;
504+
}
505+
506+
#endif
507+
508+
static inline void
509+
write_obj(uint16_t *p, PyObject *obj)
442510
{
443-
return p[0] | (p[1] << 16);
511+
uintptr_t val = (uintptr_t)obj;
512+
#if SIZEOF_VOID_P == 8
513+
write_u64(p, val);
514+
#elif SIZEOF_VOID_P == 4
515+
write_u32(p, val);
516+
#else
517+
#error "SIZEOF_VOID_P must be 4 or 8"
518+
#endif
444519
}
445520

521+
static inline PyObject *
522+
read_obj(uint16_t *p)
523+
{
524+
uintptr_t val;
525+
#if SIZEOF_VOID_P == 8
526+
val = read_u64(p);
527+
#elif SIZEOF_VOID_P == 4
528+
val = read_u32(p);
529+
#else
530+
#error "SIZEOF_VOID_P must be 4 or 8"
446531
#endif
532+
return (PyObject *)val;
533+
}
447534

448535
#ifdef __cplusplus
449536
}

Include/opcode.h

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/importlib/_bootstrap_external.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,8 @@ def _write_atomic(path, data, mode=0o666):
390390
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
391391
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
392392
# Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)
393+
# Python 3.11a5 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and
394+
# STORE_ATTR)
393395

394396
# Python 3.12 will start with magic number 3500
395397

@@ -404,7 +406,7 @@ def _write_atomic(path, data, mode=0o666):
404406
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
405407
# in PC/launcher.c must also be updated.
406408

407-
MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n'
409+
MAGIC_NUMBER = (3484).to_bytes(2, 'little') + b'\r\n'
408410
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
409411

410412
_PYCACHE = '__pycache__'

Lib/opcode.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def jabs_op(name, op, entries=0):
112112
def_op('UNPACK_SEQUENCE', 92, 1) # Number of tuple items
113113
jrel_op('FOR_ITER', 93)
114114
def_op('UNPACK_EX', 94)
115-
name_op('STORE_ATTR', 95) # Index in name list
115+
name_op('STORE_ATTR', 95, 4) # Index in name list
116116
name_op('DELETE_ATTR', 96) # ""
117117
name_op('STORE_GLOBAL', 97) # ""
118118
name_op('DELETE_GLOBAL', 98) # ""
@@ -124,7 +124,7 @@ def jabs_op(name, op, entries=0):
124124
def_op('BUILD_LIST', 103) # Number of list items
125125
def_op('BUILD_SET', 104) # Number of set items
126126
def_op('BUILD_MAP', 105) # Number of dict entries
127-
name_op('LOAD_ATTR', 106) # Index in name list
127+
name_op('LOAD_ATTR', 106, 4) # Index in name list
128128
def_op('COMPARE_OP', 107, 2) # Comparison operator
129129
hascompare.append(107)
130130
name_op('IMPORT_NAME', 108) # Index in name list
@@ -186,7 +186,7 @@ def jabs_op(name, op, entries=0):
186186
def_op('BUILD_CONST_KEY_MAP', 156)
187187
def_op('BUILD_STRING', 157)
188188

189-
name_op('LOAD_METHOD', 160)
189+
name_op('LOAD_METHOD', 160, 10)
190190

191191
def_op('LIST_EXTEND', 162)
192192
def_op('SET_UPDATE', 163)
@@ -301,7 +301,6 @@ def jabs_op(name, op, entries=0):
301301
"LOAD_FAST__LOAD_CONST",
302302
"LOAD_CONST__LOAD_FAST",
303303
"STORE_FAST__STORE_FAST",
304-
"LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE",
305304
]
306305
_specialization_stats = [
307306
"success",

Lib/test/test_dis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def bug42562():
375375
>> PUSH_EXC_INFO
376376
377377
%3d LOAD_GLOBAL 0 (Exception)
378-
JUMP_IF_NOT_EXC_MATCH 31 (to 62)
378+
JUMP_IF_NOT_EXC_MATCH 35 (to 70)
379379
STORE_FAST 0 (e)
380380
381381
%3d LOAD_FAST 0 (e)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Use inline caching for :opcode:`LOAD_ATTR`, :opcode:`LOAD_METHOD`, and
2+
:opcode:`STORE_ATTR`.

Programs/test_frozenmain.h

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)