Skip to content

Commit de3656f

Browse files
vstinner authored and mcepl committed
bpo-40602: Optimize _Py_hashtable for pointer keys (pythonGH-20051)
Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for pointer keys:

* key_size == sizeof(void*)
* hash_func == _Py_hashtable_hash_ptr
* compare_func == _Py_hashtable_compare_direct

Changes:

* Add get_func and get_entry_func members to _Py_hashtable_t.
* Convert the _Py_hashtable_get() and _Py_hashtable_get_entry() functions to static inline functions.
* Add specialized get and get_entry functions for pointer keys.
1 parent bc44935 commit de3656f

File tree

2 files changed

+153
-93
lines changed

2 files changed

+153
-93
lines changed

Modules/hashtable.h

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,17 @@ typedef struct {
7171

7272
/* Forward declaration */
7373
struct _Py_hashtable_t;
74+
typedef struct _Py_hashtable_t _Py_hashtable_t;
7475

75-
typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
76+
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
7677
const void *pkey);
77-
typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
78+
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
7879
const void *pkey,
7980
const _Py_hashtable_entry_t *he);
81+
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
82+
const void *pkey);
83+
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
84+
const void *pkey, void *data);
8085

8186
typedef struct {
8287
/* allocate a memory block */
@@ -88,18 +93,19 @@ typedef struct {
8893

8994

9095
/* _Py_hashtable: table */
91-
92-
typedef struct _Py_hashtable_t {
96+
struct _Py_hashtable_t {
9397
size_t num_buckets;
9498
size_t entries; /* Total number of entries in the table. */
9599
_Py_slist_t *buckets;
96100
size_t key_size;
97101
size_t data_size;
98102

103+
_Py_hashtable_get_func get_func;
104+
_Py_hashtable_get_entry_func get_entry_func;
99105
_Py_hashtable_hash_func hash_func;
100106
_Py_hashtable_compare_func compare_func;
101107
_Py_hashtable_allocator_t alloc;
102-
} _Py_hashtable_t;
108+
};
103109

104110
/* hash a pointer (void*) */
105111
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
@@ -171,10 +177,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
171177
172178
Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
173179
macro */
174-
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
175-
_Py_hashtable_t *ht,
176-
size_t key_size,
177-
const void *pkey);
180+
static inline _Py_hashtable_entry_t *
181+
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
182+
{
183+
assert(key_size == ht->key_size);
184+
return ht->get_entry_func(ht, pkey);
185+
}
178186

179187
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
180188
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
@@ -184,12 +192,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
184192
exists, return 0 if the entry does not exist.
185193
186194
Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
187-
PyAPI_FUNC(int) _Py_hashtable_get(
188-
_Py_hashtable_t *ht,
189-
size_t key_size,
190-
const void *pkey,
191-
size_t data_size,
192-
void *data);
195+
static inline int
196+
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
197+
size_t data_size, void *data)
198+
{
199+
assert(key_size == ht->key_size);
200+
assert(data_size == ht->data_size);
201+
return ht->get_func(ht, pkey, data);
202+
}
193203

194204
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
195205
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))

Python/hashtable.c

Lines changed: 128 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ Py_uhash_t
108108
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109109
{
110110
void *key;
111-
112111
_Py_HASHTABLE_READ_KEY(ht, pkey, key);
113112
return (Py_uhash_t)_Py_HashPointer(key);
114113
}
@@ -137,60 +136,6 @@ round_size(size_t s)
137136
}
138137

139138

140-
_Py_hashtable_t *
141-
_Py_hashtable_new_full(size_t key_size, size_t data_size,
142-
size_t init_size,
143-
_Py_hashtable_hash_func hash_func,
144-
_Py_hashtable_compare_func compare_func,
145-
_Py_hashtable_allocator_t *allocator)
146-
{
147-
_Py_hashtable_t *ht;
148-
size_t buckets_size;
149-
_Py_hashtable_allocator_t alloc;
150-
151-
if (allocator == NULL) {
152-
alloc.malloc = PyMem_RawMalloc;
153-
alloc.free = PyMem_RawFree;
154-
}
155-
else
156-
alloc = *allocator;
157-
158-
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
159-
if (ht == NULL)
160-
return ht;
161-
162-
ht->num_buckets = round_size(init_size);
163-
ht->entries = 0;
164-
ht->key_size = key_size;
165-
ht->data_size = data_size;
166-
167-
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
168-
ht->buckets = alloc.malloc(buckets_size);
169-
if (ht->buckets == NULL) {
170-
alloc.free(ht);
171-
return NULL;
172-
}
173-
memset(ht->buckets, 0, buckets_size);
174-
175-
ht->hash_func = hash_func;
176-
ht->compare_func = compare_func;
177-
ht->alloc = alloc;
178-
return ht;
179-
}
180-
181-
182-
_Py_hashtable_t *
183-
_Py_hashtable_new(size_t key_size, size_t data_size,
184-
_Py_hashtable_hash_func hash_func,
185-
_Py_hashtable_compare_func compare_func)
186-
{
187-
return _Py_hashtable_new_full(key_size, data_size,
188-
HASHTABLE_MIN_SIZE,
189-
hash_func, compare_func,
190-
NULL);
191-
}
192-
193-
194139
size_t
195140
_Py_hashtable_size(_Py_hashtable_t *ht)
196141
{
@@ -250,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
250195

251196

252197
_Py_hashtable_entry_t *
253-
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
254-
size_t key_size, const void *pkey)
198+
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
255199
{
256-
Py_uhash_t key_hash;
257-
size_t index;
258-
_Py_hashtable_entry_t *entry;
259-
260-
assert(key_size == ht->key_size);
261-
262-
key_hash = ht->hash_func(ht, pkey);
263-
index = key_hash & (ht->num_buckets - 1);
264-
265-
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
266-
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
200+
Py_uhash_t key_hash = ht->hash_func(ht, pkey);
201+
size_t index = key_hash & (ht->num_buckets - 1);
202+
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
203+
while (1) {
204+
if (entry == NULL) {
205+
return NULL;
206+
}
207+
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
267208
break;
209+
}
210+
entry = ENTRY_NEXT(entry);
268211
}
269-
270212
return entry;
271213
}
272214

@@ -323,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
323265
/* Don't write the assertion on a single line because it is interesting
324266
to know the duplicated entry if the assertion failed. The entry can
325267
be read using a debugger. */
326-
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
268+
entry = ht->get_entry_func(ht, pkey);
327269
assert(entry == NULL);
328270
#endif
329271

@@ -351,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
351293

352294

353295
int
354-
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
355-
size_t data_size, void *data)
296+
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
356297
{
357-
_Py_hashtable_entry_t *entry;
358-
359298
assert(data != NULL);
299+
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
300+
if (entry != NULL) {
301+
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
302+
return 1;
303+
}
304+
else {
305+
return 0;
306+
}
307+
}
360308

361-
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
362-
if (entry == NULL)
309+
310+
// Specialized for:
311+
// key_size == sizeof(void*)
312+
// hash_func == _Py_hashtable_hash_ptr
313+
// compare_func == _Py_hashtable_compare_direct
314+
_Py_hashtable_entry_t *
315+
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
316+
{
317+
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
318+
size_t index = key_hash & (ht->num_buckets - 1);
319+
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
320+
while (1) {
321+
if (entry == NULL) {
322+
return NULL;
323+
}
324+
if (entry->key_hash == key_hash) {
325+
const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
326+
if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
327+
break;
328+
}
329+
}
330+
entry = ENTRY_NEXT(entry);
331+
}
332+
return entry;
333+
}
334+
335+
336+
// Specialized for:
337+
// key_size == sizeof(void*)
338+
// hash_func == _Py_hashtable_hash_ptr
339+
// compare_func == _Py_hashtable_compare_direct
340+
int
341+
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
342+
{
343+
assert(data != NULL);
344+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
345+
if (entry != NULL) {
346+
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
347+
return 1;
348+
}
349+
else {
363350
return 0;
364-
ENTRY_READ_PDATA(ht, entry, data_size, data);
365-
return 1;
351+
}
366352
}
367353

368354

@@ -453,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
453439
}
454440

455441

442+
_Py_hashtable_t *
443+
_Py_hashtable_new_full(size_t key_size, size_t data_size,
444+
size_t init_size,
445+
_Py_hashtable_hash_func hash_func,
446+
_Py_hashtable_compare_func compare_func,
447+
_Py_hashtable_allocator_t *allocator)
448+
{
449+
_Py_hashtable_t *ht;
450+
size_t buckets_size;
451+
_Py_hashtable_allocator_t alloc;
452+
453+
if (allocator == NULL) {
454+
alloc.malloc = PyMem_Malloc;
455+
alloc.free = PyMem_Free;
456+
}
457+
else {
458+
alloc = *allocator;
459+
}
460+
461+
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
462+
if (ht == NULL)
463+
return ht;
464+
465+
ht->num_buckets = round_size(init_size);
466+
ht->entries = 0;
467+
ht->key_size = key_size;
468+
ht->data_size = data_size;
469+
470+
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
471+
ht->buckets = alloc.malloc(buckets_size);
472+
if (ht->buckets == NULL) {
473+
alloc.free(ht);
474+
return NULL;
475+
}
476+
memset(ht->buckets, 0, buckets_size);
477+
478+
ht->get_func = _Py_hashtable_get_generic;
479+
ht->get_entry_func = _Py_hashtable_get_entry_generic;
480+
ht->hash_func = hash_func;
481+
ht->compare_func = compare_func;
482+
ht->alloc = alloc;
483+
if (ht->key_size == sizeof(void*)
484+
&& ht->hash_func == _Py_hashtable_hash_ptr
485+
&& ht->compare_func == _Py_hashtable_compare_direct)
486+
{
487+
ht->get_func = _Py_hashtable_get_ptr;
488+
ht->get_entry_func = _Py_hashtable_get_entry_ptr;
489+
}
490+
return ht;
491+
}
492+
493+
494+
_Py_hashtable_t *
495+
_Py_hashtable_new(size_t key_size, size_t data_size,
496+
_Py_hashtable_hash_func hash_func,
497+
_Py_hashtable_compare_func compare_func)
498+
{
499+
return _Py_hashtable_new_full(key_size, data_size,
500+
HASHTABLE_MIN_SIZE,
501+
hash_func, compare_func,
502+
NULL);
503+
}
504+
505+
456506
void
457507
_Py_hashtable_clear(_Py_hashtable_t *ht)
458508
{

0 commit comments

Comments
 (0)