Skip to content

Commit

Permalink
perf(python): Directly access the key-value pairs of a dict (#1970)
Browse files Browse the repository at this point in the history
## What does this PR do?

In CPython, a dict stores its key-value pairs in a linear memory
structure, so they can be traversed in insertion order much like an
array. However, Cython does not provide an interface for accessing that
structure directly, and the underlying interfaces are internal to
CPython, so using them correctly would require compatibility work.
Nevertheless, we can still use the `PyDict_Next` interface to replace the
`items` method. Essentially, `items` uses `PyDict_Next` to append each
pair to a list, so calling `PyDict_Next` directly reduces the copying
overhead.

## Related issues

## Does this PR introduce any user-facing change?

- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?

## Benchmark
For a large dict:
```
[dict_item] 541 us +- 39 us -> [dict_next]  535 us +- 35 us: 1.00x faster

[dict_item] 119.8 MiB +- 1344.0 KiB -> [dict_next] 118.8 MiB +- 1338.4 KiB: 1.01x faster
```
  • Loading branch information
penguin-wwy authored Dec 7, 2024
1 parent b3f531c commit 3865dcd
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions python/pyfury/_serialization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ from pyfury.util import is_little_endian
from libc.stdint cimport *
from libcpp.vector cimport vector
from cpython cimport PyObject
from cpython.dict cimport PyDict_Next
from cpython.ref cimport *
from cpython.list cimport PyList_New, PyList_SET_ITEM
from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM
Expand Down Expand Up @@ -2049,7 +2050,13 @@ cdef class MapSerializer(Serializer):
buffer.write_varint32(len(value))
cdef ClassInfo key_classinfo
cdef ClassInfo value_classinfo
for k, v in value.items():
cdef int64_t key_addr, value_addr
cdef Py_ssize_t pos = 0
while PyDict_Next(value, &pos, <PyObject **>&key_addr, <PyObject **>&value_addr) != 0:
k = int2obj(key_addr)
Py_INCREF(k)
v = int2obj(value_addr)
Py_INCREF(v)
key_cls = type(k)
if key_cls is str:
buffer.write_int16(NOT_NULL_STRING_FLAG)
Expand Down Expand Up @@ -2122,7 +2129,13 @@ cdef class MapSerializer(Serializer):
cpdef inline xwrite(self, Buffer buffer, o):
cdef dict value = o
buffer.write_varint32(len(value))
for k, v in value.items():
cdef int64_t key_addr, value_addr
cdef Py_ssize_t pos = 0
while PyDict_Next(value, &pos, <PyObject **>&key_addr, <PyObject **>&value_addr) != 0:
k = int2obj(key_addr)
Py_INCREF(k)
v = int2obj(value_addr)
Py_INCREF(v)
self.fury.xserialize_ref(
buffer, k, serializer=self.key_serializer
)
Expand Down

0 comments on commit 3865dcd

Please sign in to comment.