-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
API: New copy / view semantics using Copy-on-Write #46958
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ea0bff8
74981f9
36faac9
ffa24a4
8c00bbd
af5a02c
a64f310
e6c0baa
977cbb8
cc367f5
53a8273
d3d26f4
5360e57
e431bc4
a2d8fde
81f6eae
b2a1428
4b1ccf6
40f2b24
9339f15
358deaf
085d15b
377f789
7dcefe8
773f03f
bfc2117
89c4fff
b2e56ea
fdeb154
c521161
fb297df
9ab9df5
14f753e
f7389b1
764838f
efde1bf
3d1377b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from collections import defaultdict | ||
import weakref | ||
|
||
cimport cython | ||
from cpython.slice cimport PySlice_GetIndicesEx | ||
|
@@ -674,8 +675,9 @@ cdef class BlockManager: | |
public list axes | ||
public bint _known_consolidated, _is_consolidated | ||
public ndarray _blknos, _blklocs | ||
public list refs | ||
|
||
def __cinit__(self, blocks=None, axes=None, verify_integrity=True): | ||
def __cinit__(self, blocks=None, axes=None, refs=None, verify_integrity=True): | ||
# None as defaults for unpickling GH#42345 | ||
if blocks is None: | ||
# This adds 1-2 microseconds to DataFrame(np.array([])) | ||
|
@@ -687,6 +689,7 @@ cdef class BlockManager: | |
|
||
self.blocks = blocks | ||
self.axes = axes.copy() # copy to make sure we are not remotely-mutable | ||
self.refs = refs | ||
|
||
# Populate known_consolidate, blknos, and blklocs lazily | ||
self._known_consolidated = False | ||
|
@@ -795,12 +798,14 @@ cdef class BlockManager: | |
ndarray blknos, blklocs | ||
|
||
nbs = [] | ||
nrefs = [] | ||
for blk in self.blocks: | ||
nb = blk.getitem_block_index(slobj) | ||
nbs.append(nb) | ||
nrefs.append(weakref.ref(blk)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any particular reason to make the reference to the `blk` instead of its array `blk.values`? Will that make a difference in the cases where `blk.values` is re-set? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good question. I don't remember if there was a specific technical reason to do so, but I think it seemed the easier option (since this is all handled at the BlockManager level). When keeping those references / when checking for references, it would otherwise be an additional level of indirection to check the `blk.values` instead of `blk` itself. I suppose this would generally be the same, except indeed in places where |
||
|
||
new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] | ||
mgr = type(self)(tuple(nbs), new_axes, verify_integrity=False) | ||
mgr = type(self)(tuple(nbs), new_axes, nrefs, verify_integrity=False) | ||
|
||
# We can avoid having to rebuild blklocs/blknos | ||
blklocs = self._blklocs | ||
|
@@ -813,7 +818,7 @@ cdef class BlockManager: | |
def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: | ||
|
||
if axis == 0: | ||
new_blocks = self._slice_take_blocks_ax0(slobj) | ||
new_blocks, new_refs = self._slice_take_blocks_ax0(slobj) | ||
elif axis == 1: | ||
return self._get_index_slice(slobj) | ||
else: | ||
|
@@ -822,4 +827,4 @@ cdef class BlockManager: | |
new_axes = list(self.axes) | ||
new_axes[axis] = new_axes[axis]._getitem_slice(slobj) | ||
|
||
return type(self)(tuple(new_blocks), new_axes, verify_integrity=False) | ||
return type(self)(tuple(new_blocks), new_axes, new_refs, verify_integrity=False) |
Uh oh!
There was an error while loading. Please reload this page.