@@ -49,18 +49,11 @@ def list_to_object_array(list obj):
4949
5050cdef size_t _INIT_VEC_CAP = 32
5151
52- cdef class ObjectVector :
52+ cdef class Vector :
5353
5454 cdef:
5555 size_t n, m
5656 ndarray ao
57- PyObject ** data
58-
59- def __cinit__ (self ):
60- self .n = 0
61- self .m = _INIT_VEC_CAP
62- self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
63- self .data = < PyObject** > self .ao.data
6457
6558 def __len__ (self ):
6659 return self .n
@@ -70,6 +63,18 @@ cdef class ObjectVector:
7063 self .m = self .n
7164 return self .ao
7265
66+
67+ cdef class ObjectVector(Vector):
68+
69+ cdef:
70+ PyObject ** data
71+
72+ def __cinit__ (self ):
73+ self .n = 0
74+ self .m = _INIT_VEC_CAP
75+ self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
76+ self .data = < PyObject** > self .ao.data
77+
7378 cdef inline append(self , object o):
7479 if self .n == self .m:
7580 self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -81,11 +86,9 @@ cdef class ObjectVector:
8186 self .n += 1
8287
8388
84- cdef class Int64Vector:
89+ cdef class Int64Vector(Vector) :
8590
8691 cdef:
87- size_t n, m
88- ndarray ao
8992 int64_t * data
9093
9194 def __cinit__ (self ):
@@ -94,28 +97,29 @@ cdef class Int64Vector:
9497 self .ao = np.empty(_INIT_VEC_CAP, dtype = np.int64)
9598 self .data = < int64_t* > self .ao.data
9699
97- def __len__ (self ):
98- return self .n
100+ cdef inline uint8_t needs_resize(self ) nogil:
101+ # if we need to resize
102+ return self .n == self .m
99103
100- def to_array (self ):
101- self .ao.resize (self .n )
102- self .m = self .n
103- return self .ao
104+ cdef resize (self ):
105+ self .m = max (self .m * 2 , _INIT_VEC_CAP )
106+ self .ao.resize( self .m)
107+ self .data = < int64_t * > self .ao.data
104108
105- cdef inline append(self , int64_t x):
106- if self .n == self .m:
107- self .m = max (self .m * 2 , _INIT_VEC_CAP)
108- self .ao.resize(self .m)
109- self .data = < int64_t* > self .ao.data
109+ cdef inline void append(self , int64_t x) nogil:
110110
111- self .data[self .n] = x
112- self .n += 1
111+ with nogil:
112+
113+ if self .needs_resize():
114+ with gil:
115+ self .resize()
113116
114- cdef class Float64Vector:
117+ self .data[self .n] = x
118+ self .n += 1
119+
120+ cdef class Float64Vector(Vector):
115121
116122 cdef:
117- size_t n, m
118- ndarray ao
119123 float64_t * data
120124
121125 def __cinit__ (self ):
@@ -124,14 +128,6 @@ cdef class Float64Vector:
124128 self .ao = np.empty(_INIT_VEC_CAP, dtype = np.float64)
125129 self .data = < float64_t* > self .ao.data
126130
127- def __len__ (self ):
128- return self .n
129-
130- def to_array (self ):
131- self .ao.resize(self .n)
132- self .m = self .n
133- return self .ao
134-
135131 cdef inline append(self , float64_t x):
136132 if self .n == self .m:
137133 self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -142,18 +138,17 @@ cdef class Float64Vector:
142138 self .n += 1
143139
144140
145- cdef class HashTable:
146- pass
147-
148-
149- cdef class StringHashTable(HashTable):
141+ cdef class StringHashTable:
150142 cdef kh_str_t * table
151143
152144 def __cinit__ (self , int size_hint = 1 ):
153145 self .table = kh_init_str()
154146 if size_hint is not None :
155147 kh_resize_str(self .table, size_hint)
156148
149+ def __len__ (self ):
150+ return self .table.size
151+
157152 def __dealloc__ (self ):
158153 kh_destroy_str(self .table)
159154
@@ -256,7 +251,7 @@ cdef class StringHashTable(HashTable):
256251
257252 return reverse, labels
258253
259- cdef class Int32HashTable(HashTable) :
254+ cdef class Int32HashTable:
260255 cdef kh_int32_t * table
261256
262257 def __init__ (self , size_hint = 1 ):
@@ -266,6 +261,9 @@ cdef class Int32HashTable(HashTable):
266261 def __cinit__ (self ):
267262 self .table = kh_init_int32()
268263
264+ def __len__ (self ):
265+ return self .table.size
266+
269267 def __dealloc__ (self ):
270268 kh_destroy_int32(self .table)
271269
@@ -353,14 +351,16 @@ cdef class Int32HashTable(HashTable):
353351
354352 return reverse, labels
355353
356- cdef class Int64HashTable: # (HashTable):
357- # cdef kh_int64_t *table
354+ cdef class Int64HashTable:
358355
359356 def __cinit__ (self , size_hint = 1 ):
360357 self .table = kh_init_int64()
361358 if size_hint is not None :
362359 kh_resize_int64(self .table, size_hint)
363360
361+ def __len__ (self ):
362+ return self .table.size
363+
364364 def __dealloc__ (self ):
365365 kh_destroy_int64(self .table)
366366
@@ -369,9 +369,6 @@ cdef class Int64HashTable: #(HashTable):
369369 k = kh_get_int64(self .table, key)
370370 return k != self .table.n_buckets
371371
372- def __len__ (self ):
373- return self .table.size
374-
375372 cpdef get_item(self , int64_t val):
376373 cdef khiter_t k
377374 k = kh_get_int64(self .table, val)
@@ -446,6 +443,7 @@ cdef class Int64HashTable: #(HashTable):
446443 labels = self .get_labels(values, reverse, 0 )
447444 return reverse, labels
448445
446+ @ cython.boundscheck (False )
449447 def get_labels (self , ndarray[int64_t] values , Int64Vector uniques ,
450448 Py_ssize_t count_prior , Py_ssize_t na_sentinel ):
451449 cdef:
@@ -458,21 +456,23 @@ cdef class Int64HashTable: #(HashTable):
458456
459457 labels = np.empty(n, dtype = np.int64)
460458
461- for i in range (n):
462- val = values[i]
463- k = kh_get_int64(self .table, val)
464- if k != self .table.n_buckets:
465- idx = self .table.vals[k]
466- labels[i] = idx
467- else :
468- k = kh_put_int64(self .table, val, & ret)
469- self .table.vals[k] = count
470- uniques.append(val)
471- labels[i] = count
472- count += 1
459+ with nogil:
460+ for i in range (n):
461+ val = values[i]
462+ k = kh_get_int64(self .table, val)
463+ if k != self .table.n_buckets:
464+ idx = self .table.vals[k]
465+ labels[i] = idx
466+ else :
467+ k = kh_put_int64(self .table, val, & ret)
468+ self .table.vals[k] = count
469+ uniques.append(val)
470+ labels[i] = count
471+ count += 1
473472
474473 return labels
475474
475+ @ cython.boundscheck (False )
476476 def get_labels_groupby (self , ndarray[int64_t] values ):
477477 cdef:
478478 Py_ssize_t i, n = len (values)
@@ -485,24 +485,25 @@ cdef class Int64HashTable: #(HashTable):
485485
486486 labels = np.empty(n, dtype = np.int64)
487487
488- for i in range (n):
489- val = values[i]
490-
491- # specific for groupby
492- if val < 0 :
493- labels[i] = - 1
494- continue
495-
496- k = kh_get_int64(self .table, val)
497- if k != self .table.n_buckets:
498- idx = self .table.vals[k]
499- labels[i] = idx
500- else :
501- k = kh_put_int64(self .table, val, & ret)
502- self .table.vals[k] = count
503- uniques.append(val)
504- labels[i] = count
505- count += 1
488+ with nogil:
489+ for i in range (n):
490+ val = values[i]
491+
492+ # specific for groupby
493+ if val < 0 :
494+ labels[i] = - 1
495+ continue
496+
497+ k = kh_get_int64(self .table, val)
498+ if k != self .table.n_buckets:
499+ idx = self .table.vals[k]
500+ labels[i] = idx
501+ else :
502+ k = kh_put_int64(self .table, val, & ret)
503+ self .table.vals[k] = count
504+ uniques.append(val)
505+ labels[i] = count
506+ count += 1
506507
507508 arr_uniques = uniques.to_array()
508509
@@ -530,6 +531,7 @@ cdef class Int64HashTable: #(HashTable):
530531
531532
532533cdef class Float64HashTable(HashTable):
534+
533535 def __cinit__ (self , size_hint = 1 ):
534536 self .table = kh_init_float64()
535537 if size_hint is not None :
@@ -658,7 +660,6 @@ cdef class Float64HashTable(HashTable):
658660na_sentinel = object
659661
660662cdef class PyObjectHashTable(HashTable):
661- # cdef kh_pymap_t *table
662663
663664 def __init__ (self , size_hint = 1 ):
664665 self .table = kh_init_pymap()
0 commit comments