Unmove unique-implementation (review jreback)

pandas-dev · jreback · Oct 18, 2018 · Oct 3, 2018 · Sep 27, 2018 · Sep 30, 2018
commit b1705a995b561b9f36f0edab10f3f1bb3984b606
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -415,30 +415,6 @@ cdef class {{name}}HashTable(HashTable):
 
         return uniques.to_array(), np.asarray(labels)
 
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    def _unique_no_inverse(self, const {{dtype}}_t[:] values):
-        # define separate functions without inverse for performance
-        cdef:
-           Py_ssize_t i, n = len(values)
-           int ret = 0
-           {{dtype}}_t val
-           khiter_t k
-           {{name}}Vector uniques = {{name}}Vector()
-           {{name}}VectorData *ud
-        ud = uniques.data
-        with nogil:
-            for i in range(n):
-                val = values[i]
-                k = kh_get_{{dtype}}(self.table, val)
-                if k == self.table.n_buckets:
-                    kh_put_{{dtype}}(self.table, val, &ret)
-                    if needs_resize(ud):
-                        with gil:
-                            uniques.resize()
-                    append_data_{{dtype}}(ud, val)
-        return uniques.to_array()
-
     def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
         if return_inverse:
             return self._unique_with_inverse(values, uniques={{name}}Vector(),
@@ -503,6 +479,30 @@ cdef class {{name}}HashTable(HashTable):
 
         return np.asarray(labels), arr_uniques
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _unique_no_inverse(self, const {{dtype}}_t[:] values):
+        # inverse-free variant kept separate for performance
+        cdef:
+           Py_ssize_t i, n = len(values)
+           int ret = 0  # written by kh_put; never read here
+           {{dtype}}_t val
+           khiter_t k
+           {{name}}Vector uniques = {{name}}Vector()
+           {{name}}VectorData *ud
+        ud = uniques.data
+        with nogil:
+            for i in range(n):
+                val = values[i]
+                k = kh_get_{{dtype}}(self.table, val)
+                if k == self.table.n_buckets:  # val not yet in the table
+                    kh_put_{{dtype}}(self.table, val, &ret)
+                    if needs_resize(ud):
+                        with gil:  # resize() runs with the GIL reacquired
+                            uniques.resize()
+                    append_data_{{dtype}}(ud, val)
+        return uniques.to_array()
+
 {{endfor}}
 
 
@@ -582,6 +582,41 @@ cdef class StringHashTable(HashTable):
         free(vecs)
         return labels
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _unique_no_inverse(self, ndarray[object] values):
+        # inverse-free variant kept separate for performance
+        cdef:
+            Py_ssize_t i, count, n = len(values)
+            int64_t[:] uindexer
+            int ret = 0  # written by kh_put_str; never read here
+            object val
+            ObjectVector uniques
+            khiter_t k
+            const char *v
+            const char **vecs
+        vecs = <const char **> malloc(n * sizeof(char *))  # not NULL-checked
+        uindexer = np.empty(n, dtype=np.int64)
+        for i in range(n):
+            val = values[i]
+            v = util.get_c_string(val)  # vecs is not freed if this raises
+            vecs[i] = v
+        count = 0
+        with nogil:
+            for i in range(n):
+                v = vecs[i]
+                k = kh_get_str(self.table, v)
+                if k == self.table.n_buckets:  # v not yet in the table
+                    kh_put_str(self.table, v, &ret)
+                    uindexer[count] = i  # remember the position in values
+                    count += 1
+        free(vecs)
+        # gather the objects at the positions recorded in uindexer
+        uniques = ObjectVector()
+        for i in range(count):
+            uniques.append(values[uindexer[i]])
+        return uniques.to_array()
+
     @cython.boundscheck(False)
     def lookup(self, ndarray[object] values):
         cdef:
@@ -705,41 +740,6 @@ cdef class StringHashTable(HashTable):
 
         return uniques.to_array(), np.asarray(labels)
 
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    def _unique_no_inverse(self, ndarray[object] values):
-        # define separate functions without inverse for performance
-        cdef:
-            Py_ssize_t i, count, n = len(values)
-            int64_t[:] uindexer
-            int ret = 0
-            object val
-            ObjectVector uniques
-            khiter_t k
-            const char *v
-            const char **vecs
-        vecs = <const char **> malloc(n * sizeof(char *))
-        uindexer = np.empty(n, dtype=np.int64)
-        for i in range(n):
-            val = values[i]
-            v = util.get_c_string(val)
-            vecs[i] = v
-        count = 0
-        with nogil:
-            for i in range(n):
-                v = vecs[i]
-                k = kh_get_str(self.table, v)
-                if k == self.table.n_buckets:
-                    kh_put_str(self.table, v, &ret)
-                    uindexer[count] = i
-                    count += 1
-        free(vecs)
-        # uniques
-        uniques = ObjectVector()
-        for i in range(count):
-            uniques.append(values[uindexer[i]])
-        return uniques.to_array()
-
     def unique(self, ndarray[object] values, bint return_inverse=False):
         if return_inverse:
             return self._unique_with_inverse(values, uniques=ObjectVector(),
@@ -845,6 +845,25 @@ cdef class PyObjectHashTable(HashTable):
 
         return np.asarray(locs)
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _unique_no_inverse(self, ndarray[object] values):
+        # inverse-free variant kept separate for performance
+        cdef:
+            Py_ssize_t i, n = len(values)
+            int ret = 0  # written by kh_put_pymap; never read here
+            object val
+            khiter_t k
+            ObjectVector uniques = ObjectVector()
+        for i in range(n):
+            val = values[i]
+            hash(val)  # result unused; presumably rejects unhashable values
+            k = kh_get_pymap(self.table, <PyObject*>val)
+            if k == self.table.n_buckets:  # val not yet in the table
+                kh_put_pymap(self.table, <PyObject*>val, &ret)
+                uniques.append(val)
+        return uniques.to_array()
+
     @cython.boundscheck(False)
     @cython.wraparound(False)
     def _unique_with_inverse(self, ndarray[object] values,
@@ -886,25 +905,6 @@ cdef class PyObjectHashTable(HashTable):
 
         return uniques.to_array(), np.asarray(labels)
 
-    @cython.boundscheck(False)
-    @cython.wraparound(False)
-    def _unique_no_inverse(self, ndarray[object] values):
-        # define separate functions without inverse for performance
-        cdef:
-            Py_ssize_t i, n = len(values)
-            int ret = 0
-            object val
-            khiter_t k
-            ObjectVector uniques = ObjectVector()
-        for i in range(n):
-            val = values[i]
-            hash(val)
-            k = kh_get_pymap(self.table, <PyObject*>val)
-            if k == self.table.n_buckets:
-                kh_put_pymap(self.table, <PyObject*>val, &ret)
-                uniques.append(val)
-        return uniques.to_array()
-
     def unique(self, ndarray[object] values, bint return_inverse=False):
         if return_inverse:
             return self._unique_with_inverse(values, uniques=ObjectVector(),