@@ -358,8 +358,7 @@ cdef class {{name}}HashTable(HashTable):
358358 @cython.wraparound(False)
359359 def _unique(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
360360 Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
361- object na_value=None, bint ignore_na=False,
362- bint return_inverse=False):
361+ object na_value=None, bint ignore_na=False):
363362 """
364363 Calculate unique values and labels (no sorting!)
365364
@@ -382,15 +381,12 @@ cdef class {{name}}HashTable(HashTable):
382381 Whether NA-values should be ignored for calculating the uniques. If
383382 True, the labels corresponding to missing values will be set to
384383 na_sentinel.
385- return_inverse : boolean, default False
386- Whether the mapping of the original array values to their location
387- in the vector of uniques should be returned.
388384
389385 Returns
390386 -------
391387 uniques : ndarray[{{dtype}}]
392388 Unique values of input, not sorted
393- labels : ndarray[int64] (if return_inverse=True)
389+ labels : ndarray[int64]
394390 The labels from values to uniques
395391 """
396392 cdef:
@@ -402,8 +398,7 @@ cdef class {{name}}HashTable(HashTable):
402398 {{name}}VectorData *ud
403399 bint use_na_value
404400
405- if return_inverse:
406- labels = np.empty(n, dtype=np.int64)
401+ labels = np.empty(n, dtype=np.int64)
407402 ud = uniques.data
408403 use_na_value = na_value is not None
409404
@@ -440,19 +435,15 @@ cdef class {{name}}HashTable(HashTable):
440435 "Vector.resize() needed")
441436 uniques.resize()
442437 append_data_{{dtype}}(ud, val)
443- if return_inverse:
444- self.table.vals[k] = count
445- labels[i] = count
446- count += 1
447- elif return_inverse:
438+ self.table.vals[k] = count
439+ labels[i] = count
440+ count += 1
441+ else:
448442 # k falls into a previous bucket
449- # only relevant in case we need to construct the inverse
450443 idx = self.table.vals[k]
451444 labels[i] = idx
452445
453- if return_inverse:
454- return uniques.to_array(), np.asarray(labels)
455- return uniques.to_array()
446+ return uniques.to_array(), np.asarray(labels)
456447
457448 def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False):
458449 """
@@ -474,8 +465,10 @@ cdef class {{name}}HashTable(HashTable):
474465 The labels from values to uniques
475466 """
476467 uniques = {{name}}Vector()
477- return self._unique(values, uniques, ignore_na=False,
478- return_inverse=return_inverse)
468+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
469+ if return_inverse:
470+ return uniques, inverse
471+ return uniques
479472
480473 def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1,
481474 object na_value=None):
@@ -507,8 +500,7 @@ cdef class {{name}}HashTable(HashTable):
507500 uniques_vector = {{name}}Vector()
508501 uniques, labels = self._unique(values, uniques_vector,
509502 na_sentinel=na_sentinel,
510- na_value=na_value, ignore_na=True,
511- return_inverse=True)
503+ na_value=na_value, ignore_na=True)
512504 # factorize has reversed outputs compared to _unique
513505 return labels, uniques
514506
@@ -517,7 +509,7 @@ cdef class {{name}}HashTable(HashTable):
517509 object na_value=None):
518510 _, labels = self._unique(values, uniques, count_prior=count_prior,
519511 na_sentinel=na_sentinel, na_value=na_value,
520- ignore_na=True, return_inverse=True )
512+ ignore_na=True)
521513 return labels
522514
523515 @cython.boundscheck(False)
@@ -709,8 +701,7 @@ cdef class StringHashTable(HashTable):
709701 @cython.wraparound(False)
710702 def _unique(self, ndarray[object] values, ObjectVector uniques,
711703 Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
712- object na_value=None, bint ignore_na=False,
713- bint return_inverse=False):
704+ object na_value=None, bint ignore_na=False):
714705 """
715706 Calculate unique values and labels (no sorting!)
716707
@@ -733,15 +724,12 @@ cdef class StringHashTable(HashTable):
733724 Whether NA-values should be ignored for calculating the uniques. If
734725 True, the labels corresponding to missing values will be set to
735726 na_sentinel.
736- return_inverse : boolean, default False
737- Whether the mapping of the original array values to their location
738- in the vector of uniques should be returned.
739727
740728 Returns
741729 -------
742730 uniques : ndarray[object]
743731 Unique values of input, not sorted
744- labels : ndarray[int64] (if return_inverse=True)
732+ labels : ndarray[int64]
745733 The labels from values to uniques
746734 """
747735 cdef:
@@ -755,8 +743,7 @@ cdef class StringHashTable(HashTable):
755743 khiter_t k
756744 bint use_na_value
757745
758- if return_inverse:
759- labels = np.zeros(n, dtype=np.int64)
746+ labels = np.zeros(n, dtype=np.int64)
760747 uindexer = np.empty(n, dtype=np.int64)
761748 use_na_value = na_value is not None
762749
@@ -787,13 +774,11 @@ cdef class StringHashTable(HashTable):
787774 # k hasn't been seen yet
788775 k = kh_put_str(self.table, v, &ret)
789776 uindexer[count] = i
790- if return_inverse:
791- self.table.vals[k] = count
792- labels[i] = <int64_t>count
777+ self.table.vals[k] = count
778+ labels[i] = <int64_t>count
793779 count += 1
794- elif return_inverse :
780+ else :
795781 # k falls into a previous bucket
796- # only relevant in case we need to construct the inverse
797782 idx = self.table.vals[k]
798783 labels[i] = <int64_t>idx
799784
@@ -803,9 +788,7 @@ cdef class StringHashTable(HashTable):
803788 for i in range(count):
804789 uniques.append(values[uindexer[i]])
805790
806- if return_inverse:
807- return uniques.to_array(), np.asarray(labels)
808- return uniques.to_array()
791+ return uniques.to_array(), np.asarray(labels)
809792
810793 def unique(self, ndarray[object] values, bint return_inverse=False):
811794 """
@@ -827,8 +810,10 @@ cdef class StringHashTable(HashTable):
827810 The labels from values to uniques
828811 """
829812 uniques = ObjectVector()
830- return self._unique(values, uniques, ignore_na=False,
831- return_inverse=return_inverse)
813+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
814+ if return_inverse:
815+ return uniques, inverse
816+ return uniques
832817
833818 def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
834819 object na_value=None):
@@ -860,8 +845,7 @@ cdef class StringHashTable(HashTable):
860845 uniques_vector = ObjectVector()
861846 uniques, labels = self._unique(values, uniques_vector,
862847 na_sentinel=na_sentinel,
863- na_value=na_value, ignore_na=True,
864- return_inverse=True)
848+ na_value=na_value, ignore_na=True)
865849 # factorize has reversed outputs compared to _unique
866850 return labels, uniques
867851
@@ -870,7 +854,7 @@ cdef class StringHashTable(HashTable):
870854 object na_value=None):
871855 _, labels = self._unique(values, uniques, count_prior=count_prior,
872856 na_sentinel=na_sentinel, na_value=na_value,
873- ignore_na=True, return_inverse=True )
857+ ignore_na=True)
874858 return labels
875859
876860
@@ -963,8 +947,7 @@ cdef class PyObjectHashTable(HashTable):
963947 @cython.wraparound(False)
964948 def _unique(self, ndarray[object] values, ObjectVector uniques,
965949 Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
966- object na_value=None, bint ignore_na=False,
967- bint return_inverse=False):
950+ object na_value=None, bint ignore_na=False):
968951 """
969952 Calculate unique values and labels (no sorting!)
970953
@@ -987,15 +970,12 @@ cdef class PyObjectHashTable(HashTable):
987970 Whether NA-values should be ignored for calculating the uniques. If
988971 True, the labels corresponding to missing values will be set to
989972 na_sentinel.
990- return_inverse : boolean, default False
991- Whether the mapping of the original array values to their location
992- in the vector of uniques should be returned.
993973
994974 Returns
995975 -------
996976 uniques : ndarray[object]
997977 Unique values of input, not sorted
998- labels : ndarray[int64] (if return_inverse=True)
978+ labels : ndarray[int64]
999979 The labels from values to uniques
1000980 """
1001981 cdef:
@@ -1006,8 +986,7 @@ cdef class PyObjectHashTable(HashTable):
1006986 khiter_t k
1007987 bint use_na_value
1008988
1009- if return_inverse:
1010- labels = np.empty(n, dtype=np.int64)
989+ labels = np.empty(n, dtype=np.int64)
1011990 use_na_value = na_value is not None
1012991
1013992 for i in range(n):
@@ -1024,19 +1003,15 @@ cdef class PyObjectHashTable(HashTable):
10241003 # k hasn't been seen yet
10251004 k = kh_put_pymap(self.table, <PyObject*>val, &ret)
10261005 uniques.append(val)
1027- if return_inverse:
1028- self.table.vals[k] = count
1029- labels[i] = count
1030- count += 1
1031- elif return_inverse:
1006+ self.table.vals[k] = count
1007+ labels[i] = count
1008+ count += 1
1009+ else:
10321010 # k falls into a previous bucket
1033- # only relevant in case we need to construct the inverse
10341011 idx = self.table.vals[k]
10351012 labels[i] = idx
10361013
1037- if return_inverse:
1038- return uniques.to_array(), np.asarray(labels)
1039- return uniques.to_array()
1014+ return uniques.to_array(), np.asarray(labels)
10401015
10411016 def unique(self, ndarray[object] values, bint return_inverse=False):
10421017 """
@@ -1058,8 +1033,10 @@ cdef class PyObjectHashTable(HashTable):
10581033 The labels from values to uniques
10591034 """
10601035 uniques = ObjectVector()
1061- return self._unique(values, uniques, ignore_na=False,
1062- return_inverse=return_inverse)
1036+ uniques, inverse = self._unique(values, uniques, ignore_na=False)
1037+ if return_inverse:
1038+ return uniques, inverse
1039+ return uniques
10631040
10641041 def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
10651042 object na_value=None):
@@ -1091,8 +1068,7 @@ cdef class PyObjectHashTable(HashTable):
10911068 uniques_vector = ObjectVector()
10921069 uniques, labels = self._unique(values, uniques_vector,
10931070 na_sentinel=na_sentinel,
1094- na_value=na_value, ignore_na=True,
1095- return_inverse=True)
1071+ na_value=na_value, ignore_na=True)
10961072 # factorize has reversed outputs compared to _unique
10971073 return labels, uniques
10981074
@@ -1101,5 +1077,5 @@ cdef class PyObjectHashTable(HashTable):
11011077 object na_value=None):
11021078 _, labels = self._unique(values, uniques, count_prior=count_prior,
11031079 na_sentinel=na_sentinel, na_value=na_value,
1104- ignore_na=True, return_inverse=True )
1080+ ignore_na=True)
11051081 return labels
0 commit comments