2525from pandas .util .testing import assert_almost_equal
2626
2727
def assert_series_or_index_or_array_or_categorical_equal(left, right):
    """
    Compare ``left`` and ``right`` with the ``tm`` assertion matching ``left``.

    The comparison helper is selected from the type of ``left`` only:
    Series, Index, np.ndarray and Categorical are checked in that order and
    the first match decides which ``tm.assert_*_equal`` function is called.

    Parameters
    ----------
    left : Series, Index, np.ndarray or Categorical
        Object whose type selects the comparison helper.
    right : object
        Object passed to the selected helper together with ``left``.

    Raises
    ------
    AssertionError
        If ``left`` is not one of the supported types, or if the selected
        helper finds a difference.
    """
    if isinstance(left, Series):
        tm.assert_series_equal(left, right)
    elif isinstance(left, Index):
        tm.assert_index_equal(left, right)
    elif isinstance(left, np.ndarray):
        tm.assert_numpy_array_equal(left, right)
    elif isinstance(left, Categorical):
        tm.assert_categorical_equal(left, right)
    else:
        # Raise explicitly instead of relying on a bare ``assert``: the
        # failure then names the offending type and still triggers when
        # assertions are disabled (``python -O``).
        raise AssertionError(
            'Expected Series, Index, np.ndarray or Categorical, '
            'got {typ}'.format(typ=type(left).__name__))
40+
41+
2842class TestMatch (object ):
2943
3044 def test_ints (self ):
@@ -321,17 +335,22 @@ def test_parametrized_factorize_na_value(self, data, na_value):
321335
322336class TestUnique (object ):
323337
324- def test_ints (self ):
325- arr = np .random .randint (0 , 100 , size = 50 )
338+ def test_unique_inverse (self , any_numpy_dtype ):
339+ dtype = any_numpy_dtype
340+ arr = np .random .randint (0 , 100 , size = 50 ).astype (dtype )
326341
327342 result = algos .unique (arr )
328343 assert isinstance (result , np .ndarray )
329344
330- def test_objects ( self ):
331- arr = np . random . randint ( 0 , 100 , size = 50 ). astype ( 'O' )
345+ # reuse result as expected outcome of return_inverse case
346+ expected_uniques = result . copy ( )
332347
333- result = algos .unique (arr )
334- assert isinstance (result , np .ndarray )
348+ result_uniques , result_inverse = algos .unique (arr , return_inverse = True )
349+ tm .assert_numpy_array_equal (result_uniques , expected_uniques )
350+
351+ # reconstruction can only work if inverse is correct
352+ reconstr = result_uniques [result_inverse ]
353+ tm .assert_numpy_array_equal (reconstr , arr , check_dtype = False )
335354
336355 def test_object_refcount_bug (self ):
337356 lst = ['A' , 'B' , 'C' , 'D' , 'E' ]
@@ -376,24 +395,26 @@ def test_datetime64_dtype_array_returned(self):
376395 tm .assert_numpy_array_equal (result , expected )
377396 assert result .dtype == expected .dtype
378397
379- def test_timedelta64_dtype_array_returned (self ):
398+ @pytest .mark .parametrize ('box' , [Index , Series , np .array ])
399+ def test_timedelta64_dtype_array_returned (self , box ):
380400 # GH 9431
381401 expected = np .array ([31200 , 45678 , 10000 ], dtype = 'm8[ns]' )
382402
383403 td_index = pd .to_timedelta ([31200 , 45678 , 31200 , 10000 , 45678 ])
384- result = algos .unique (td_index )
385- tm .assert_numpy_array_equal (result , expected )
386- assert result .dtype == expected .dtype
404+ obj = box (td_index )
387405
388- s = Series (td_index )
389- result = algos .unique (s )
406+ result = algos .unique (obj )
390407 tm .assert_numpy_array_equal (result , expected )
391- assert result .dtype == expected .dtype
392408
393- arr = s .values
394- result = algos .unique (arr )
395- tm .assert_numpy_array_equal (result , expected )
396- assert result .dtype == expected .dtype
409+ # reuse result as expected outcome of return_inverse case
410+ expected_uniques = result .copy ()
411+
412+ result_uniques , result_inverse = algos .unique (obj , return_inverse = True )
413+ tm .assert_numpy_array_equal (result_uniques , expected_uniques )
414+
415+ # reconstruction can only work if inverse is correct
416+ reconstr = box (result_uniques [result_inverse ])
417+ assert_series_or_index_or_array_or_categorical_equal (reconstr , obj )
397418
398419 def test_uint64_overflow (self ):
399420 s = Series ([1 , 2 , 2 ** 63 , 2 ** 63 ], dtype = np .uint64 )
@@ -406,78 +427,80 @@ def test_nan_in_object_array(self):
406427 expected = np .array (['a' , np .nan , 'c' ], dtype = object )
407428 tm .assert_numpy_array_equal (result , expected )
408429
409- def test_categorical (self ):
430+ result_uniques , result_inverse = pd .unique (duplicated_items ,
431+ return_inverse = True )
432+ expected_inverse = np .array ([0 , 1 , 2 , 2 ], dtype = 'int64' )
433+ tm .assert_numpy_array_equal (result_inverse , expected_inverse )
434+
435+ @pytest .mark .parametrize ('ordered' , [True , False ])
436+ @pytest .mark .parametrize ('box' , [lambda x : x , Series , Index ],
437+ ids = ['Categorical' , 'Series' , 'Index' ])
438+ @pytest .mark .parametrize ('method' , [lambda x , ** kwargs : x .unique (** kwargs ),
439+ pd .unique ],
440+ ids = ['classmethod' , 'toplevel' ])
441+ def test_categorical (self , method , box , ordered ):
410442
411- # we are expecting to return in the order
412- # of appearance
413- expected = Categorical ( list ( 'bac' ), categories = list ( 'bac' ) )
443+ categories = list ( 'abc' ) if ordered else list ( 'bac' )
444+ expected = Categorical ( list ( 'bac' ), categories = categories ,
445+ ordered = ordered )
414446
415- # we are expecting to return in the order
416- # of the categories
417- expected_o = Categorical (
418- list ('bac' ), categories = list ('abc' ), ordered = True )
447+ # Index.unique always returns Index
448+ # pd.unique(Index) stays Index (only) for Categorical
449+ expected = box (expected ) if box == Index else expected
419450
420451 # GH 15939
421- c = Categorical (list ('baabc' ))
422- result = c . unique ( )
423- tm . assert_categorical_equal ( result , expected )
452+ c = box ( Categorical (list ('baabc' ), categories = categories ,
453+ ordered = ordered ) )
454+ result = method ( c )
424455
425- result = algos .unique (c )
426- tm .assert_categorical_equal (result , expected )
456+ assert_series_or_index_or_array_or_categorical_equal (result , expected )
427457
428- c = Categorical (list ('baabc' ), ordered = True )
429- result = c .unique ()
430- tm .assert_categorical_equal (result , expected_o )
458+ if method == pd .unique :
459+ # [Series/Index].unique do not yet support return_inverse=True
431460
432- result = algos .unique (c )
433- tm .assert_categorical_equal (result , expected_o )
461+ # reuse result as expected outcome of return_inverse case
462+ expected_uniques = result .copy ()
463+ result_uniques , result_inverse = method (c , return_inverse = True )
434464
435- # Series of categorical dtype
436- s = Series (Categorical (list ('baabc' )), name = 'foo' )
437- result = s .unique ()
438- tm .assert_categorical_equal (result , expected )
465+ assert_series_or_index_or_array_or_categorical_equal (
466+ result_uniques , expected_uniques )
439467
440- result = pd .unique (s )
441- tm .assert_categorical_equal (result , expected )
468+ # reconstruction can only work if inverse is correct
469+ reconstr = box (result_uniques [result_inverse ])
470+ assert_series_or_index_or_array_or_categorical_equal (reconstr , c )
442471
443- # CI -> return CI
444- ci = CategoricalIndex ( Categorical ( list ( 'baabc' ),
445- categories = list ( 'bac' )))
446- expected = CategoricalIndex ( expected )
447- result = ci . unique ()
448- tm . assert_index_equal ( result , expected )
472+ @ pytest . mark . parametrize ( 'box' , [ Series , Index ])
473+ @ pytest . mark . parametrize ( 'method' , [ lambda x , ** kwargs : x . unique ( ** kwargs ),
474+ pd . unique ],
475+ ids = [ 'classmethod' , 'toplevel' ] )
476+ def test_datetime64tz_aware ( self , method , box ):
477+ # GH 15939
449478
450- result = pd . unique ( ci )
451- tm . assert_index_equal ( result , expected )
479+ ts = Timestamp ( '20160101' , tz = 'US/Eastern' )
480+ obj = box ([ ts , ts ] )
452481
453- def test_datetime64tz_aware (self ):
454- # GH 15939
482+ if box == Series :
483+ expected = np .array ([Timestamp ('2016-01-01 00:00:00-0500' ,
484+ tz = 'US/Eastern' )], dtype = object )
485+ else : # Index
486+ expected = Index ([ts ])
455487
456- result = Series (
457- Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
458- Timestamp ('20160101' , tz = 'US/Eastern' )])).unique ()
459- expected = np .array ([Timestamp ('2016-01-01 00:00:00-0500' ,
460- tz = 'US/Eastern' )], dtype = object )
461- tm .assert_numpy_array_equal (result , expected )
488+ result = method (obj )
489+ assert_series_or_index_or_array_or_categorical_equal (result , expected )
462490
463- result = Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
464- Timestamp ('20160101' , tz = 'US/Eastern' )]).unique ()
465- expected = DatetimeIndex (['2016-01-01 00:00:00' ],
466- dtype = 'datetime64[ns, US/Eastern]' , freq = None )
467- tm .assert_index_equal (result , expected )
468-
469- result = pd .unique (
470- Series (Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
471- Timestamp ('20160101' , tz = 'US/Eastern' )])))
472- expected = np .array ([Timestamp ('2016-01-01 00:00:00-0500' ,
473- tz = 'US/Eastern' )], dtype = object )
474- tm .assert_numpy_array_equal (result , expected )
491+ if method == pd .unique :
492+ # [Series/Index].unique do not yet support return_inverse=True
493+
494+ # reuse result as expected outcome of return_inverse case
495+ expected_uniques = result .copy ()
496+ result_uniques , result_inverse = method (obj , return_inverse = True )
475497
476- result = pd .unique (Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
477- Timestamp ('20160101' , tz = 'US/Eastern' )]))
478- expected = DatetimeIndex (['2016-01-01 00:00:00' ],
479- dtype = 'datetime64[ns, US/Eastern]' , freq = None )
480- tm .assert_index_equal (result , expected )
498+ assert_series_or_index_or_array_or_categorical_equal (
499+ result_uniques , expected_uniques )
500+
501+ # reconstruction can only work if inverse is correct
502+ reconstr = box (result_uniques [result_inverse ])
503+ assert_series_or_index_or_array_or_categorical_equal (reconstr , obj )
481504
482505 def test_order_of_appearance (self ):
483506 # 9346
@@ -491,28 +514,10 @@ def test_order_of_appearance(self):
491514 tm .assert_numpy_array_equal (result ,
492515 np .array ([2 , 1 ], dtype = 'int64' ))
493516
494- result = pd .unique (Series ([Timestamp ('20160101' ),
495- Timestamp ('20160101' )]))
496- expected = np .array (['2016-01-01T00:00:00.000000000' ],
497- dtype = 'datetime64[ns]' )
498- tm .assert_numpy_array_equal (result , expected )
499-
500- result = pd .unique (Index (
501- [Timestamp ('20160101' , tz = 'US/Eastern' ),
502- Timestamp ('20160101' , tz = 'US/Eastern' )]))
503- expected = DatetimeIndex (['2016-01-01 00:00:00' ],
504- dtype = 'datetime64[ns, US/Eastern]' ,
505- freq = None )
506- tm .assert_index_equal (result , expected )
507-
508517 result = pd .unique (list ('aabc' ))
509518 expected = np .array (['a' , 'b' , 'c' ], dtype = object )
510519 tm .assert_numpy_array_equal (result , expected )
511520
512- result = pd .unique (Series (Categorical (list ('aabc' ))))
513- expected = Categorical (list ('abc' ))
514- tm .assert_categorical_equal (result , expected )
515-
516521 @pytest .mark .parametrize ("arg ,expected" , [
517522 (('1' , '1' , '2' ), np .array (['1' , '2' ], dtype = object )),
518523 (('foo' ,), np .array (['foo' ], dtype = object ))
0 commit comments