@@ -2622,14 +2622,12 @@ def test_filter_out_no_groups(self):
26222622 grouper = s .apply (lambda x : x % 2 )
26232623 grouped = s .groupby (grouper )
26242624 filtered = grouped .filter (lambda x : x .mean () > 0 )
2625- filtered .sort () # was sorted by group
2626- s .sort () # was sorted arbitrarily
26272625 assert_series_equal (filtered , s )
26282626 df = pd .DataFrame ({'A' : [1 , 12 , 12 , 1 ], 'B' : 'a b c d' .split ()})
26292627 grouper = df ['A' ].apply (lambda x : x % 2 )
26302628 grouped = df .groupby (grouper )
26312629 filtered = grouped .filter (lambda x : x ['A' ].mean () > 0 )
2632- assert_frame_equal (filtered . sort () , df )
2630+ assert_frame_equal (filtered , df )
26332631
26342632 def test_filter_condition_raises (self ):
26352633 import pandas as pd
@@ -2706,7 +2704,7 @@ def test_filter_against_workaround(self):
27062704 old_way = df [grouped .floats .\
27072705 transform (lambda x : x .mean () > N / 20 ).astype ('bool' )]
27082706 new_way = grouped .filter (lambda x : x ['floats' ].mean () > N / 20 )
2709- assert_frame_equal (new_way . sort () , old_way . sort () )
2707+ assert_frame_equal (new_way , old_way )
27102708
27112709 # Group by floats (rounded); filter on strings.
27122710 grouper = df .floats .apply (lambda x : np .round (x , - 1 ))
@@ -2715,14 +2713,14 @@ def test_filter_against_workaround(self):
27152713 transform (lambda x : len (x ) < N / 10 ).astype ('bool' )]
27162714 new_way = grouped .filter (
27172715 lambda x : len (x .letters ) < N / 10 )
2718- assert_frame_equal (new_way . sort () , old_way . sort () )
2716+ assert_frame_equal (new_way , old_way )
27192717
27202718 # Group by strings; filter on ints.
27212719 grouped = df .groupby ('letters' )
27222720 old_way = df [grouped .ints .\
27232721 transform (lambda x : x .mean () > N / 20 ).astype ('bool' )]
27242722 new_way = grouped .filter (lambda x : x ['ints' ].mean () > N / 20 )
2725- assert_frame_equal (new_way . sort_index () , old_way . sort_index () )
2723+ assert_frame_equal (new_way , old_way )
27262724
27272725 def test_filter_using_len (self ):
27282726 # BUG GH4447
@@ -2747,6 +2745,48 @@ def test_filter_using_len(self):
27472745 expected = s [[]]
27482746 assert_series_equal (actual , expected )
27492747
2748+ def test_filter_maintains_ordering (self ):
2749+ # Simple case: index is sequential. #4621
2750+ df = DataFrame ({'pid' : [1 ,1 ,1 ,2 ,2 ,3 ,3 ,3 ],
2751+ 'tag' : [23 ,45 ,62 ,24 ,45 ,34 ,25 ,62 ]})
2752+ s = df ['pid' ]
2753+ grouped = df .groupby ('tag' )
2754+ actual = grouped .filter (lambda x : len (x ) > 1 )
2755+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2756+ assert_frame_equal (actual , expected )
2757+
2758+ grouped = s .groupby (df ['tag' ])
2759+ actual = grouped .filter (lambda x : len (x ) > 1 )
2760+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2761+ assert_series_equal (actual , expected )
2762+
2763+ # Now index is sequentially decreasing.
2764+ df .index = np .arange (len (df ) - 1 , - 1 , - 1 )
2765+ s = df ['pid' ]
2766+ grouped = df .groupby ('tag' )
2767+ actual = grouped .filter (lambda x : len (x ) > 1 )
2768+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2769+ assert_frame_equal (actual , expected )
2770+
2771+ grouped = s .groupby (df ['tag' ])
2772+ actual = grouped .filter (lambda x : len (x ) > 1 )
2773+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2774+ assert_series_equal (actual , expected )
2775+
2776+ # Index is shuffled.
2777+ SHUFFLED = [4 , 6 , 7 , 2 , 1 , 0 , 5 , 3 ]
2778+ df .index = df .index [SHUFFLED ]
2779+ s = df ['pid' ]
2780+ grouped = df .groupby ('tag' )
2781+ actual = grouped .filter (lambda x : len (x ) > 1 )
2782+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2783+ assert_frame_equal (actual , expected )
2784+
2785+ grouped = s .groupby (df ['tag' ])
2786+ actual = grouped .filter (lambda x : len (x ) > 1 )
2787+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2788+ assert_series_equal (actual , expected )
2789+
27502790 def test_groupby_whitelist (self ):
27512791 from string import ascii_lowercase
27522792 letters = np .array (list (ascii_lowercase ))
0 commit comments