@@ -29,6 +29,8 @@ class providing the base-class of operations.
2929 ensure_float , is_extension_array_dtype , is_numeric_dtype , is_scalar )
3030from pandas .core .dtypes .missing import isna , notna
3131
32+ from pandas .api .types import (
33+ is_datetime64_dtype , is_integer_dtype , is_object_dtype )
3234import pandas .core .algorithms as algorithms
3335from pandas .core .base import (
3436 DataError , GroupByError , PandasObject , SelectionMixin , SpecificationError )
@@ -1024,15 +1026,17 @@ def _bool_agg(self, val_test, skipna):
10241026 """
10251027
def objs_to_bool(vals):
    # type: np.ndarray -> (np.ndarray, typing.Type)
    """
    Coerce ``vals`` to booleans and expose them as a uint8 array.

    Parameters
    ----------
    vals : np.ndarray
        Values to be tested for truthiness.

    Returns
    -------
    tuple
        ``(values, inference)`` where ``values`` is the boolean data
        reinterpreted as ``np.uint8`` and ``inference`` is the type the
        result should be cast back to (always ``bool`` here).
    """
    if is_object_dtype(vals):
        # Object arrays are converted element by element using Python
        # truthiness; the dtype is pinned so the result is boolean even
        # when ``vals`` is empty.
        vals = np.array([bool(x) for x in vals], dtype=bool)
    else:
        # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed in
        # 1.24; it was an alias for the builtin, so use the builtin.
        vals = vals.astype(bool)

    return vals.view(np.uint8), bool
10331036
def result_to_bool(result, inference):
    # type: (np.ndarray, typing.Type) -> np.ndarray
    """
    Cast the aggregated ``result`` to the type given by ``inference``.

    ``copy=False`` lets NumPy hand back ``result`` itself when it already
    has the requested dtype.
    """
    converted = result.astype(inference, copy=False)
    return converted
10361040
10371041 return self ._get_cythonized_result ('group_any_all' , self .grouper ,
10381042 aggregate = True ,
@@ -1688,6 +1692,75 @@ def nth(self, n, dropna=None):
16881692
16891693 return result
16901694
def quantile(self, q=0.5, interpolation='linear'):
    """
    Return group values at the given quantile, a la numpy.percentile.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50% quantile)
        Value(s) between 0 and 1 providing the quantile(s) to compute.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Method to use when the desired quantile falls between two points.

    Returns
    -------
    Series or DataFrame
        Return type determined by caller of GroupBy object.

    Raises
    ------
    TypeError
        If the values being aggregated have ``object`` dtype.

    See Also
    --------
    Series.quantile : Similar method for Series.
    DataFrame.quantile : Similar method for DataFrame.
    numpy.percentile : NumPy method to compute qth percentile.

    Examples
    --------
    >>> df = pd.DataFrame([
    ...     ['a', 1], ['a', 2], ['a', 3],
    ...     ['b', 1], ['b', 3], ['b', 5]
    ... ], columns=['key', 'val'])
    >>> df.groupby('key').quantile()
         val
    key
    a    2.0
    b    3.0
    """

    def pre_processor(vals):
        # type: np.ndarray -> (np.ndarray, Optional[typing.Type])
        # Reject object data, and remember which dtype (if any) the result
        # should be converted back to after the Cython call.
        if is_object_dtype(vals):
            raise TypeError("'quantile' cannot be performed against "
                            "'object' dtypes!")

        inference = None
        if is_integer_dtype(vals):
            inference = np.int64
        elif is_datetime64_dtype(vals):
            inference = 'datetime64[ns]'
            # ``np.float`` was an alias of the builtin ``float`` and was
            # removed in NumPy 1.24; use the concrete float64 type, which
            # also matches ``cython_dtype`` below.
            vals = vals.astype(np.float64)

        return vals, inference

    def post_processor(vals, inference):
        # type: (np.ndarray, Optional[typing.Type]) -> np.ndarray
        if inference is not None:
            # Edge case: 'linear' and 'midpoint' can land between two
            # integers, so integer input keeps its float result for those
            # interpolation modes instead of being cast back.
            keep_float = (is_integer_dtype(inference) and
                          interpolation in {'linear', 'midpoint'})
            if not keep_float:
                vals = vals.astype(inference)

        return vals

    return self._get_cythonized_result('group_quantile', self.grouper,
                                       aggregate=True,
                                       needs_values=True,
                                       needs_mask=True,
                                       cython_dtype=np.float64,
                                       pre_processing=pre_processor,
                                       post_processing=post_processor,
                                       q=q, interpolation=interpolation)
1763+
16911764 @Substitution (name = 'groupby' )
16921765 def ngroup (self , ascending = True ):
16931766 """
@@ -1924,10 +1997,16 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
19241997 Whether the result of the Cython operation is an index of
19251998 values to be retrieved, instead of the actual values themselves
19261999 pre_processing : function, default None
1927- Function to be applied to `values` prior to passing to Cython
1928- Raises if `needs_values` is False
2000+ Function to be applied to `values` prior to passing to Cython.
2001+ Function should return a tuple where the first element is the
2002+ values to be passed to Cython and the second element is an optional
2003+ type which the values should be converted to after being returned
2004+ by the Cython operation. Raises if `needs_values` is False.
19292005 post_processing : function, default None
1930- Function to be applied to result of Cython function
2006+ Function to be applied to result of Cython function. Should accept
2007+ an array of values as the first argument and type inferences as its
2008+ second argument, i.e. the signature should be
2009+ (ndarray, typing.Type).
19312010 **kwargs : dict
19322011 Extra arguments to be passed back to Cython funcs
19332012
@@ -1963,10 +2042,12 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
19632042
19642043 result = np .zeros (result_sz , dtype = cython_dtype )
19652044 func = partial (base_func , result , labels )
2045+ inferences = None
2046+
19662047 if needs_values :
19672048 vals = obj .values
19682049 if pre_processing :
1969- vals = pre_processing (vals )
2050+ vals , inferences = pre_processing (vals )
19702051 func = partial (func , vals )
19712052
19722053 if needs_mask :
@@ -1982,7 +2063,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
19822063 result = algorithms .take_nd (obj .values , result )
19832064
19842065 if post_processing :
1985- result = post_processing (result )
2066+ result = post_processing (result , inferences )
19862067
19872068 output [name ] = result
19882069
0 commit comments