-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Fixed Inconsistent GroupBy Output Shape with Duplicate Column Labels #29124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fd53827
6f60cd0
0aa1813
4af22f6
9756e74
b675963
444d542
98a9901
c8648b1
1626de1
2a6b8d7
12d1ca0
5a3fcd7
dee597a
a2f1b64
fdb36f6
8975009
9adde1b
63b35f9
9eb7c73
0e49bdb
2ad7632
11fda39
caf8f11
7c4bad9
b9dca96
a878e67
037f9af
dd3b1dc
a66d37f
6d50448
4dd8f5b
9d39862
d6b197b
3cfd1a2
16e9512
d297684
a234bed
e3959b0
391d106
c30ca82
3a78051
f4f9e61
936591e
5dd131e
6cc1607
d5ce753
ce97eff
d1a92b4
23eb803
4aa9f4c
faa08c9
b07335b
fb71185
d7b84a2
7934422
c8f0b19
acf22d3
a0aae64
a9b411a
51b8050
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -341,7 +341,27 @@ def _aggregate_multiple_funcs(self, arg, _level): | |
return DataFrame(results, columns=columns) | ||
|
||
def _wrap_series_output(self, output: Dict[int, np.ndarray], index, names: List[Hashable]): | ||
""" common agg/transform wrapping logic """ | ||
""" | ||
Wraps the output of a SeriesGroupBy operation into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : dict[int, np.ndarray] | ||
Dict with a sole key of 0 and a value of the result values. | ||
index : pd.Index | ||
Index to apply to the output. | ||
names : List[Hashable] | ||
List containing one label (the Series name). | ||
|
||
Returns | ||
------- | ||
Series | ||
|
||
Notes | ||
----- | ||
output and names should only contain one element. These are containers for generic | ||
compatibility with the DataFrameGroupBy class. | |
""" | ||
assert len(names) == 1 | ||
result = Series(output[0]) | ||
result.index = index | ||
|
@@ -350,12 +370,54 @@ def _wrap_series_output(self, output: Dict[int, np.ndarray], index, names: List[ | |
return result | ||
|
||
def _wrap_aggregated_output(self, output: Dict[int, np.ndarray], names: List[Hashable]): | ||
""" | ||
Wraps the output of a SeriesGroupBy aggregation into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : dict[int, np.ndarray] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. dict->Mapping? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, looks like this docstring is really similar to the previous one. Can it be shared? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thought about this but there are differences in the signatures of these functions (see |
||
Dict with a sole key of 0 and a value of the result values. | ||
index : pd.Index | ||
Index to apply to the output. | ||
names : List[Hashable] | ||
List containing one label (the Series name). | ||
|
||
Returns | ||
------- | ||
Series | ||
|
||
Notes | ||
----- | ||
output and names should only contain one element. These are containers for generic | ||
compatibility with the DataFrameGroupBy class. | |
""" | ||
result = self._wrap_series_output( | ||
output=output, index=self.grouper.result_index, names=names | ||
) | ||
return self._reindex_output(result)._convert(datetime=True) | ||
|
||
def _wrap_transformed_output(self, output: Dict[int, np.ndarray], names: List[Hashable]): | ||
""" | ||
Wraps the output of a SeriesGroupBy transformation into the expected result. | |
|
||
Parameters | ||
---------- | ||
output : dict[int, np.ndarray] | ||
Dict with a sole key of 0 and a value of the result values. | ||
index : pd.Index | ||
Index to apply to the output. | ||
names : List[Hashable] | ||
List containing one label (the Series name). | ||
|
||
Returns | ||
------- | ||
Series | ||
|
||
Notes | ||
----- | ||
output and names should only contain one element. These are containers for generic | ||
compatibility with the DataFrameGroupBy class. | |
""" | ||
return self._wrap_series_output( | ||
output=output, index=self.obj.index, names=names | ||
) | ||
|
@@ -1590,6 +1652,22 @@ def _insert_inaxis_grouper_inplace(self, result): | |
result.insert(0, name, lev) | ||
|
||
def _wrap_aggregated_output(self, output: Dict[int, np.ndarray], names: List[Hashable]) -> DataFrame: | ||
""" | ||
Wraps the output of DataFrameGroupBy aggregations into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : dict[int, np.ndarray] | ||
Dict where the key represents the columnar-index and the values are | ||
the actual results. | ||
names : List[Hashable] | ||
List containing the column names to apply. The position of each | ||
item in the list corresponds with the key in output. | ||
|
||
Returns | ||
------- | ||
DataFrame | ||
""" | ||
result = DataFrame(output) | ||
result.columns = names | ||
|
||
|
@@ -1606,6 +1684,23 @@ def _wrap_aggregated_output(self, output: Dict[int, np.ndarray], names: List[Has | |
return self._reindex_output(result)._convert(datetime=True) | ||
|
||
def _wrap_transformed_output(self, output: Dict[int, np.ndarray], names: List[Hashable]) -> DataFrame: | ||
""" | ||
Wraps the output of DataFrameGroupBy transformations into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : dict[int, np.ndarray] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be AnyArrayLike? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess the most accurate representation will be a Union[Series, np.ndarray]. 99% of the time this is an ndarray. ohlc is an exception, as it gets decomposed from a DataFrame into 4 Series objects as part of iteration |
||
Dict where the key represents the columnar-index and the values are | ||
the actual results. | ||
names : List[Hashable] | ||
List containing the column names to apply. The position of each | ||
item in the list corresponds with the key in output. | ||
|
||
Returns | ||
------- | ||
DataFrame | ||
""" | ||
|
||
result = DataFrame(output) | ||
result.columns = names | ||
result.index = self.obj.index | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dict->Mapping?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated