Skip to content

Commit 1cb6c5e

Browse files
committed
feat: add Series.plot.hist method
1 parent dd3643d commit 1cb6c5e

File tree

7 files changed

+330
-0
lines changed

7 files changed

+330
-0
lines changed

bigframes/operations/plot.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from typing import Sequence
15+
16+
import matplotlib.pyplot as plt
17+
18+
import bigframes.constants as constants
19+
import third_party.bigframes_vendored.pandas.plotting._core as vendordt
20+
21+
22+
class PlotAccessor:
    __doc__ = vendordt.PlotAccessor.__doc__

    def __init__(self, data) -> None:
        # Object whose values get plotted. `hist` rejects DataFrame parents,
        # so in practice this is a Series.
        self._parent = data

    def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs):
        """Draw a histogram of the parent Series with matplotlib.

        Args:
            by: Must be None; grouping is not supported yet.
            bins: Number of bins, forwarded to ``bigframes.pandas.cut``.
            **kwargs: ``ax``, ``figsize``, ``legend``, ``grid``, ``xticks``,
                ``yticks`` and ``figure`` are consumed by the accessor; the
                remaining entries are forwarded to ``Axes.hist``.

        Returns:
            The axes object the histogram was drawn on.

        Raises:
            NotImplementedError: If ``by`` is given, if a non-None ``backend``
                is passed, or if the parent object is a DataFrame.
        """
        if by is not None:
            raise NotImplementedError(
                f"Non-none `by` argument is not yet supported. {constants.FEEDBACK_LINK}"
            )
        # Any explicit (non-None) backend is rejected; only the default
        # matplotlib code path below is implemented.
        if kwargs.pop("backend", None) is not None:
            raise NotImplementedError(
                f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
            )
        # Imported here rather than at module level, as in the original code.
        import bigframes.dataframe as dataframe

        if isinstance(self._parent, dataframe.DataFrame):
            raise NotImplementedError(
                f"`Dataframe.plot.hist` is not implemented yet. {constants.FEEDBACK_LINK}"
            )

        return self._hist_series(
            by=by,
            bins=bins,
            **kwargs,
        )

    def _hist_series(
        self,
        by: Sequence[str] | None = None,
        bins: int = 10,
        **kwargs,
    ):
        """Bin the parent series and draw the counts as a histogram.

        Args:
            by: Accepted for signature parity with ``hist``; not used here.
            bins: Number of bins passed to ``bigframes.pandas.cut``.
            **kwargs: Plot options. ``ax``, ``figsize``, ``legend``, ``grid``,
                ``xticks``, ``yticks`` and ``figure`` are handled here; the
                rest goes to ``Axes.hist``.

        Returns:
            The axes the histogram was drawn on: the caller's ``ax`` when one
            is supplied, otherwise the current axes of the target figure.

        Raises:
            ValueError: If a non-None ``layout`` keyword is passed.
            AssertionError: If the supplied ``ax`` does not belong to the
                target figure.
        """
        # Only supported some arguments to adorn plots.
        # `ax` is popped exactly once: popping it a second time later on would
        # always yield None and silently discard the caller's axes.
        ax = kwargs.pop("ax", None)
        figsize = kwargs.pop("figsize", None)
        legend = kwargs.pop("legend", False)
        grid = kwargs.pop("grid", None)
        xticks = kwargs.pop("xticks", None)
        yticks = kwargs.pop("yticks", None)

        # Calculates the bins' values and weights through BigQuery
        import bigframes.pandas as bpd

        series = self._parent.copy()
        binned = bpd.cut(series, bins=bins, labels=None)
        binned_data = (
            binned.struct.explode()
            .value_counts()
            .to_pandas()
            .sort_index(level="left_exclusive")
        )
        weights = binned_data.values
        left_bins = binned_data.index.get_level_values("left_exclusive")
        right_bins = binned_data.index.get_level_values("right_inclusive")
        bin_edges = left_bins.union(right_bins, sort=True)

        # This code takes the hist_series function from pandas and tweaks it a bit.
        if kwargs.get("layout", None) is not None:
            raise ValueError("The 'layout' keyword is not supported when 'by' is None")

        # Resolve the fallback figure only when the caller did not pass one, so
        # that an explicit `figure=` never creates an extra pyplot figure.
        if "figure" in kwargs:
            fig = kwargs.pop("figure")
        else:
            fig = plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize)
        if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

        if ax is None:
            ax = fig.gca()
        elif ax.get_figure() != fig:
            raise AssertionError("passed axis not bound to passed figure")

        if legend:
            kwargs["label"] = series.name
        # Each bin's left edge stands in as one sample, weighted by that bin's count.
        ax.hist(x=left_bins, bins=bin_edges, weights=weights, **kwargs)
        if legend:
            ax.legend()
        if grid is not None:
            ax.grid(grid)
        if xticks is not None:
            ax.set_xticks(xticks)
        if yticks is not None:
            ax.set_yticks(yticks)

        return ax

bigframes/plotting/__init__.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from typing import Iterable
15+
16+
from matplotlib.axes import Axes
17+
import matplotlib.pyplot as plt
18+
import numpy as np
19+
20+
21+
def hist_series(
    series,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize=None,
    bins: int = 10,
    legend: bool = False,
    **kwargs,
):
    """Plot a histogram of ``series`` from bin counts computed via ``bpd.cut``.

    ``by`` is accepted but not used by this function. A non-None ``layout``
    keyword raises ``ValueError``; an ``ax`` that is not attached to the target
    figure raises ``AssertionError``. Any other keyword is passed to
    ``Axes.hist``. Returns the axes that was drawn on.
    """
    import bigframes.pandas as bpd

    # Calculates the bins' values and weights through BigQuery
    intervals = bpd.cut(series, bins=bins, labels=None)
    exploded = intervals.struct.explode()
    counts = exploded.value_counts().to_pandas()
    counts = counts.sort_index(level="left_exclusive")

    heights = counts.values
    bin_index = counts.index
    lower_edges = bin_index.get_level_values("left_exclusive")
    upper_edges = bin_index.get_level_values("right_inclusive")
    edges = lower_edges.union(upper_edges, sort=True)

    # From here on this follows pandas' hist_series, with small tweaks.
    if kwargs.get("layout") is not None:
        raise ValueError("The 'layout' keyword is not supported when 'by' is None")

    # hack until the plotting interface is a bit more unified
    fallback_fig = plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize)
    fig = kwargs.pop("figure", fallback_fig)
    if figsize is not None:
        if tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

    if ax is not None:
        if ax.get_figure() != fig:
            raise AssertionError("passed axis not bound to passed figure")
    else:
        ax = fig.gca()

    if legend:
        kwargs["label"] = series.name
    # Each bin's left edge is fed in as one sample weighted by that bin's count.
    ax.hist(x=lower_edges, bins=edges, weights=heights, **kwargs)
    if legend:
        ax.legend()
    ax.grid(grid)

    axes = np.array([ax])
    _set_ticks_props(
        axes,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
    )

    # Unwrap the single-element array so callers get the Axes itself.
    if hasattr(axes, "ndim") and axes.ndim == 1 and len(axes) == 1:
        return axes[0]
    return axes
84+
85+
86+
def _set_ticks_props(
    axes: Iterable[Axes],
    xlabelsize: int | None = None,
    xrot=None,
    ylabelsize: int | None = None,
    yrot=None,
):
    """Apply the given font sizes / rotations to the tick labels of each axes.

    Settings left as None are skipped. Returns ``axes`` unchanged.
    """
    # (label getter, text property, requested value), applied in this order.
    requested = (
        ("get_xticklabels", "fontsize", xlabelsize),
        ("get_xticklabels", "rotation", xrot),
        ("get_yticklabels", "fontsize", ylabelsize),
        ("get_yticklabels", "rotation", yrot),
    )
    for axis in axes:
        for getter_name, prop, value in requested:
            if value is not None:
                plt.setp(getattr(axis, getter_name)(), **{prop: value})
    return axes

bigframes/series.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import bigframes.operations.aggregations as agg_ops
5151
import bigframes.operations.base
5252
import bigframes.operations.datetimes as dt
53+
import bigframes.operations.plot as plot
5354
import bigframes.operations.strings as strings
5455
import bigframes.operations.structs as structs
5556
import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series
@@ -1551,6 +1552,10 @@ def __array_ufunc__(
15511552
def str(self) -> strings.StringMethods:
15521553
return strings.StringMethods(self._block)
15531554

1555+
@property
def plot(self):
    # Wrap this Series in a PlotAccessor from bigframes.operations.plot.
    # A comment is used instead of a docstring so that the property's
    # `__doc__` stays exactly as it was.
    accessor = plot.PlotAccessor(self)
    return accessor
1558+
15541559
def _slice(
15551560
self,
15561561
start: typing.Optional[int] = None,

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"tabulate >= 0.9",
5959
"ipywidgets >=7.7.1",
6060
"humanize >= 4.6.0",
61+
"matplotlib >= 3.7.1",
6162
]
6263
extras = {
6364
# Optional test dependencies packages. If they're missed, may skip some tests.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas._testing as tm
16+
17+
18+
def test_series_hist_bins(scalars_dfs):
    # Histogram bars drawn by bigframes should match the pandas ones bar-for-bar.
    bf_df, pd_df = scalars_dfs
    bf_ax = bf_df["int64_col"].plot.hist(bins=5)
    expected_ax = pd_df["int64_col"].hist(bins=5)

    # NOTE(review): neither call passes an explicit figure/axes; confirm both
    # histograms do not end up on the same Axes, which would make the
    # comparison below trivially true.
    bf_bars = bf_ax.patches
    expected_bars = expected_ax.patches

    # Check hist has same height compared to the pandas one.
    assert len(bf_bars) == len(expected_bars)
    for bf_bar, expected_bar in zip(bf_bars, expected_bars):
        assert bf_bar.xy == expected_bar.xy
        assert bf_bar._height == expected_bar._height
28+
29+
30+
def test_series_hist_ticks_props(scalars_dfs):
    # Tick labels produced with custom xticks/yticks should match pandas in
    # count, font size and rotation on both axes.
    scalars_df, scalars_pandas_df = scalars_dfs

    xticks = [20, 18]
    yticks = [30, 40]

    ax = scalars_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)
    pd_ax = scalars_pandas_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)

    xlabels = ax.get_xticklabels()
    pd_xlabels = pd_ax.get_xticklabels()
    assert len(xlabels) == len(pd_xlabels)
    for label, pd_label in zip(xlabels, pd_xlabels):
        tm.assert_almost_equal(label.get_fontsize(), pd_label.get_fontsize())
        tm.assert_almost_equal(label.get_rotation(), pd_label.get_rotation())

    # The y-axis section must compare the y labels: the earlier version
    # re-checked the x-label count and iterated over the x-label length.
    ylabels = ax.get_yticklabels()
    pd_ylabels = pd_ax.get_yticklabels()
    assert len(ylabels) == len(pd_ylabels)
    for label, pd_label in zip(ylabels, pd_ylabels):
        tm.assert_almost_equal(label.get_fontsize(), pd_label.get_fontsize())
        tm.assert_almost_equal(label.get_rotation(), pd_label.get_rotation())

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3111,6 +3111,17 @@ def str(self):
31113111
"""
31123112
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
31133113

3114+
@property
def plot(self):
    """
    Accessor for making plots of the Series.

    Returns:
        bigframes.operations.plot.PlotAccessor:
            An accessor making plots.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
3124+
31143125
def isin(self, values):
31153126
"""
31163127
Whether elements in Series are contained in values.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from typing import Sequence
2+
3+
from bigframes import constants
4+
5+
6+
class PlotAccessor:
7+
def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs):
8+
"""
9+
Draw histogram of the input series using matplotlib.
10+
11+
Parameters
12+
----------
13+
by : str or sequence, optional
14+
If passed, then used to form histograms for separate groups.
15+
Currently, it is not supported yet.
16+
bins : int, default 10
17+
Number of histogram bins to be used.
18+
ax : matplotlib axes object, default None
19+
An axes of the current figure.
20+
grid : bool, default None (matlab style default)
21+
Axis grid lines.
22+
xticks : sequence
23+
Values to use for the xticks.
24+
yticks : sequence
25+
Values to use for the yticks.
26+
figsize : a tuple (width, height) in inches
27+
Size of a figure object.
28+
backend : str, default None
29+
Backend to use instead of the backend specified in the option
30+
``plotting.backend``. Currently, only `matplotlib` is not supported yet.
31+
legend : bool, default False
32+
Place legend on axis subplots.
33+
**kwargs
34+
Options to pass to matplotlib plotting method.
35+
36+
Returns
37+
-------
38+
class:`matplotlib.Axes`
39+
A histogram plot.
40+
41+
Examples
42+
--------
43+
For Series:
44+
45+
.. plot::
46+
:context: close-figs
47+
48+
>>> import bigframes.pandas as bpd
49+
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
50+
>>> ser = bpd.Series([1, 2, 2, 4, 6, 6], index=lst)
51+
>>> hist = ser.plot.hist()
52+
"""
53+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)