Skip to content

Commit 41ab44a

Browse files
BUG: histogram weights aren't dropped if NaN values in data (#48888)
1 parent 6c76b49 commit 41ab44a

File tree

3 files changed

+44
-4
lines changed

3 files changed

+44
-4
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,7 @@ Period
926926

927927
Plotting
928928
^^^^^^^^
929+
- Bug in :meth:`DataFrame.plot.hist` not dropping elements of ``weights`` corresponding to ``NaN`` values in ``data`` (:issue:`48884`)
929930
- ``ax.set_xlim`` was sometimes raising ``UserWarning`` which users couldn't address due to ``set_xlim`` not accepting parsing arguments - the converter now uses :func:`Timestamp` instead (:issue:`49148`)
930931
-
931932

pandas/plotting/_matplotlib/hist.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,23 @@ def _make_plot(self) -> None:
146146
kwds["label"] = self.columns
147147
kwds.pop("color")
148148

149-
y = reformat_hist_y_given_by(y, self.by)
150-
151149
# We allow weights to be a multi-dimensional array, e.g. a (10, 2) array,
152150
# and each sub-array (10,) will be called in each iteration. If users only
153151
# provide a 1D array, we assume the same weights are used for all iterations
154152
weights = kwds.get("weights", None)
155-
if weights is not None and np.ndim(weights) != 1:
156-
kwds["weights"] = weights[:, i]
153+
if weights is not None:
154+
if np.ndim(weights) != 1 and np.shape(weights)[-1] != 1:
155+
try:
156+
weights = weights[:, i]
157+
except IndexError as err:
158+
raise ValueError(
159+
"weights must have the same shape as data, "
160+
"or be a single column"
161+
) from err
162+
weights = weights[~isna(y)]
163+
kwds["weights"] = weights
164+
165+
y = reformat_hist_y_given_by(y, self.by)
157166

158167
artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds)
159168

pandas/tests/plotting/test_hist_method.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,36 @@ def test_hist_secondary_legend(self):
560560
assert ax.get_yaxis().get_visible()
561561
tm.close()
562562

563+
@td.skip_if_no_mpl
564+
def test_hist_with_nans_and_weights(self):
565+
# GH 48884
566+
df = DataFrame(
567+
[[np.nan, 0.2, 0.3], [0.4, np.nan, np.nan], [0.7, 0.8, 0.9]],
568+
columns=list("abc"),
569+
)
570+
weights = np.array([0.25, 0.3, 0.45])
571+
no_nan_df = DataFrame([[0.4, 0.2, 0.3], [0.7, 0.8, 0.9]], columns=list("abc"))
572+
no_nan_weights = np.array([[0.3, 0.25, 0.25], [0.45, 0.45, 0.45]])
573+
574+
from matplotlib.patches import Rectangle
575+
576+
_, ax0 = self.plt.subplots()
577+
df.plot.hist(ax=ax0, weights=weights)
578+
rects = [x for x in ax0.get_children() if isinstance(x, Rectangle)]
579+
heights = [rect.get_height() for rect in rects]
580+
_, ax1 = self.plt.subplots()
581+
no_nan_df.plot.hist(ax=ax1, weights=no_nan_weights)
582+
no_nan_rects = [x for x in ax1.get_children() if isinstance(x, Rectangle)]
583+
no_nan_heights = [rect.get_height() for rect in no_nan_rects]
584+
assert all(h0 == h1 for h0, h1 in zip(heights, no_nan_heights))
585+
586+
idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]])
587+
588+
msg = "weights must have the same shape as data, or be a single column"
589+
with pytest.raises(ValueError, match=msg):
590+
_, ax2 = self.plt.subplots()
591+
no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights)
592+
563593

564594
@td.skip_if_no_mpl
565595
class TestDataFrameGroupByPlots(TestPlotBase):

0 commit comments

Comments
 (0)