diff --git a/py-polars/docs/source/reference/series/index.rst b/py-polars/docs/source/reference/series/index.rst index 6090e43ddd15..a8476da64b97 100644 --- a/py-polars/docs/source/reference/series/index.rst +++ b/py-polars/docs/source/reference/series/index.rst @@ -20,6 +20,7 @@ This page gives an overview of all public Series methods. list modify_select miscellaneous + plot string struct temporal diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index e00be8cc5333..d40506d58d7d 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -618,15 +618,20 @@ def plot(self) -> Plot: `df.plot` with `df.hvplot`. Polars does not implement plotting logic itself, but instead defers to - Altair: + `Altair <https://altair-viz.github.io/>`_: - - `df.plot.line(*args, **kwargs)` + - `df.plot.line(**kwargs)` is shorthand for - `alt.Chart(df).mark_line().encode(*args, **kwargs).interactive()` - - `df.plot.point(*args, **kwargs)` + `alt.Chart(df).mark_line().encode(**kwargs).interactive()` + - `df.plot.point(**kwargs)` is shorthand for - `alt.Chart(df).mark_point().encode(*args, **kwargs).interactive()` - - ... (likewise, for any other attribute, e.g. `df.plot.bar`) + `alt.Chart(df).mark_point().encode(**kwargs).interactive()` + - `df.plot.bar(**kwargs)` + is shorthand for + `alt.Chart(df).mark_bar().encode(**kwargs).interactive()` + - for any other attribute `attr`, `df.plot.attr(**kwargs)` + is shorthand for + `alt.Chart(df).mark_attr().encode(**kwargs).interactive()` Examples -------- @@ -652,6 +657,19 @@ def plot(self) -> Plot: ... } ... ) >>> df.plot.line(x="date", y="price", color="stock") # doctest: +SKIP + + Bar plot: + + >>> df = pl.DataFrame( + ... { + ... "day": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] * 2, + ... "group": ["a"] * 7 + ["b"] * 7, + ... "value": [1, 3, 2, 4, 5, 6, 1, 1, 3, 2, 4, 5, 1, 2], + ... } + ... ) + >>> df.plot.bar( + ... x="day", y="value", color="day", column="group" + ... 
) # doctest: +SKIP """ if not _ALTAIR_AVAILABLE or parse_version(altair.__version__) < (5, 4, 0): msg = "altair>=5.4.0 is required for `.plot`" diff --git a/py-polars/polars/dataframe/plotting.py b/py-polars/polars/dataframe/plotting.py index b43ba8c320d8..2fdd35cdc4d2 100644 --- a/py-polars/polars/dataframe/plotting.py +++ b/py-polars/polars/dataframe/plotting.py @@ -57,9 +57,11 @@ def bar( """ Draw bar plot. - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.bar(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_bar().encode(*args, **kwargs).interactive()`, + Polars does not implement plotting logic itself but instead defers to + `Altair <https://altair-viz.github.io/>`_. + + `df.plot.bar(**kwargs)` is shorthand for + `alt.Chart(df).mark_bar().encode(**kwargs).interactive()`, as is intended for convenience - for full customisatibility, use a plotting library directly. @@ -79,8 +81,8 @@ def bar( Column to color bars by. tooltip Columns to show values of when hovering over bars with pointer. - *args, **kwargs - Additional arguments and keyword arguments passed to Altair. + **kwargs + Additional keyword arguments passed to Altair. Examples -------- @@ -120,9 +122,10 @@ def line( """ Draw line plot. - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.line(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_line().encode(*args, **kwargs).interactive()`, + Polars does not implement plotting logic itself but instead defers to + `Altair <https://altair-viz.github.io/>`_. + + `alt.Chart(df).mark_line().encode(**kwargs).interactive()`, as is intended for convenience - for full customisatibility, use a plotting library directly. @@ -144,8 +147,8 @@ def line( Column to use for order of data points in lines. tooltip Columns to show values of when hovering over lines with pointer. - *args, **kwargs - Additional arguments and keyword arguments passed to Altair. + **kwargs + Additional keyword arguments passed to Altair. 
Examples -------- @@ -183,15 +186,16 @@ def point( color: ChannelColor | None = None, size: ChannelSize | None = None, tooltip: ChannelTooltip | None = None, - *args: Any, **kwargs: Any, ) -> alt.Chart: """ Draw scatter plot. - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.point(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_point().encode(*args, **kwargs).interactive()`, + Polars does not implement plotting logic itself but instead defers to + `Altair <https://altair-viz.github.io/>`_. + + `df.plot.point(**kwargs)` is shorthand for + `alt.Chart(df).mark_point().encode(**kwargs).interactive()`, as is intended for convenience - for full customisatibility, use a plotting library directly. @@ -213,8 +217,8 @@ def point( Column which determines points' sizes. tooltip Columns to show values of when hovering over points with pointer. - *args, **kwargs - Additional arguments and keyword arguments passed to Altair. + **kwargs + Additional keyword arguments passed to Altair. 
Examples -------- @@ -240,15 +244,16 @@ def point( encodings["tooltip"] = tooltip return ( self.chart.mark_point() - .encode(*args, **{**encodings, **kwargs}) + .encode( + **encodings, # type: ignore[arg-type] + **kwargs, + ) .interactive() ) - def __getattr__( - self, attr: str, *args: EncodeKwds, **kwargs: EncodeKwds - ) -> Callable[..., alt.Chart]: - method = self.chart.__getattr__(f"mark_{attr}", None) + def __getattr__(self, attr: str) -> Callable[..., alt.Chart]: + method = getattr(self.chart, f"mark_{attr}", None) if method is None: msg = "Altair has no method 'mark_{attr}'" raise AttributeError(msg) - return method().encode(*args, **kwargs).interactive() + return lambda **kwargs: method().encode(**kwargs).interactive() diff --git a/py-polars/polars/series/plotting.py b/py-polars/polars/series/plotting.py index 25e84e338437..3321830e68a6 100644 --- a/py-polars/polars/series/plotting.py +++ b/py-polars/polars/series/plotting.py @@ -1,11 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Union +from typing import TYPE_CHECKING, Callable, Dict, Union + +from polars.dependencies import altair as alt if TYPE_CHECKING: import sys - import altair as alt from altair.typing import ( ChannelColor, ChannelOrder, @@ -16,7 +17,6 @@ EncodeKwds, ) - if sys.version_info >= (3, 10): from typing import TypeAlias else: @@ -26,6 +26,8 @@ else: from typing_extensions import Unpack + from polars import Series + Encodings: TypeAlias = Dict[ str, Union[ @@ -35,31 +37,30 @@ class Plot: - """DataFrame.plot namespace.""" + """Series.plot namespace.""" + + _accessor = "plot" chart: alt.Chart def __init__(self, s: Series) -> None: - import altair as alt - - self.chart = alt.Chart(s.to_frame()) - self._series_name = s.name + name = s.name or "value" + self._df = s.to_frame(name) + self._series_name = name - def bar( + def hist( self, - x: ChannelX | None = None, - y: ChannelY | None = None, - color: ChannelColor | None = None, - tooltip: 
ChannelTooltip | None = None, /, **kwargs: Unpack[EncodeKwds], ) -> alt.Chart: """ - Draw bar plot. + Draw histogram. + + Polars does not implement plotting logic itself but instead defers to + `Altair <https://altair-viz.github.io/>`_. - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.bar(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_bar().encode(*args, **kwargs).interactive()`, + `s.plot.hist(**kwargs)` is shorthand for + `alt.Chart(s.to_frame()).mark_bar().encode(x=alt.X(f'{s.name}:Q', bin=True), y='count()', **kwargs).interactive()`, as is intended for convenience - for full customisatibility, use a plotting library directly. @@ -71,40 +72,22 @@ def bar( Parameters ---------- - x - Column with x-coordinates of bars. - y - Column with y-coordinates of bars. - color - Column to color bars by. - tooltip - Columns to show values of when hovering over bars with pointer. - *args, **kwargs + **kwargs Additional arguments and keyword arguments passed to Altair. Examples -------- - >>> from datetime import date - >>> df = pl.DataFrame( - ... { - ... "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2, - ... "price": [1, 4, 6, 1, 5, 2], - ... "stock": ["a", "a", "a", "b", "b", "b"], - ... } - ... 
) - >>> df.plot.bar(x="price", y="count()") # doctest: +SKIP - """ - encodings: Encodings = {} - if x is not None: - encodings["x"] = x - if y is not None: - encodings["y"] = y - if color is not None: - encodings["color"] = color - if tooltip is not None: - encodings["tooltip"] = tooltip + >>> s = pl.Series("price", [1, 3, 3, 3, 5, 2, 6, 5, 5, 5, 7]) + >>> s.plot.hist() # doctest: +SKIP + """ # noqa: W505 + if self._series_name == "count()": + msg = "Cannot use `plot.hist` when Series name is `'count()'`" + raise ValueError(msg) return ( - self.chart.mark_bar().encode(**{**encodings, **kwargs}).interactive() # type: ignore[arg-type] + alt.Chart(self._df) + .mark_bar() + .encode(x=alt.X(f"{self._series_name}:Q", bin=True), y="count()", **kwargs) # type: ignore[misc] + .interactive() ) def kde( @@ -113,11 +96,13 @@ def kde( **kwargs: Unpack[EncodeKwds], ) -> alt.Chart: """ - Draw line plot. + Draw kernel density estimate plot. + + Polars does not implement plotting logic itself but instead defers to + `Altair <https://altair-viz.github.io/>`_. - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.line(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_line().encode(*args, **kwargs).interactive()`, + `s.plot.kde(**kwargs)` is shorthand for + `alt.Chart(s.to_frame()).transform_density(s.name, as_=[s.name, 'density']).mark_area().encode(x=s.name, y='density:Q', **kwargs).interactive()`, as is intended for convenience - for full customisatibility, use a plotting library directly. @@ -144,96 +129,32 @@ def kde( Examples -------- - >>> from datetime import date - >>> df = pl.DataFrame( - ... { - ... "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2, - ... "price": [1, 4, 6, 1, 5, 2], - ... "stock": ["a", "a", "a", "b", "b", "b"], - ... } - ... 
) - >>> df.plot.line(x="date", y="price", color="stock") # doctest: +SKIP - """ + >>> s = pl.Series("price", [1, 3, 3, 3, 5, 2, 6, 5, 5, 5, 7]) + >>> s.plot.kde() # doctest: +SKIP + """ # noqa: W505 + if self._series_name == "density": + msg = "Cannot use `plot.kde` when Series name is `'density'`" + raise ValueError(msg) return ( - self.chart.transform_density(self._series_name, as_=[f'{self._series_name}', 'density']).mark_area() - .encode(x=f'{self._series_name}', y='density:Q', **kwargs) # type: ignore[arg-type] + alt.Chart(self._df) + .transform_density(self._series_name, as_=[self._series_name, "density"]) + .mark_area() + .encode(x=self._series_name, y="density:Q", **kwargs) # type: ignore[misc] .interactive() ) - alt.Chart(df).transform_density('price', as_=['price', 'density']).mark_area().encode(x='price', y='density:Q') - - def point( - self, - x: ChannelX | None = None, - y: ChannelY | None = None, - color: ChannelColor | None = None, - size: ChannelSize | None = None, - tooltip: ChannelTooltip | None = None, - *args: Any, - **kwargs: Any, - ) -> alt.Chart: - """ - Draw scatter plot. - - Polars does not implement plotting logic itself but instead defers to Altair. - `df.plot.point(*args, **kwargs)` is shorthand for - `alt.Chart(df).mark_point().encode(*args, **kwargs).interactive()`, - as is intended for convenience - for full customisatibility, use a plotting - library directly. - .. versionchanged:: 1.6.0 - In prior versions of Polars, HvPlot was the plotting backend. If you would - like to restore the previous plotting functionality, all you need to do - add `import hvplot.polars` at the top of your script and replace - `df.plot` with `df.hvplot`. - - Parameters - ---------- - x - Column with x-coordinates of points. - y - Column with y-coordinates of points. - color - Column to color points by. - size - Column which determines points' sizes. - tooltip - Columns to show values of when hovering over points with pointer. 
- *args, **kwargs - Additional arguments and keyword arguments passed to Altair. - - Examples - -------- - >>> df = pl.DataFrame( - ... { - ... "length": [1, 4, 6], - ... "width": [4, 5, 6], - ... "species": ["setosa", "setosa", "versicolor"], - ... } - ... ) - >>> df.plot.point(x="length", y="width", color="species") # doctest: +SKIP - """ - encodings: Encodings = {} - if x is not None: - encodings["x"] = x - if y is not None: - encodings["y"] = y - if color is not None: - encodings["color"] = color - if size is not None: - encodings["size"] = size - if tooltip is not None: - encodings["tooltip"] = tooltip + def __getattr__(self, attr: str) -> Callable[..., alt.Chart]: + if "index" in self._df.columns: + msg = "Cannot call `plot.{attr}` when Series name is 'index'" + raise ValueError(msg) + method = getattr( + alt.Chart(self._df.with_row_index("index")), f"mark_{attr}", None + ) + if method is None: + msg = "Altair has no method 'mark_{attr}'" + raise AttributeError(msg) return ( - self.chart.mark_point() - .encode(*args, **{**encodings, **kwargs}) + lambda **kwargs: method() + .encode(x="index", y=self._series_name, **kwargs) .interactive() ) - - # def __getattr__( - # self, attr: str, *args: EncodeKwds, **kwargs: EncodeKwds - # ) -> Callable[..., alt.Chart]: - # method = self.chart.__getattr__(f"mark_{attr}", None) - # if method is None: - # msg = "Altair has no method 'mark_{attr}'" - # raise AttributeError(msg) - # return method().encode(*args, **kwargs).interactive() diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 87ad673867e0..c1b3020a5b8a 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1,5 +1,4 @@ from __future__ import annotations -from polars.series.plotting import Plot import contextlib import math @@ -92,8 +91,8 @@ _check_for_numpy, _check_for_pandas, _check_for_pyarrow, - import_optional, altair, + import_optional, ) from polars.dependencies import numpy as np from 
polars.dependencies import pandas as pd @@ -105,6 +104,7 @@ from polars.series.categorical import CatNameSpace from polars.series.datetime import DateTimeNameSpace from polars.series.list import ListNameSpace +from polars.series.plotting import Plot from polars.series.string import StringNameSpace from polars.series.struct import StructNameSpace from polars.series.utils import expr_dispatch, get_ffi_func @@ -7376,27 +7376,31 @@ def plot(self) -> Plot: Polars does not implement plotting logic itself, but instead defers to Altair: - - `s.plot.hist(*args, **kwargs)` + - `s.plot.hist(**kwargs)` + is shorthand for + `alt.Chart(s.to_frame()).mark_bar().encode(x=alt.X(f'{s.name}:Q', bin=True), y='count()', **kwargs).interactive()` + - `s.plot.kde(**kwargs)` is shorthand for - `alt.Chart(s.to_frame()).mark_bar().encode(x=s.name, y='count()', *args, **kwargs).interactive()` - - `s.plot.kde(*args, **kwargs)` + `alt.Chart(s.to_frame()).transform_density(s.name, as_=[s.name, 'density']).mark_area().encode(x=s.name, y='density:Q', **kwargs).interactive()` + - for any other attribute `attr`, `s.plot.attr(**kwargs)` is shorthand for - `alt.Chart(s.to_frame()).transform_density(s.name, as_=[s.name, 'density']).mark_area().encode(x=s.name, y='density', *args, **kwargs).interactive()` - - For anything else, please call `s.to_frame()` and then use one of the - methods in :meth:`DataFrame.plot`. 
+ `alt.Chart(s.to_frame().with_row_index()).mark_attr().encode(x='index', y=s.name, **kwargs).interactive()` Examples -------- Histogram: - >>> s = pl.Series([1, 1, 2, 3]) + >>> s = pl.Series([1, 4, 4, 6, 2, 4, 3, 5, 5, 7, 1]) >>> s.plot.hist() # doctest: +SKIP KDE plot: >>> s.plot.kde() # doctest: +SKIP - """ + + Line plot: + + >>> s.plot.line() # doctest: +SKIP + """ # noqa: W505 if not _ALTAIR_AVAILABLE or parse_version(altair.__version__) < (5, 4, 0): msg = "altair>=5.4.0 is required for `.plot`" raise ModuleUpgradeRequiredError(msg) diff --git a/py-polars/tests/unit/operations/namespaces/test_plot.py b/py-polars/tests/unit/operations/namespaces/test_plot.py index f8f6710095e0..e1cd8f8f1edb 100644 --- a/py-polars/tests/unit/operations/namespaces/test_plot.py +++ b/py-polars/tests/unit/operations/namespaces/test_plot.py @@ -1,15 +1,8 @@ -from datetime import date - -import pytest - import polars as pl -# Calling `plot` the first time is slow -# https://github.com/pola-rs/polars/issues/13500 -pytestmark = pytest.mark.slow - -def test_dataframe_scatter() -> None: +def test_dataframe_plot() -> None: + # dry-run, check nothing errors df = pl.DataFrame( { "length": [1, 4, 6], @@ -17,19 +10,24 @@ def test_dataframe_scatter() -> None: "species": ["setosa", "setosa", "versicolor"], } ) - df.plot.point(x="length", y="width", color="species") + df.plot.line(x="length", y="width", color="species").to_json() + df.plot.point(x="length", y="width", size="species").to_json() + df.plot.bar(x="length", y="width", color="species").to_json() + df.plot.area(x="length", y="width", color="species").to_json() -def test_dataframe_line() -> None: - df = pl.DataFrame( - { - "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2, - "price": [1, 4, 6, 1, 5, 2], - "stock": ["a", "a", "a", "b", "b", "b"], - } - ) - df.plot.line(x="date", y="price", color="stock") +def test_series_plot() -> None: + # dry-run, check nothing errors + s = pl.Series("a", [1, 4, 4, 4, 7, 2, 5, 3, 6]) + 
s.plot.kde().to_json() + s.plot.hist().to_json() + s.plot.line().to_json() + s.plot.point().to_json() def test_empty_dataframe() -> None: pl.DataFrame({"a": [], "b": []}).plot.point(x="a", y="b") + + +def test_nameless_series() -> None: + pl.Series([1, 2, 3]).plot.kde().to_json()