Skip to content

Commit

Permalink
Update Polars to v0.33 (#721)
Browse files Browse the repository at this point in the history
* WIP: upgrade polars

* Fix precision differences

* Changes in the argmin series function

* Product and correlation are now different

* More fixes to argmin

* Fix Polars behaviour after upgrading to v0.33

Some optimizations in the expressions were breaking the "mutate".
This may be a partial solution, as it may be a bug in Polars.
See: pola-rs/polars#11787

Also, fix a small problem with a malformed CSV that contained
more columns than its schema/header.

* Fix CSV in the tests

* Update "object_store" and "tokio" in the Rust project

The idea is to use the same versions that Polars is using.
This may help to reduce the size of the binary and compilation time.
  • Loading branch information
philss authored Oct 17, 2023
1 parent a0631ce commit 63f2ed6
Show file tree
Hide file tree
Showing 14 changed files with 348 additions and 331 deletions.
4 changes: 2 additions & 2 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2696,7 +2696,7 @@ defmodule Explorer.DataFrame do
petal_length float [1.4, 1.4, 1.3, 1.5, 1.4, ...]
petal_width float [0.2, 0.2, 0.2, 0.2, 0.2, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
petal_length_avg float [1.4640000000000004, 1.4640000000000004, 1.4640000000000004, 1.4640000000000004, 1.4640000000000004, ...]
petal_length_avg float [1.464, 1.464, 1.464, 1.464, 1.464, ...]
>
"""
@doc type: :single
Expand Down Expand Up @@ -5223,7 +5223,7 @@ defmodule Explorer.DataFrame do
iex> Explorer.DataFrame.summarise(df, mean_petal_length: mean(petal_length))
#Explorer.DataFrame<
Polars[1 x 1]
mean_petal_length float [3.758666666666667]
mean_petal_length float [3.758666666666666]
>
"""
Expand Down
24 changes: 12 additions & 12 deletions lib/explorer/query.ex
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ defmodule Explorer.Query do
...> )
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length float [-1.0840606189132314, -1.3757361217598396, -1.6674116246064494, -1.8132493760297548, -1.2298983703365356, ...]
sepal_width float [2.372289612531505, -0.28722789030650403, 0.7765791108287006, 0.24467561026109824, 2.904193113099107, ...]
petal_length float [-0.7576391687443842, -0.7576391687443842, -0.7897606710936372, -0.725517666395131, -0.7576391687443842, ...]
petal_width float [-1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, ...]
sepal_length float [-1.0840606189132322, -1.3757361217598405, -1.66741162460645, -1.8132493760297554, -1.2298983703365365, ...]
sepal_width float [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982, 2.9041931130991068, ...]
petal_length float [-0.757639168744384, -0.757639168744384, -0.789760671093637, -0.7255176663951308, -0.757639168744384, ...]
petal_width float [-1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
Expand All @@ -136,10 +136,10 @@ defmodule Explorer.Query do
...> )
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length float [-1.0840606189132314, -1.3757361217598396, -1.6674116246064494, -1.8132493760297548, -1.2298983703365356, ...]
sepal_width float [2.372289612531505, -0.28722789030650403, 0.7765791108287006, 0.24467561026109824, 2.904193113099107, ...]
petal_length float [-0.7576391687443842, -0.7576391687443842, -0.7897606710936372, -0.725517666395131, -0.7576391687443842, ...]
petal_width float [-1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, ...]
sepal_length float [-1.0840606189132322, -1.3757361217598405, -1.66741162460645, -1.8132493760297554, -1.2298983703365365, ...]
sepal_width float [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982, 2.9041931130991068, ...]
petal_length float [-0.757639168744384, -0.757639168744384, -0.789760671093637, -0.7255176663951308, -0.757639168744384, ...]
petal_width float [-1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
Expand Down Expand Up @@ -189,10 +189,10 @@ defmodule Explorer.Query do
...> )
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length float [-1.0840606189132314, -1.3757361217598396, -1.6674116246064494, -1.8132493760297548, -1.2298983703365356, ...]
sepal_width float [2.372289612531505, -0.28722789030650403, 0.7765791108287006, 0.24467561026109824, 2.904193113099107, ...]
petal_length float [-0.7576391687443842, -0.7576391687443842, -0.7897606710936372, -0.725517666395131, -0.7576391687443842, ...]
petal_width float [-1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, -1.7147014356654704, ...]
sepal_length float [-1.0840606189132322, -1.3757361217598405, -1.66741162460645, -1.8132493760297554, -1.2298983703365365, ...]
sepal_width float [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982, 2.9041931130991068, ...]
petal_length float [-0.757639168744384, -0.757639168744384, -0.789760671093637, -0.7255176663951308, -0.757639168744384, ...]
petal_width float [-1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
Expand Down
27 changes: 24 additions & 3 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,10 @@ defmodule Explorer.Series do
iex> Explorer.Series.argmax(s)
1
iex> s = Explorer.Series.from_list([], dtype: :integer)
iex> Explorer.Series.argmax(s)
nil
iex> s = Explorer.Series.from_list(["a", "b", "c"])
iex> Explorer.Series.argmax(s)
** (ArgumentError) Explorer.Series.argmax/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}]
Expand All @@ -1950,7 +1954,9 @@ defmodule Explorer.Series do
@doc """
Gets the index of the minimum value of the series.
Note that `nil` is treated as a minimum value.
Note that `nil` is ignored. If the series is empty
or all of its elements are `nil`,
the result will be `nil`.
## Supported dtypes
Expand All @@ -1965,11 +1971,11 @@ defmodule Explorer.Series do
iex> s = Explorer.Series.from_list([1, 2, nil, 3])
iex> Explorer.Series.argmin(s)
2
0
iex> s = Explorer.Series.from_list([1.0, 2.0, nil, 3.0])
iex> Explorer.Series.argmin(s)
2
0
iex> s = Explorer.Series.from_list([~D[2021-01-01], ~D[1999-12-31]])
iex> Explorer.Series.argmin(s)
Expand All @@ -1983,6 +1989,14 @@ defmodule Explorer.Series do
iex> Explorer.Series.argmin(s)
0
iex> s = Explorer.Series.from_list([], dtype: :integer)
iex> Explorer.Series.argmin(s)
nil
iex> s = Explorer.Series.from_list([nil], dtype: :integer)
iex> Explorer.Series.argmin(s)
nil
iex> s = Explorer.Series.from_list(["a", "b", "c"])
iex> Explorer.Series.argmin(s)
** (ArgumentError) Explorer.Series.argmin/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}]
Expand Down Expand Up @@ -2115,6 +2129,9 @@ defmodule Explorer.Series do
@doc """
Reduce this Series to the product value.
Note that an empty series is going to result in a
product of `1`. Values that are `nil` are ignored.
## Supported dtypes
* `:integer`
Expand All @@ -2126,6 +2143,10 @@ defmodule Explorer.Series do
iex> Explorer.Series.product(s)
6
iex> s = Explorer.Series.from_list([])
iex> Explorer.Series.product(s)
1.0
iex> s = Explorer.Series.from_list([true, false, true])
iex> Explorer.Series.product(s)
** (ArgumentError) Explorer.Series.product/1 not implemented for dtype :boolean. Valid dtypes are [:integer, :float]
Expand Down
Loading

0 comments on commit 63f2ed6

Please sign in to comment.