From 60c487f59beea3f8ec4f74a45db55f79ca9ec7bd Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 12 Sep 2024 11:32:40 +0200 Subject: [PATCH] pyo3-polars 0.17 (Polars 0.43.0) (#104) --- Cargo.toml | 12 +++++----- .../expression_lib/src/distances.rs | 10 ++++---- .../expression_lib/src/expressions.rs | 8 +++---- example/io_plugin/io_plugin/src/lib.rs | 2 +- example/io_plugin/io_plugin/src/samplers.rs | 4 ++-- pyo3-polars-derive/Cargo.toml | 2 +- pyo3-polars-derive/src/lib.rs | 1 - pyo3-polars/Cargo.toml | 4 ++-- pyo3-polars/src/alloc.rs | 6 +++++ pyo3-polars/src/derive.rs | 8 +++++-- pyo3-polars/src/ffi/to_py.rs | 4 ++-- pyo3-polars/src/ffi/to_rust.rs | 2 +- pyo3-polars/src/types.rs | 24 ++++++++++--------- 13 files changed, 49 insertions(+), 38 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3abd31b..531b4f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,15 +9,15 @@ members = [ ] [workspace.dependencies] -polars = { version = "0.42.0", default-features = false } -polars-core = { version = "0.42.0", default-features = false } -polars-ffi = { version = "0.42.0", default-features = false } -polars-plan = { version = "0.42.0", default-feautres = false } -polars-lazy = { version = "0.42.0", default-features = false } +polars = { version = "0.43.0", default-features = false } +polars-core = { version = "0.43.0", default-features = false } +polars-ffi = { version = "0.43.0", default-features = false } +polars-plan = { version = "0.43.0", default-feautres = false } +polars-lazy = { version = "0.43.0", default-features = false } [workspace.dependencies.arrow] package = "polars-arrow" -version = "0.42.0" +version = "0.43.0" path = "../polars/crates/polars-arrow" default-features = false diff --git a/example/derive_expression/expression_lib/src/distances.rs b/example/derive_expression/expression_lib/src/distances.rs index beabf73..2ed215f 100644 --- a/example/derive_expression/expression_lib/src/distances.rs +++ b/example/derive_expression/expression_lib/src/distances.rs @@ -75,10 +75,10 @@ where T::Native: Float, { let out: ChunkedArray = start_lat - .into_iter() - .zip(start_long.into_iter()) - .zip(end_lat.into_iter()) - .zip(end_long.into_iter()) + .iter() + .zip(start_long.iter()) + .zip(end_lat.iter()) + .zip(end_long.iter()) .map(|(((start_lat, start_long), end_lat), end_long)| { let start_lat = start_lat?; let start_long = start_long?; @@ -90,5 +90,5 @@ where }) .collect(); - Ok(out.with_name(start_lat.name())) + Ok(out.with_name(start_lat.name().clone())) } diff --git a/example/derive_expression/expression_lib/src/expressions.rs b/example/derive_expression/expression_lib/src/expressions.rs index 5395f2b..112de81 100644 --- a/example/derive_expression/expression_lib/src/expressions.rs +++ b/example/derive_expression/expression_lib/src/expressions.rs @@ -85,7 +85,7 @@ fn pig_latinnify_with_paralellism( .collect(); Ok( - StringChunked::from_chunk_iter(ca.name(), chunks.into_iter().flatten()) + StringChunked::from_chunk_iter(ca.name().clone(), chunks.into_iter().flatten()) .into_series(), ) }) @@ -176,7 +176,7 @@ fn is_leap_year(input: &[Series]) -> PolarsResult { let out: BooleanChunked = ca .as_date_iter() .map(|opt_dt| opt_dt.map(|dt| dt.leap_year())) - .collect_ca(ca.name()); + .collect_ca(ca.name().clone()); Ok(out.into_series()) } @@ -193,7 +193,7 @@ struct TimeZone { fn convert_timezone(input_fields: &[Field], kwargs: TimeZone) -> PolarsResult { FieldsMapper::new(input_fields).try_map_dtype(|dtype| match dtype { - DataType::Datetime(tu, _) => Ok(DataType::Datetime(*tu, Some(kwargs.tz.clone()))), + DataType::Datetime(tu, _) => Ok(DataType::Datetime(*tu, Some(kwargs.tz.into()))), _ => polars_bail!(ComputeError: "expected datetime"), }) } @@ -206,6 +206,6 @@ fn change_time_zone(input: &[Series], kwargs: TimeZone) -> PolarsResult let ca = input.datetime()?; let mut out = ca.clone(); - out.set_time_zone(kwargs.tz)?; + out.set_time_zone(kwargs.tz.into())?; Ok(out.into_series()) } diff --git a/example/io_plugin/io_plugin/src/lib.rs b/example/io_plugin/io_plugin/src/lib.rs index 79908a1..54c11b7 100644 --- a/example/io_plugin/io_plugin/src/lib.rs +++ b/example/io_plugin/io_plugin/src/lib.rs @@ -42,7 +42,7 @@ impl RandomSource { .iter() .map(|s| { let s = s.0.lock().unwrap(); - Field::new(s.name(), s.dtype()) + Field::new(s.name().into(), s.dtype()) }) .collect::(); PySchema(Arc::new(schema)) diff --git a/example/io_plugin/io_plugin/src/samplers.rs b/example/io_plugin/io_plugin/src/samplers.rs index 07d5f3d..1760b87 100644 --- a/example/io_plugin/io_plugin/src/samplers.rs +++ b/example/io_plugin/io_plugin/src/samplers.rs @@ -61,7 +61,7 @@ where let v = self.d.sample(&mut self.rng); out.push(v); } - Series::from_vec(self.name(), out) + Series::from_vec(self.name().into(), out) } } @@ -107,7 +107,7 @@ impl Sampler for BernoulliSample { } Series::from_arrow( - self.name(), + self.name().into(), BooleanArray::from_data_default(bits.freeze(), None).boxed(), ) .unwrap() diff --git a/pyo3-polars-derive/Cargo.toml b/pyo3-polars-derive/Cargo.toml index 7e2d212..b9f5059 100644 --- a/pyo3-polars-derive/Cargo.toml +++ b/pyo3-polars-derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyo3-polars-derive" -version = "0.10.0" +version = "0.11.0" edition = "2021" license = "MIT" readme = "README.md" diff --git a/pyo3-polars-derive/src/lib.rs b/pyo3-polars-derive/src/lib.rs index 097196e..fbf4831 100644 --- a/pyo3-polars-derive/src/lib.rs +++ b/pyo3-polars-derive/src/lib.rs @@ -171,7 +171,6 @@ fn create_expression_function(ast: syn::ItemFn) -> proc_macro2::TokenStream { #quote_call #quote_process_result - () }); if panic_result.is_err() { diff --git a/pyo3-polars/Cargo.toml b/pyo3-polars/Cargo.toml index 374455d..3115782 100644 --- a/pyo3-polars/Cargo.toml +++ b/pyo3-polars/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyo3-polars" -version = "0.16.1" +version = "0.17.0" edition = "2021" license = "MIT" readme = "../README.md" @@ -19,7 +19,7 @@ polars-ffi = { workspace = true, optional = true } polars-lazy = { workspace = true, optional = true } polars-plan = { workspace = true, optional = true } pyo3 = "0.22.2" -pyo3-polars-derive = { version = "0.10.0", path = "../pyo3-polars-derive", optional = true } +pyo3-polars-derive = { version = "0.11.0", path = "../pyo3-polars-derive", optional = true } serde = { version = "1", optional = true } serde-pickle = { version = "1", optional = true } thiserror = "1" diff --git a/pyo3-polars/src/alloc.rs b/pyo3-polars/src/alloc.rs index b2b9898..86b18a5 100644 --- a/pyo3-polars/src/alloc.rs +++ b/pyo3-polars/src/alloc.rs @@ -92,6 +92,12 @@ impl PolarsAllocator { } } +impl Default for PolarsAllocator { + fn default() -> Self { + Self::new() + } +} + unsafe impl GlobalAlloc for PolarsAllocator { #[inline] unsafe fn alloc(&self, layout: Layout) -> *mut u8 { diff --git a/pyo3-polars/src/derive.rs b/pyo3-polars/src/derive.rs index b1aa705..53fc440 100644 --- a/pyo3-polars/src/derive.rs +++ b/pyo3-polars/src/derive.rs @@ -16,7 +16,7 @@ thread_local! { static LAST_ERROR: RefCell = RefCell::new(CString::default()); } -pub unsafe fn _parse_kwargs<'a, T>(kwargs: &'a [u8]) -> PolarsResult +pub fn _parse_kwargs<'a, T>(kwargs: &'a [u8]) -> PolarsResult where T: Deserialize<'a>, { @@ -30,12 +30,14 @@ pub fn _update_last_error(err: PolarsError) { } pub fn _set_panic() { - let msg = format!("PANIC"); + let msg = "PANIC"; let msg = CString::new(msg).unwrap(); LAST_ERROR.with(|prev| *prev.borrow_mut() = msg) } #[no_mangle] +/// # Safety +/// FFI function, so unsafe pub unsafe extern "C" fn _polars_plugin_get_last_error_message() -> *const std::os::raw::c_char { LAST_ERROR.with(|prev| prev.borrow_mut().as_ptr()) } @@ -53,6 +55,8 @@ fn start_up_init() { } #[no_mangle] +/// # Safety +/// FFI function, so unsafe pub unsafe extern "C" fn _polars_plugin_get_version() -> u32 { if !INIT.swap(true, Ordering::Relaxed) { // Plugin version is is always called at least once. diff --git a/pyo3-polars/src/ffi/to_py.rs b/pyo3-polars/src/ffi/to_py.rs index 765d85e..4070d81 100644 --- a/pyo3-polars/src/ffi/to_py.rs +++ b/pyo3-polars/src/ffi/to_py.rs @@ -10,8 +10,8 @@ pub(crate) fn to_py_array( pyarrow: Bound<'_, PyModule>, ) -> PyResult { let schema = Box::new(ffi::export_field_to_c(&ArrowField::new( - "", - array.data_type().clone(), + "".into(), + array.dtype().clone(), true, ))); let array = Box::new(ffi::export_array_to_c(array)); diff --git a/pyo3-polars/src/ffi/to_rust.rs b/pyo3-polars/src/ffi/to_rust.rs index 0590d09..6d3bd6a 100644 --- a/pyo3-polars/src/ffi/to_rust.rs +++ b/pyo3-polars/src/ffi/to_rust.rs @@ -21,7 +21,7 @@ pub fn array_to_rust(obj: &Bound) -> PyResult { unsafe { let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyPolarsErr::from)?; - let array = ffi::import_array_from_c(*array, field.data_type).map_err(PyPolarsErr::from)?; + let array = ffi::import_array_from_c(*array, field.dtype).map_err(PyPolarsErr::from)?; Ok(array) } } diff --git a/pyo3-polars/src/types.rs b/pyo3-polars/src/types.rs index 8d26ba7..8fe0752 100644 --- a/pyo3-polars/src/types.rs +++ b/pyo3-polars/src/types.rs @@ -79,7 +79,8 @@ impl<'py> FromPyObject<'py> for PyField { .str()? .extract::()?; let dtype = ob.getattr(intern!(py, "dtype"))?.extract::()?; - Ok(PyField(Field::new(&name, dtype.0))) + let name: &str = name.as_ref(); + Ok(PyField(Field::new(name.into(), dtype.0))) } } @@ -177,8 +178,9 @@ impl<'a> FromPyObject<'a> for PySeries { } let arr = ob.call_method("to_arrow", (), Some(&kwargs))?; let arr = ffi::to_rust::array_to_rust(&arr)?; + let name = name.as_ref(); Ok(PySeries( - Series::try_from((&*name, arr)).map_err(PyPolarsErr::from)?, + Series::try_from((PlSmallStr::from(name), arr)).map_err(PyPolarsErr::from)?, )) } } @@ -246,8 +248,8 @@ impl IntoPy for PySeries { for i in 0..self.0.n_chunks() { let array = self.0.to_arrow(i, compat_level); let schema = Box::new(arrow::ffi::export_field_to_c(&ArrowField::new( - "", - array.data_type().clone(), + "".into(), + array.dtype().clone(), true, ))); let array = Box::new(arrow::ffi::export_array_to_c(array.clone())); @@ -260,7 +262,7 @@ impl IntoPy for PySeries { // Somehow we need to clone the Vec, because pyo3 doesn't accept a slice here. let pyseries = import_arrow_from_c - .call1((self.0.name(), chunk_ptrs.clone())) + .call1((self.0.name().as_str(), chunk_ptrs.clone())) .unwrap(); // Deallocate boxes for (schema_ptr, array_ptr) in chunk_ptrs { @@ -285,7 +287,7 @@ impl IntoPy for PySeries { // Go via pyarrow Err(_) => { let s = self.0.rechunk(); - let name = s.name(); + let name = s.name().as_str(); let arr = s.to_arrow(0, CompatLevel::oldest()); let pyarrow = py.import_bound("pyarrow").expect("pyarrow not installed"); @@ -432,7 +434,7 @@ impl ToPyObject for PyDataType { DataType::Datetime(tu, tz) => { let datetime_class = pl.getattr(intern!(py, "Datetime")).unwrap(); datetime_class - .call1((tu.to_ascii(), tz.clone())) + .call1((tu.to_ascii(), tz.as_ref().map(|s| s.as_str()))) .unwrap() .into() } @@ -459,7 +461,7 @@ impl ToPyObject for PyDataType { // we should always have an initialized rev_map coming from rust let categories = rev_map.as_ref().unwrap().get_categories(); let class = pl.getattr(intern!(py, "Enum")).unwrap(); - let s = Series::from_arrow("category", categories.clone().boxed()).unwrap(); + let s = Series::from_arrow("category".into(), categories.clone().boxed()).unwrap(); let series = to_series(py, PySeries(s)); return class.call1((series,)).unwrap().into(); } @@ -469,7 +471,7 @@ impl ToPyObject for PyDataType { let field_class = pl.getattr(intern!(py, "Field")).unwrap(); let iter = fields.iter().map(|fld| { let name = fld.name().as_str(); - let dtype = PyDataType(fld.data_type().clone()).to_object(py); + let dtype = PyDataType(fld.dtype().clone()).to_object(py); field_class.call1((name, dtype)).unwrap() }); let fields = PyList::new_bound(py, iter); @@ -598,8 +600,8 @@ impl<'py> FromPyObject<'py> for PyDataType { let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap(); let time_unit = time_unit.extract::()?.0; let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap(); - let time_zone = time_zone.extract()?; - DataType::Datetime(time_unit, time_zone) + let time_zone: Option = time_zone.extract()?; + DataType::Datetime(time_unit, time_zone.map(PlSmallStr::from)) }, "Duration" => { let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap();