Skip to content

Commit

Permalink
Upgrade arrow2, arrow2_convert and polars (#665)
Browse files Browse the repository at this point in the history
* Upgrade arrow2, arrow2_convert and polars including unmerged/unreleased PRs
  • Loading branch information
John Hughes authored Jan 4, 2023
1 parent e8d743c commit 996012f
Show file tree
Hide file tree
Showing 16 changed files with 170 additions and 79 deletions.
105 changes: 86 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ repository = "https://github.com/rerun-io/rerun"

[workspace.dependencies]
anyhow = "1.0"
arrow2 = "0.14"
arrow2 = "0.15"
arrow2_convert = "0.3"
comfy-table = { version = "6.1", default-features = false }
ecolor = { version = "0.20" }
lazy_static = "1.4"
polars-core = "0.25"
polars-lazy = "0.25"
polars-core = "0.26"
polars-lazy = "0.26"
polars-ops = "0.26"
puffin = "0.14"
thiserror = "1.0"

Expand Down Expand Up @@ -62,6 +63,9 @@ wgpu = { git = "https://github.com/gfx-rs/wgpu.git", ref = "a377ae2b7fe6c1c94127
wgpu-core = { git = "https://github.com/gfx-rs/wgpu.git", ref = "a377ae2b7fe6c1c9412751166f0917e617164e49" }
#wgpu = { path = "../wgpu/wgpu" }

# Upstream PR https://github.com/jorgecarleitao/arrow2/pull/1334
arrow2 = { git = "https://github.com/rerun-io/arrow2", rev = "ed459eb9e3b49877e3e827753621d75426a8496d" }
# Upstream PR https://github.com/jorgecarleitao/arrow2/pull/1334 merged, but not released
arrow2 = { git = "https://github.com/rerun-io/arrow2", rev = "26d1b1139ac3db22c4973772ddb0d30c16027ff6" }
# Upstream PRs https://github.com/DataEngineeringLabs/arrow2-convert/pull/88 and https://github.com/DataEngineeringLabs/arrow2-convert/pull/89
arrow2_convert = { git = "https://github.com/rerun-io/arrow2-convert", rev = "5b98b537d8095fac807bb6f018536125668f7e90" }
#arrow2 = { path = "../arrow2" }
#arrow2_convert = { path = "../arrow2-convert/arrow2_convert" }
13 changes: 11 additions & 2 deletions crates/re_arrow_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ default = []
## Enables `parking_lot`'s deadlock detection background thread.
deadlock_detection = ["parking_lot/deadlock_detection"]
## Integration with `polars`, to efficiently use the datastore with dataframes.
polars = ["dep:polars-core"]
polars = ["dep:polars-core", "dep:polars-ops"]


[dependencies]
Expand Down Expand Up @@ -45,16 +45,25 @@ polars-core = { workspace = true, optional = true, features = [
"dtype-struct",
"sort_multiple",
] }
polars-ops = { workspace = true, optional = true, features = [
"dtype-date",
"dtype-datetime",
"dtype-time",
"dtype-struct",
] }


[dev-dependencies]
criterion = "0.4"
mimalloc = "0.1"
polars-core = { workspace = true, features = [
"diagonal_concat",
"dtype-date",
"dtype-time",
"dtype-datetime",
"dtype-struct",
"dtype-time",
"fmt",
"sort_multiple",
] }
tracing-subscriber = "0.3"

Expand Down
15 changes: 10 additions & 5 deletions crates/re_arrow_store/src/arrow_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,18 @@ impl ArrayExt for dyn Array {
}
}

/// Return the length of the child at index `child_nr`.
///
/// ## Panics
///
/// Panics if `Self` is not a `ListArray<i32>`, or if `child_nr` is out of bounds (e.g. the array is empty and has no children).
fn get_child_length(&self, child_nr: usize) -> usize {
let offsets = self
.as_any()
self.as_any()
.downcast_ref::<ListArray<i32>>()
.unwrap()
.offsets();

(offsets[child_nr + 1] - offsets[child_nr]) as usize
.offsets()
.lengths()
.nth(child_nr)
.unwrap()
}
}
1 change: 1 addition & 0 deletions crates/re_arrow_store/src/polars_util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow2::array::Array;
use itertools::Itertools;
use polars_core::{prelude::*, series::Series};
use polars_ops::prelude::*;
use re_log_types::{ComponentName, ObjPath as EntityPath, TimeInt};

use crate::{DataStore, LatestAtQuery, RangeQuery};
Expand Down
28 changes: 9 additions & 19 deletions crates/re_arrow_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1079,9 +1079,9 @@ impl ComponentBucket {
fn test_arrow_estimated_size_bytes() {
use arrow2::{
array::{Float64Array, ListArray, StructArray, UInt64Array, Utf8Array},
buffer::Buffer,
compute::aggregate::estimated_bytes_size,
datatypes::{DataType, Field},
offset::Offsets,
};

// simple primitive array
Expand Down Expand Up @@ -1119,16 +1119,11 @@ fn test_arrow_estimated_size_bytes() {
let array_flattened =
UInt64Array::from_vec(data.clone().into_iter().flatten().collect()).boxed();

let mut i = 0i32;
let indices = std::iter::from_fn(move || {
let ret = i;
i += 50;
Some(ret)
});

ListArray::<i32>::from_data(
ListArray::<i32>::new(
ListArray::<i32>::default_datatype(DataType::UInt64),
Buffer::from_iter(indices.take(50)),
Offsets::try_from_lengths(std::iter::repeat(50).take(50))
.unwrap()
.into(),
array_flattened,
None,
)
Expand Down Expand Up @@ -1206,16 +1201,11 @@ fn test_arrow_estimated_size_bytes() {
StructArray::new(DataType::Struct(fields), vec![x, y], None)
};

let mut i = 0i32;
let indices = std::iter::from_fn(move || {
let ret = i;
i += 50;
Some(ret)
});

ListArray::<i32>::from_data(
ListArray::<i32>::new(
ListArray::<i32>::default_datatype(array.data_type().clone()),
Buffer::from_iter(indices.take(50)),
Offsets::try_from_lengths(std::iter::repeat(50).take(50))
.unwrap()
.into(),
array.boxed(),
None,
)
Expand Down
Loading

1 comment on commit 996012f

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust Benchmark

Benchmark suite Current: 996012f Previous: e8d743c Ratio
datastore/insert/batch/rects/insert 275899 ns/iter (± 3696) 272801 ns/iter (± 553) 1.01
datastore/latest_at/batch/rects/query 722 ns/iter (± 1) 710 ns/iter (± 0) 1.02
datastore/latest_at/missing_components/primary 305 ns/iter (± 0) 308 ns/iter (± 0) 0.99
datastore/latest_at/missing_components/secondaries 372 ns/iter (± 0) 372 ns/iter (± 1) 1
datastore/range/batch/rects/query 45278 ns/iter (± 68) 44210 ns/iter (± 977) 1.02
obj_mono_points/insert 941043825 ns/iter (± 6919329) 1000462289 ns/iter (± 6857634) 0.94
obj_mono_points/query 372120 ns/iter (± 3787) 356140 ns/iter (± 6871) 1.04
obj_batch_points/insert 95493615 ns/iter (± 531217) 94532571 ns/iter (± 541359) 1.01
obj_batch_points/query 11478 ns/iter (± 22) 11382 ns/iter (± 20) 1.01
obj_batch_points_sequential/insert 23881484 ns/iter (± 30674) 23712629 ns/iter (± 188324) 1.01
obj_batch_points_sequential/query 7956 ns/iter (± 22) 7945 ns/iter (± 6) 1.00
mono_points_classic/generate_messages 4968611 ns/iter (± 278247) 4383981 ns/iter (± 123814) 1.13
mono_points_classic/encode_log_msg 13838820 ns/iter (± 385900) 11107846 ns/iter (± 581398) 1.25
mono_points_classic/encode_total 20554480 ns/iter (± 922108) 15304519 ns/iter (± 905317) 1.34
mono_points_classic/decode_total 40139140 ns/iter (± 272661) 35057820 ns/iter (± 968934) 1.14
mono_points_arrow/generate_message_bundles 53763709 ns/iter (± 623381) 49054011 ns/iter (± 1065937) 1.10
mono_points_arrow/generate_messages 138735682 ns/iter (± 1345995) 137708001 ns/iter (± 1462436) 1.01
mono_points_arrow/encode_log_msg 169444469 ns/iter (± 916652) 164255358 ns/iter (± 1358897) 1.03
mono_points_arrow/encode_total 363533270 ns/iter (± 2011150) 354737012 ns/iter (± 2134810) 1.02
mono_points_arrow/decode_log_msg 190675666 ns/iter (± 927609) 188765512 ns/iter (± 1036770) 1.01
mono_points_arrow/decode_message_bundles 79889872 ns/iter (± 1072545) 79484487 ns/iter (± 1459358) 1.01
mono_points_arrow/decode_total 267454225 ns/iter (± 1945184) 263107038 ns/iter (± 1776512) 1.02
batch_points_classic/generate_messages 3380 ns/iter (± 47) 3529 ns/iter (± 21) 0.96
batch_points_classic/encode_log_msg 386571 ns/iter (± 568) 381131 ns/iter (± 863) 1.01
batch_points_classic/encode_total 391285 ns/iter (± 733) 385545 ns/iter (± 1720) 1.01
batch_points_classic/decode_total 742352 ns/iter (± 3231) 734592 ns/iter (± 1078) 1.01
batch_points_arrow/generate_message_bundles 325110 ns/iter (± 578) 316438 ns/iter (± 332) 1.03
batch_points_arrow/generate_messages 6308 ns/iter (± 16) 6148 ns/iter (± 13) 1.03
batch_points_arrow/encode_log_msg 352650 ns/iter (± 1469) 351490 ns/iter (± 1382) 1.00
batch_points_arrow/encode_total 717522 ns/iter (± 3683) 702803 ns/iter (± 2400) 1.02
batch_points_arrow/decode_log_msg 346710 ns/iter (± 1303) 352872 ns/iter (± 977) 0.98
batch_points_arrow/decode_message_bundles 2128 ns/iter (± 5) 2130 ns/iter (± 9) 1.00
batch_points_arrow/decode_total 357838 ns/iter (± 1430) 356576 ns/iter (± 1464) 1.00
arrow_mono_points/insert 6863558650 ns/iter (± 38423548) 6874854387 ns/iter (± 27952545) 1.00
arrow_mono_points/query 1690461 ns/iter (± 25183) 1656604 ns/iter (± 14043) 1.02
arrow_batch_points/insert 2692122 ns/iter (± 34525) 2573582 ns/iter (± 10540) 1.05
arrow_batch_points/query 13016 ns/iter (± 29) 16225 ns/iter (± 97) 0.80
obj_batch_points_sequential/Tuid::random 37 ns/iter (± 0) 37 ns/iter (± 0) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.