Skip to content

Commit 5cd5814

Browse files
committed
wip
1 parent 05f4876 commit 5cd5814

File tree

23 files changed

+694
-322
lines changed

23 files changed

+694
-322
lines changed

crates/re_arrow_store/benches/data_store.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ fn build_table(n: usize, packed: bool) -> DataTable {
259259
// Do a serialization roundtrip to pack everything in contiguous memory.
260260
if packed {
261261
let (schema, columns) = table.serialize().unwrap();
262-
table = DataTable::deserialize(MsgId::ZERO, &schema, &columns).unwrap();
262+
table = DataTable::deserialize(MsgId::ZERO, &schema, &columns, false).unwrap();
263263
}
264264

265265
table

crates/re_arrow_store/src/store.rs

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ fn datastore_internal_repr() {
262262
},
263263
);
264264

265-
let timeless = DataTable::example(false);
265+
let timeless = DataTable::example(true);
266266
eprintln!("{timeless}");
267267
store.insert_table(&timeless).unwrap();
268268

@@ -317,38 +317,31 @@ pub struct IndexedTable {
317317
/// to free up space.
318318
pub all_components: IntSet<ComponentName>,
319319

320-
/// The total number of rows in this indexed table, accounting for all buckets.
321-
pub total_rows: u64,
320+
/// The number of rows stored in this table, across all of its buckets.
321+
pub buckets_num_rows: u64,
322322

323-
/// The size of this table in bytes across all of its buckets, accounting for both data and
324-
/// metadata.
323+
/// The size of both the control & component data stored in this table, across all of its
324+
/// buckets, in bytes.
325325
///
326-
/// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
327-
/// cache this.
328-
/// Also: there are many buckets.
329-
pub total_size_bytes: u64,
326+
/// This is a best-effort approximation, adequate for most purposes (stats,
327+
/// triggering GCs, ...).
328+
pub buckets_size_bytes: u64,
330329
}
331330

332331
impl IndexedTable {
333332
pub fn new(cluster_key: ComponentName, timeline: Timeline, ent_path: EntityPath) -> Self {
333+
let bucket = IndexedBucket::new(cluster_key, timeline);
334+
let buckets_size_bytes = bucket.size_bytes();
334335
Self {
335336
timeline,
336337
ent_path,
337-
buckets: [(i64::MIN.into(), IndexedBucket::new(cluster_key, timeline))].into(),
338+
buckets: [(i64::MIN.into(), bucket)].into(),
338339
cluster_key,
339340
all_components: Default::default(),
340-
total_rows: 0,
341-
total_size_bytes: 0, // TODO(#1619)
341+
buckets_num_rows: 0,
342+
buckets_size_bytes,
342343
}
343344
}
344-
345-
/// Returns a read-only iterator over the raw buckets.
346-
///
347-
/// Do _not_ use this to try and test the internal state of the datastore.
348-
#[doc(hidden)]
349-
pub fn iter_buckets(&self) -> impl ExactSizeIterator<Item = &IndexedBucket> {
350-
self.buckets.values()
351-
}
352345
}
353346

354347
/// An `IndexedBucket` holds a chunk of rows from an [`IndexedTable`]
@@ -414,25 +407,29 @@ pub struct IndexedBucketInner {
414407
/// (i.e. the table is sparse).
415408
pub columns: IntMap<ComponentName, DataCellColumn>,
416409

417-
/// The size of this bucket in bytes, accounting for both data and metadata.
410+
/// The size of both the control & component data stored in this bucket, in bytes.
418411
///
419-
/// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
420-
/// cache this.
421-
pub total_size_bytes: u64,
412+
/// This is a best-effort approximation, adequate for most purposes (stats,
413+
/// triggering GCs, ...).
414+
///
415+
/// We cache this because there can be many, many buckets.
416+
pub size_bytes: u64,
422417
}
423418

424419
impl Default for IndexedBucketInner {
425420
fn default() -> Self {
426-
Self {
421+
let mut this = Self {
427422
is_sorted: true,
428423
time_range: TimeRange::new(i64::MAX.into(), i64::MIN.into()),
429424
col_time: Default::default(),
430425
col_insert_id: Default::default(),
431426
col_row_id: Default::default(),
432427
col_num_instances: Default::default(),
433428
columns: Default::default(),
434-
total_size_bytes: 0, // TODO(#1619)
435-
}
429+
size_bytes: 0, // NOTE: computed below
430+
};
431+
this.compute_size_bytes();
432+
this
436433
}
437434
}
438435

@@ -476,15 +473,20 @@ pub struct PersistentIndexedTable {
476473
/// The cells are optional since not all rows will have data for every single component
477474
/// (i.e. the table is sparse).
478475
pub columns: IntMap<ComponentName, DataCellColumn>,
479-
480-
/// The size of this indexed table in bytes, accounting for both data and metadata.
481-
///
482-
/// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
483-
/// cache this.
484-
pub total_size_bytes: u64,
485476
}
486477

487478
impl PersistentIndexedTable {
479+
pub fn new(cluster_key: ComponentName, ent_path: EntityPath) -> Self {
480+
Self {
481+
cluster_key,
482+
ent_path,
483+
col_insert_id: Default::default(),
484+
col_row_id: Default::default(),
485+
col_num_instances: Default::default(),
486+
columns: Default::default(),
487+
}
488+
}
489+
488490
pub fn is_empty(&self) -> bool {
489491
self.col_num_instances.is_empty()
490492
}

crates/re_arrow_store/src/store_arrow.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ impl IndexedBucket {
3838
col_row_id,
3939
col_num_instances,
4040
columns,
41-
total_size_bytes: _,
41+
size_bytes: _,
4242
} = &*inner.read();
4343

4444
serialize(
@@ -72,7 +72,6 @@ impl PersistentIndexedTable {
7272
col_row_id,
7373
col_num_instances,
7474
columns,
75-
total_size_bytes: _,
7675
} = self;
7776

7877
serialize(

crates/re_arrow_store/src/store_format.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ impl std::fmt::Display for DataStore {
3434
format!(
3535
"{} timeless indexed tables, for a total of {} across {} total rows\n",
3636
timeless_tables.len(),
37-
format_bytes(self.total_timeless_index_size_bytes() as _),
38-
format_number(self.total_timeless_index_rows() as _)
37+
format_bytes(self.total_timeless_size_bytes() as _),
38+
format_number(self.total_timeless_rows() as _)
3939
),
4040
))?;
4141
f.write_str(&indent::indent_all_by(4, "timeless_tables: [\n"))?;
@@ -53,8 +53,8 @@ impl std::fmt::Display for DataStore {
5353
format!(
5454
"{} indexed tables, for a total of {} across {} total rows\n",
5555
tables.len(),
56-
format_bytes(self.total_temporal_index_size_bytes() as _),
57-
format_number(self.total_temporal_index_rows() as _)
56+
format_bytes(self.total_temporal_size_bytes() as _),
57+
format_number(self.total_temporal_rows() as _)
5858
),
5959
))?;
6060
f.write_str(&indent::indent_all_by(4, "tables: [\n"))?;
@@ -83,8 +83,8 @@ impl std::fmt::Display for IndexedTable {
8383
buckets,
8484
cluster_key: _,
8585
all_components: _,
86-
total_rows: _,
87-
total_size_bytes: _,
86+
buckets_num_rows: _,
87+
buckets_size_bytes: _,
8888
} = self;
8989

9090
f.write_fmt(format_args!("timeline: {}\n", timeline.name()))?;
@@ -116,8 +116,8 @@ impl std::fmt::Display for IndexedBucket {
116116
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117117
f.write_fmt(format_args!(
118118
"size: {} across {} rows\n",
119-
format_bytes(self.total_size_bytes() as _),
120-
format_number(self.total_rows() as _),
119+
format_bytes(self.size_bytes() as _),
120+
format_number(self.num_rows() as _),
121121
))?;
122122

123123
let time_range = {
@@ -156,7 +156,6 @@ impl std::fmt::Display for PersistentIndexedTable {
156156
col_row_id: _,
157157
col_num_instances: _,
158158
columns: _,
159-
total_size_bytes: _,
160159
} = self;
161160

162161
f.write_fmt(format_args!("entity: {ent_path}\n"))?;

crates/re_arrow_store/src/store_polars.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,6 @@ impl PersistentIndexedTable {
176176
col_row_id,
177177
col_num_instances,
178178
columns,
179-
total_size_bytes: _,
180179
} = self;
181180

182181
let num_rows = self.total_rows() as usize;
@@ -217,7 +216,7 @@ impl IndexedBucket {
217216
col_row_id,
218217
col_num_instances,
219218
columns,
220-
total_size_bytes: _,
219+
size_bytes: _,
221220
} = &*self.inner.read();
222221

223222
let (_, times) = DataTable::serialize_primitive_column(

crates/re_arrow_store/src/store_read.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ impl IndexedBucket {
655655
col_row_id: _,
656656
col_num_instances: _,
657657
columns,
658-
total_size_bytes: _, // TODO(#1619)
658+
size_bytes: _,
659659
} = &*self.inner.read();
660660
debug_assert!(is_sorted);
661661

@@ -761,7 +761,7 @@ impl IndexedBucket {
761761
col_row_id,
762762
col_num_instances: _,
763763
columns,
764-
total_size_bytes: _, // TODO(#1619)
764+
size_bytes: _,
765765
} = &*self.inner.read();
766766
debug_assert!(is_sorted);
767767

@@ -873,7 +873,7 @@ impl IndexedBucketInner {
873873
col_row_id,
874874
col_num_instances,
875875
columns,
876-
total_size_bytes: _,
876+
size_bytes: _,
877877
} = self;
878878

879879
if *is_sorted {

0 commit comments

Comments
 (0)