@@ -262,7 +262,7 @@ fn datastore_internal_repr() {
262262 } ,
263263 ) ;
264264
265- let timeless = DataTable :: example ( false ) ;
265+ let timeless = DataTable :: example ( true ) ;
266266 eprintln ! ( "{timeless}" ) ;
267267 store. insert_table ( & timeless) . unwrap ( ) ;
268268
@@ -317,38 +317,31 @@ pub struct IndexedTable {
317317 /// to free up space.
318318 pub all_components : IntSet < ComponentName > ,
319319
320- /// The total number of rows in this indexed table, accounting for all buckets.
321- pub total_rows : u64 ,
320+ /// The number of rows stored in this table, across all of its buckets.
321+ pub buckets_num_rows : u64 ,
322322
323- /// The size of this table in bytes across all of its buckets, accounting for both data and
324- /// metadata .
323+ /// The size of both the control & component data stored in this table, across all of its
324+ /// buckets, in bytes .
325325 ///
326- /// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
327- /// cache this.
328- /// Also: there are many buckets.
329- pub total_size_bytes : u64 ,
326+ /// This is a best-effort approximation, adequate for most purposes (stats,
327+ /// triggering GCs, ...).
328+ pub buckets_size_bytes : u64 ,
330329}
331330
332331impl IndexedTable {
333332 pub fn new ( cluster_key : ComponentName , timeline : Timeline , ent_path : EntityPath ) -> Self {
333+ let bucket = IndexedBucket :: new ( cluster_key, timeline) ;
334+ let buckets_size_bytes = bucket. size_bytes ( ) ;
334335 Self {
335336 timeline,
336337 ent_path,
337- buckets : [ ( i64:: MIN . into ( ) , IndexedBucket :: new ( cluster_key , timeline ) ) ] . into ( ) ,
338+ buckets : [ ( i64:: MIN . into ( ) , bucket ) ] . into ( ) ,
338339 cluster_key,
339340 all_components : Default :: default ( ) ,
340- total_rows : 0 ,
341- total_size_bytes : 0 , // TODO(#1619)
341+ buckets_num_rows : 0 ,
342+ buckets_size_bytes ,
342343 }
343344 }
344-
345- /// Returns a read-only iterator over the raw buckets.
346- ///
347- /// Do _not_ use this to try and test the internal state of the datastore.
348- #[ doc( hidden) ]
349- pub fn iter_buckets ( & self ) -> impl ExactSizeIterator < Item = & IndexedBucket > {
350- self . buckets . values ( )
351- }
352345}
353346
354347/// An `IndexedBucket` holds a chunk of rows from an [`IndexedTable`]
@@ -414,25 +407,29 @@ pub struct IndexedBucketInner {
414407 /// (i.e. the table is sparse).
415408 pub columns : IntMap < ComponentName , DataCellColumn > ,
416409
417- /// The size of this bucket in bytes, accounting for both data and metadata .
410+ /// The size of both the control & component data stored in this bucket, in bytes .
418411 ///
419- /// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
420- /// cache this.
421- pub total_size_bytes : u64 ,
412+ /// This is a best-effort approximation, adequate for most purposes (stats,
413+ /// triggering GCs, ...).
414+ ///
415+ /// We cache this because there can be many, many buckets.
416+ pub size_bytes : u64 ,
422417}
423418
424419impl Default for IndexedBucketInner {
425420 fn default ( ) -> Self {
426- Self {
421+ let mut this = Self {
427422 is_sorted : true ,
428423 time_range : TimeRange :: new ( i64:: MAX . into ( ) , i64:: MIN . into ( ) ) ,
429424 col_time : Default :: default ( ) ,
430425 col_insert_id : Default :: default ( ) ,
431426 col_row_id : Default :: default ( ) ,
432427 col_num_instances : Default :: default ( ) ,
433428 columns : Default :: default ( ) ,
434- total_size_bytes : 0 , // TODO(#1619)
435- }
429+ size_bytes : 0 , // NOTE: computed below
430+ } ;
431+ this. compute_size_bytes ( ) ;
432+ this
436433 }
437434}
438435
@@ -476,15 +473,20 @@ pub struct PersistentIndexedTable {
476473 /// The cells are optional since not all rows will have data for every single component
477474 /// (i.e. the table is sparse).
478475 pub columns : IntMap < ComponentName , DataCellColumn > ,
479-
480- /// The size of this indexed table in bytes, accounting for both data and metadata.
481- ///
482- /// Accurately computing the size of arrow arrays is surprisingly costly, which is why we
483- /// cache this.
484- pub total_size_bytes : u64 ,
485476}
486477
487478impl PersistentIndexedTable {
479+ pub fn new ( cluster_key : ComponentName , ent_path : EntityPath ) -> Self {
480+ Self {
481+ cluster_key,
482+ ent_path,
483+ col_insert_id : Default :: default ( ) ,
484+ col_row_id : Default :: default ( ) ,
485+ col_num_instances : Default :: default ( ) ,
486+ columns : Default :: default ( ) ,
487+ }
488+ }
489+
488490 pub fn is_empty ( & self ) -> bool {
489491 self . col_num_instances . is_empty ( )
490492 }
0 commit comments