@@ -21,7 +21,7 @@ use bytes::Bytes;
21
21
use half:: f16;
22
22
23
23
use crate :: bloom_filter:: Sbbf ;
24
- use crate :: file:: page_index:: index :: Index ;
24
+ use crate :: file:: page_index:: column_index :: ColumnIndexMetaData ;
25
25
use crate :: file:: page_index:: offset_index:: OffsetIndexMetaData ;
26
26
use std:: collections:: { BTreeSet , VecDeque } ;
27
27
use std:: str;
@@ -192,7 +192,7 @@ pub struct ColumnCloseResult {
192
192
/// Optional bloom filter for this column
193
193
pub bloom_filter : Option < Sbbf > ,
194
194
/// Optional column index, for filtering
195
- pub column_index : Option < Index > ,
195
+ pub column_index : Option < ColumnIndexMetaData > ,
196
196
/// Optional offset index, identifying page locations
197
197
pub offset_index : Option < OffsetIndexMetaData > ,
198
198
}
@@ -2959,28 +2959,22 @@ mod tests {
2959
2959
assert ! ( r. column_index. is_some( ) ) ;
2960
2960
let col_idx = r. column_index . unwrap ( ) ;
2961
2961
let col_idx = match col_idx {
2962
- Index :: INT32 ( col_idx) => col_idx,
2962
+ ColumnIndexMetaData :: INT32 ( col_idx) => col_idx,
2963
2963
_ => panic ! ( "wrong stats type" ) ,
2964
2964
} ;
2965
2965
// null_pages should be true for page 0
2966
- assert ! ( col_idx. indexes [ 0 ] . is_null_page( ) ) ;
2966
+ assert ! ( col_idx. is_null_page( 0 ) ) ;
2967
2967
// min and max should be empty byte arrays
2968
- assert ! ( col_idx. indexes [ 0 ] . min ( ) . is_none( ) ) ;
2969
- assert ! ( col_idx. indexes [ 0 ] . max ( ) . is_none( ) ) ;
2968
+ assert ! ( col_idx. min_value ( 0 ) . is_none( ) ) ;
2969
+ assert ! ( col_idx. max_value ( 0 ) . is_none( ) ) ;
2970
2970
// null_counts should be defined and be 4 for page 0
2971
- assert ! ( col_idx. indexes [ 0 ] . null_count( ) . is_some( ) ) ;
2972
- assert_eq ! ( col_idx. indexes [ 0 ] . null_count( ) . unwrap ( ) , 4 ) ;
2971
+ assert ! ( col_idx. null_count( 0 ) . is_some( ) ) ;
2972
+ assert_eq ! ( col_idx. null_count( 0 ) , Some ( 4 ) ) ;
2973
2973
// there is no repetition so rep histogram should be absent
2974
- assert ! ( col_idx. indexes [ 0 ] . repetition_level_histogram( ) . is_none( ) ) ;
2974
+ assert ! ( col_idx. repetition_level_histogram( 0 ) . is_none( ) ) ;
2975
2975
// definition_level_histogram should be present and should be 0:4, 1:0
2976
- assert ! ( col_idx. indexes[ 0 ] . definition_level_histogram( ) . is_some( ) ) ;
2977
- assert_eq ! (
2978
- col_idx. indexes[ 0 ]
2979
- . definition_level_histogram( )
2980
- . unwrap( )
2981
- . values( ) ,
2982
- & [ 4 , 0 ]
2983
- ) ;
2976
+ assert ! ( col_idx. definition_level_histogram( 0 ) . is_some( ) ) ;
2977
+ assert_eq ! ( col_idx. definition_level_histogram( 0 ) . unwrap( ) , & [ 4 , 0 ] ) ;
2984
2978
}
2985
2979
2986
2980
#[ test]
@@ -3004,15 +2998,15 @@ mod tests {
3004
2998
3005
2999
// column index
3006
3000
let column_index = match column_index {
3007
- Index :: INT32 ( column_index) => column_index,
3001
+ ColumnIndexMetaData :: INT32 ( column_index) => column_index,
3008
3002
_ => panic ! ( "wrong stats type" ) ,
3009
3003
} ;
3010
- assert_eq ! ( 2 , column_index. indexes . len ( ) ) ;
3004
+ assert_eq ! ( 2 , column_index. num_pages ( ) ) ;
3011
3005
assert_eq ! ( 2 , offset_index. page_locations. len( ) ) ;
3012
3006
assert_eq ! ( BoundaryOrder :: UNORDERED , column_index. boundary_order) ;
3013
3007
for idx in 0 ..2 {
3014
- assert ! ( !column_index. indexes [ idx ] . is_null_page( ) ) ;
3015
- assert_eq ! ( 0 , * column_index. indexes [ idx ] . null_count. as_ref ( ) . unwrap( ) ) ;
3008
+ assert ! ( !column_index. is_null_page( idx ) ) ;
3009
+ assert_eq ! ( 0 , column_index. null_count( idx ) . unwrap( ) ) ; // check each page's null count, not only page 0
3016
3010
}
3017
3011
3018
3012
if let Some ( stats) = r. metadata . statistics ( ) {
@@ -3022,8 +3016,8 @@ mod tests {
3022
3016
// first page is [1,2,3,4]
3023
3017
// second page is [-5,2,4,8]
3024
3018
// note that we don't increment here, as this is a non BinaryArray type.
3025
- assert_eq ! ( stats. min_opt( ) , column_index. indexes [ 1 ] . min ( ) ) ;
3026
- assert_eq ! ( stats. max_opt( ) , column_index. indexes [ 1 ] . max ( ) ) ;
3019
+ assert_eq ! ( stats. min_opt( ) , column_index. min_value ( 1 ) ) ;
3020
+ assert_eq ! ( stats. max_opt( ) , column_index. max_value ( 1 ) ) ;
3027
3021
} else {
3028
3022
panic ! ( "expecting Statistics::Int32" ) ;
3029
3023
}
@@ -3064,25 +3058,25 @@ mod tests {
3064
3058
let offset_index = r. offset_index . unwrap ( ) ;
3065
3059
3066
3060
let column_index = match column_index {
3067
- Index :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3061
+ ColumnIndexMetaData :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3068
3062
_ => panic ! ( "wrong stats type" ) ,
3069
3063
} ;
3070
3064
3071
3065
assert_eq ! ( 3 , r. rows_written) ;
3072
3066
3073
3067
// column index
3074
- assert_eq ! ( 1 , column_index. indexes . len ( ) ) ;
3068
+ assert_eq ! ( 1 , column_index. num_pages ( ) ) ;
3075
3069
assert_eq ! ( 1 , offset_index. page_locations. len( ) ) ;
3076
3070
assert_eq ! ( BoundaryOrder :: ASCENDING , column_index. boundary_order) ;
3077
- assert ! ( !column_index. indexes [ 0 ] . is_null_page( ) ) ;
3078
- assert_eq ! ( Some ( 0 ) , column_index. indexes [ 0 ] . null_count( ) ) ;
3071
+ assert ! ( !column_index. is_null_page( 0 ) ) ;
3072
+ assert_eq ! ( Some ( 0 ) , column_index. null_count( 0 ) ) ;
3079
3073
3080
3074
if let Some ( stats) = r. metadata . statistics ( ) {
3081
3075
assert_eq ! ( stats. null_count_opt( ) , Some ( 0 ) ) ;
3082
3076
assert_eq ! ( stats. distinct_count_opt( ) , None ) ;
3083
3077
if let Statistics :: FixedLenByteArray ( stats) = stats {
3084
- let column_index_min_value = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3085
- let column_index_max_value = column_index. indexes [ 0 ] . max_bytes ( ) . unwrap ( ) ;
3078
+ let column_index_min_value = column_index. min_value ( 0 ) . unwrap ( ) ;
3079
+ let column_index_max_value = column_index. max_value ( 0 ) . unwrap ( ) ;
3086
3080
3087
3081
// Column index stats are truncated, while the column chunk's aren't.
3088
3082
assert_ne ! ( stats. min_bytes_opt( ) . unwrap( ) , column_index_min_value) ;
@@ -3135,25 +3129,25 @@ mod tests {
3135
3129
let offset_index = r. offset_index . unwrap ( ) ;
3136
3130
3137
3131
let column_index = match column_index {
3138
- Index :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3132
+ ColumnIndexMetaData :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3139
3133
_ => panic ! ( "wrong stats type" ) ,
3140
3134
} ;
3141
3135
3142
3136
assert_eq ! ( 1 , r. rows_written) ;
3143
3137
3144
3138
// column index
3145
- assert_eq ! ( 1 , column_index. indexes . len ( ) ) ;
3139
+ assert_eq ! ( 1 , column_index. num_pages ( ) ) ;
3146
3140
assert_eq ! ( 1 , offset_index. page_locations. len( ) ) ;
3147
3141
assert_eq ! ( BoundaryOrder :: ASCENDING , column_index. boundary_order) ;
3148
- assert ! ( !column_index. indexes [ 0 ] . is_null_page( ) ) ;
3149
- assert_eq ! ( Some ( 0 ) , column_index. indexes [ 0 ] . null_count( ) ) ;
3142
+ assert ! ( !column_index. is_null_page( 0 ) ) ;
3143
+ assert_eq ! ( Some ( 0 ) , column_index. null_count( 0 ) ) ;
3150
3144
3151
3145
if let Some ( stats) = r. metadata . statistics ( ) {
3152
3146
assert_eq ! ( stats. null_count_opt( ) , Some ( 0 ) ) ;
3153
3147
assert_eq ! ( stats. distinct_count_opt( ) , None ) ;
3154
3148
if let Statistics :: FixedLenByteArray ( _stats) = stats {
3155
- let column_index_min_value = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3156
- let column_index_max_value = column_index. indexes [ 0 ] . max_bytes ( ) . unwrap ( ) ;
3149
+ let column_index_min_value = column_index. min_value ( 0 ) . unwrap ( ) ;
3150
+ let column_index_max_value = column_index. max_value ( 0 ) . unwrap ( ) ;
3157
3151
3158
3152
assert_eq ! ( column_index_min_value. len( ) , 1 ) ;
3159
3153
assert_eq ! ( column_index_max_value. len( ) , 1 ) ;
@@ -3190,11 +3184,11 @@ mod tests {
3190
3184
// ensure bytes weren't truncated for column index
3191
3185
let column_index = r. column_index . unwrap ( ) ;
3192
3186
let column_index = match column_index {
3193
- Index :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3187
+ ColumnIndexMetaData :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3194
3188
_ => panic ! ( "wrong stats type" ) ,
3195
3189
} ;
3196
- let column_index_min_bytes = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3197
- let column_index_max_bytes = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3190
+ let column_index_min_bytes = column_index. min_value ( 0 ) . unwrap ( ) ;
3191
+ let column_index_max_bytes = column_index. max_value ( 0 ) . unwrap ( ) ;
3198
3192
assert_eq ! ( expected_value, column_index_min_bytes) ;
3199
3193
assert_eq ! ( expected_value, column_index_max_bytes) ;
3200
3194
@@ -3233,11 +3227,11 @@ mod tests {
3233
3227
// ensure bytes weren't truncated for column index
3234
3228
let column_index = r. column_index . unwrap ( ) ;
3235
3229
let column_index = match column_index {
3236
- Index :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3230
+ ColumnIndexMetaData :: FIXED_LEN_BYTE_ARRAY ( column_index) => column_index,
3237
3231
_ => panic ! ( "wrong stats type" ) ,
3238
3232
} ;
3239
- let column_index_min_bytes = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3240
- let column_index_max_bytes = column_index. indexes [ 0 ] . min_bytes ( ) . unwrap ( ) ;
3233
+ let column_index_min_bytes = column_index. min_value ( 0 ) . unwrap ( ) ;
3234
+ let column_index_max_bytes = column_index. max_value ( 0 ) . unwrap ( ) ;
3241
3235
assert_eq ! ( expected_value, column_index_min_bytes) ;
3242
3236
assert_eq ! ( expected_value, column_index_max_bytes) ;
3243
3237
0 commit comments