@@ -32,50 +32,41 @@ use arrow::compute::concat_batches;
3232use arrow:: record_batch:: RecordBatch ;
3333use datafusion:: physical_plan:: collect;
3434use datafusion:: physical_plan:: metrics:: MetricsSet ;
35- use datafusion:: prelude:: { col, lit, lit_timestamp_nano, Expr , SessionContext } ;
35+ use datafusion:: prelude:: {
36+ col, lit, lit_timestamp_nano, Expr , ParquetReadOptions , SessionContext ,
37+ } ;
3638use datafusion:: test_util:: parquet:: { ParquetScanOptions , TestParquetFile } ;
37- use datafusion_common:: instant:: Instant ;
3839use datafusion_expr:: utils:: { conjunction, disjunction, split_conjunction} ;
3940
4041use itertools:: Itertools ;
4142use parquet:: file:: properties:: WriterProperties ;
4243use tempfile:: TempDir ;
43- use test_utils:: AccessLogGenerator ;
4444
4545/// how many rows of generated data to write to our parquet file (arbitrary)
4646const NUM_ROWS : usize = 4096 ;
4747
48- fn generate_file ( tempdir : & TempDir , props : WriterProperties ) -> TestParquetFile {
49- // Tune down the generator for smaller files
50- let generator = AccessLogGenerator :: new ( )
51- . with_row_limit ( NUM_ROWS )
52- . with_pods_per_host ( 1 ..4 )
53- . with_containers_per_pod ( 1 ..2 )
54- . with_entries_per_container ( 128 ..256 ) ;
55-
56- let file = tempdir. path ( ) . join ( "data.parquet" ) ;
57-
58- let start = Instant :: now ( ) ;
59- println ! ( "Writing test data to {file:?}" ) ;
60- let test_parquet_file = TestParquetFile :: try_new ( file, props, generator) . unwrap ( ) ;
61- println ! (
62- "Completed generating test data in {:?}" ,
63- Instant :: now( ) - start
64- ) ;
65- test_parquet_file
66- }
67-
6848#[ tokio:: test]
6949async fn single_file ( ) {
7050 // Only create the parquet file once as it is fairly large
71-
7251 let tempdir = TempDir :: new_in ( Path :: new ( "." ) ) . unwrap ( ) ;
7453 // Set a smaller row group size so we can test with fewer rows
7453 let props = WriterProperties :: builder ( )
7554 . set_max_row_group_size ( 1024 )
7655 . build ( ) ;
77- let test_parquet_file = generate_file ( & tempdir, props) ;
78-
56+ let ctx: SessionContext = SessionContext :: new ( ) ;
57+ let batches = ctx
58+ . read_parquet (
59+ "tests/data/filter_pushdown/single_file.gz.parquet" . to_string ( ) ,
60+ ParquetReadOptions :: default ( ) ,
61+ )
62+ . await
63+ . unwrap ( )
64+ . collect ( )
65+ . await
66+ . unwrap ( ) ;
67+ let test_parquet_file =
68+ TestParquetFile :: try_new ( tempdir. path ( ) . join ( "data.parquet" ) , props, batches)
69+ . unwrap ( ) ;
7970 let case = TestCase :: new ( & test_parquet_file)
8071 . with_name ( "selective" )
8172 // request_method = 'GET'
@@ -227,13 +218,27 @@ async fn single_file() {
227218async fn single_file_small_data_pages ( ) {
228219 let tempdir = TempDir :: new_in ( Path :: new ( "." ) ) . unwrap ( ) ;
229220
230- // Set low row count limit to improve page filtering
221+ // Set a low row count limit to improve page filtering
231222 let props = WriterProperties :: builder ( )
232223 . set_max_row_group_size ( 2048 )
233224 . set_data_page_row_count_limit ( 512 )
234225 . set_write_batch_size ( 512 )
235226 . build ( ) ;
236- let test_parquet_file = generate_file ( & tempdir, props) ;
227+
228+ let ctx: SessionContext = SessionContext :: new ( ) ;
229+ let batches = ctx
230+ . read_parquet (
231+ "tests/data/filter_pushdown/single_file_small_pages.gz.parquet" . to_string ( ) ,
232+ ParquetReadOptions :: default ( ) ,
233+ )
234+ . await
235+ . unwrap ( )
236+ . collect ( )
237+ . await
238+ . unwrap ( ) ;
239+ let test_parquet_file =
240+ TestParquetFile :: try_new ( tempdir. path ( ) . join ( "data.parquet" ) , props, batches)
241+ . unwrap ( ) ;
237242
238243 // The statistics on the 'pod' column are as follows:
239244 //
0 commit comments