diff --git a/Cargo.lock b/Cargo.lock index a9e7f7c432..578fae294d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,12 +176,6 @@ dependencies = [ "parking_lot 0.11.2", ] -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - [[package]] name = "arrayref" version = "0.3.7" @@ -370,16 +364,6 @@ dependencies = [ "tonic", ] -[[package]] -name = "arrow-format" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd" -dependencies = [ - "planus", - "serde", -] - [[package]] name = "arrow-ipc" version = "36.0.0" @@ -477,26 +461,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "arrow2" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0" -dependencies = [ - "arrow-format", - "base64 0.13.1", - "bytemuck", - "chrono", - "either", - "fallible-streaming-iterator", - "futures 0.3.28", - "hash_hasher", - "num-traits", - "parquet2", - "simdutf8", - "streaming-iterator", -] - [[package]] name = "arrow_ext" version = "1.1.0" @@ -727,7 +691,6 @@ dependencies = [ "analytic_engine", "arena", "arrow 36.0.0", - "arrow2", "base64 0.13.1", "clap 3.2.23", "common_types", @@ -842,15 +805,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitpacking" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" -dependencies = [ - "crunchy", -] - [[package]] name = "bitvec" version = "1.0.1" @@ -1043,20 +997,6 @@ name = "bytemuck" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" -dependencies = [ - "bytemuck_derive", -] - -[[package]] -name = "bytemuck_derive" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.12", -] [[package]] name = "byteorder" @@ -2434,12 +2374,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "fastrand" version = "1.9.0" @@ -2913,12 +2847,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - [[package]] name = "hashbrown" version = "0.12.3" @@ -3255,21 +3183,11 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "integer-encoding" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" - [[package]] name = "integer-encoding" version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -dependencies = [ - "async-trait", - "futures-util", -] [[package]] name = "interpreters" @@ -3896,7 +3814,6 @@ dependencies = [ "futures 0.3.28", "log", "prost", - "rand 0.7.3", "reqwest", "serde", "serde_json", @@ -4507,15 +4424,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "2.10.0" @@ -4627,36 +4535,12 @@ dependencies = [ "paste 1.0.12", "seq-macro", "snap", - "thrift 0.17.0", + "thrift", "tokio", "twox-hash", "zstd 0.12.3+zstd.1.5.2", ] -[[package]] -name = "parquet-format-async-temp" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1a672c84c3e5b5eb6530286b2d22cc1ea8e1e3560e4c314218d6ab749c6db99" -dependencies = [ - "async-trait", - "futures 0.3.28", - "integer-encoding 3.0.4", -] - -[[package]] -name = "parquet2" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fd2690ad041f9296876daef1f2706f6347073bdbcc719090887f1691e4a09d" -dependencies = [ - "async-stream", - "bitpacking", - "futures 0.3.28", - "parquet-format-async-temp", - "streaming-decompression", -] - [[package]] name = "parquet_ext" version = "1.1.0" @@ -4670,7 +4554,6 @@ dependencies = [ "datafusion-expr", "log", "parquet", - "thrift 0.13.0", "tokio", ] @@ -4698,7 +4581,7 @@ dependencies = [ "schema", "snafu 0.7.4", "thiserror", - "thrift 0.17.0", + "thrift", "tokio", "uuid 1.3.0", "workspace-hack", @@ -4890,15 +4773,6 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" -[[package]] -name = "planus" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffebaf174d6cad46a5f0f1bb1c45c6eb509571688bcb18dfab217f3c9f9b151" -dependencies = [ - "array-init-cursor", -] - [[package]] name = "plotters" version = "0.3.4" @@ -5956,7 +5830,7 @@ dependencies = [ "crc32c", "flate2", "futures 0.3.28", - "integer-encoding 3.0.4", + "integer-encoding", "lz4", "parking_lot 0.12.1", "pin-project-lite", @@ -6834,21 +6708,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" -[[package]] -name = "streaming-decompression" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6cc3b19bfb128a8ad11026086e31d3ce9ad23f8ea37354b31383a187c44cf3" -dependencies = [ - "fallible-streaming-iterator", -] - -[[package]] -name = "streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" - [[package]] name = "stringprep" version = "0.1.2" @@ -7177,19 +7036,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "thrift" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" -dependencies = [ - "byteorder", - "integer-encoding 1.1.7", - "log", - "ordered-float 1.1.1", - "threadpool", -] - [[package]] name = "thrift" version = "0.17.0" @@ -7197,7 +7043,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ "byteorder", - "integer-encoding 3.0.4", + "integer-encoding", "log", "ordered-float 2.10.0", "threadpool", @@ -8432,7 +8278,7 @@ dependencies = [ "sqlx-macros", "syn 1.0.109", "syn 2.0.12", - "thrift 0.17.0", + "thrift", "tokio", "tokio-stream", "tokio-util", diff --git a/benchmarks/benches/bench.rs b/benchmarks/benches/bench.rs index c134cb47c3..a35a5296d9 100644 --- a/benchmarks/benches/bench.rs +++ b/benchmarks/benches/bench.rs @@ -1,11 +1,10 @@ -// Copyright 2022 CeresDB Project Authors. Licensed under Apache-2.0. +// Copyright 2022-2023 CeresDB Project Authors. Licensed under Apache-2.0. //! Benchmarks use std::sync::Once; use benchmarks::{ - arrow2_bench::Arrow2Bench, config::{self, BenchConfig}, merge_memtable_bench::MergeMemTableBench, merge_sst_bench::MergeSstBench, @@ -171,33 +170,6 @@ fn bench_merge_memtable(c: &mut Criterion) { group.finish(); } -fn bench_arrow2_iter(b: &mut Bencher<'_>, bench: &Arrow2Bench) { - b.iter(|| bench.run_bench()) -} - -fn bench_arrow2(c: &mut Criterion) { - let config = init_bench(); - - let mut group = c.benchmark_group("read_arrow2"); - - group.measurement_time(config.sst_bench.bench_measurement_time.0); - group.sample_size(config.sst_bench.bench_sample_size); - - let mut bench = Arrow2Bench::new(config.sst_bench); - - for i in 0..bench.num_benches() { - bench.init_for_bench(i); - - group.bench_with_input( - BenchmarkId::new("read_arrow2", format!("{}/{}", bench.sst_file_name, i)), - &bench, - bench_arrow2_iter, - ); - } - - group.finish(); -} - fn bench_wal_write_iter(b: &mut Bencher<'_>, bench: &WalWriteBench) { b.iter(|| bench.run_bench()) } @@ -229,7 +201,6 @@ criterion_group!( bench_merge_sst, bench_scan_memtable, bench_merge_memtable, - bench_arrow2, bench_wal_write, ); diff --git a/benchmarks/src/arrow2_bench.rs b/benchmarks/src/arrow2_bench.rs deleted file mode 100644 index 686f186d9c..0000000000 --- a/benchmarks/src/arrow2_bench.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2022 CeresDB Project Authors. Licensed under Apache-2.0. - -//! Arrow 2 bench. - -use std::{fs::File, io::BufReader, path::Path, sync::Arc, time::Instant}; - -use arrow2::io::parquet::read; -use common_util::runtime::Runtime; -use log::info; - -use crate::{config::SstBenchConfig, util}; - -pub struct Arrow2Bench { - store_path: String, - pub sst_file_name: String, - max_projections: usize, - projection: Vec, - runtime: Arc, -} - -impl Arrow2Bench { - pub fn new(config: SstBenchConfig) -> Self { - let runtime = util::new_runtime(config.runtime_thread_num); - - Arrow2Bench { - store_path: config.store_path, - sst_file_name: config.sst_file_name, - max_projections: config.max_projections, - projection: Vec::new(), - runtime: Arc::new(runtime), - } - } - - pub fn num_benches(&self) -> usize { - // One test reads all columns and `max_projections` tests read with projection. - 1 + self.max_projections - } - - pub fn init_for_bench(&mut self, i: usize) { - let projection = if i < self.max_projections { - (0..i + 1).collect() - } else { - Vec::new() - }; - - self.projection = projection; - } - - pub fn run_bench(&self) { - let sst_path = Path::new(&self.store_path).join(&self.sst_file_name); - - self.runtime.block_on(async { - let open_instant = Instant::now(); - let file = BufReader::new(File::open(sst_path).unwrap()); - - let record_reader = if self.projection.is_empty() { - read::FileReader::try_new(file, None, None, None, None).unwrap() - } else { - read::FileReader::try_new(file, Some(&self.projection), None, None, None).unwrap() - }; - let open_cost = open_instant.elapsed(); - - let iter_begin_instant = Instant::now(); - let mut total_rows = 0; - let mut batch_num = 0; - for record_batch in record_reader { - let num_rows = record_batch.unwrap().len(); - total_rows += num_rows; - batch_num += 1; - } - - info!( - "\nParquetBench total rows of sst: {}, total batch num: {}, open cost: {:?}, iter cost: {:?}", - total_rows, - batch_num, - open_cost, - iter_begin_instant.elapsed(), - ); - }); - } -} diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs index 59aa3eba38..d0e622f030 100644 --- a/benchmarks/src/lib.rs +++ b/benchmarks/src/lib.rs @@ -1,10 +1,9 @@ -// Copyright 2022 CeresDB Project Authors. Licensed under Apache-2.0. +// Copyright 2022-2023 CeresDB Project Authors. Licensed under Apache-2.0. //! Utilities for benchmarks. use common_types::SequenceNumber; -pub mod arrow2_bench; pub mod config; pub mod merge_memtable_bench; pub mod merge_sst_bench;