From 9bd8ae18990a16402f911e7ae95ac8f289b27fe5 Mon Sep 17 00:00:00 2001 From: Jiacai Liu Date: Tue, 4 Jul 2023 16:52:18 +0800 Subject: [PATCH] feat: sst-metadata support sort (#1042) ## Rationale When debugging SST, it's useful to check sst ordered by time/max_seq/size. ## Detailed Changes - add an option `sort` ## Test Plan --- tools/src/bin/sst-metadata.rs | 65 ++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/tools/src/bin/sst-metadata.rs b/tools/src/bin/sst-metadata.rs index a089ad2da5..a463ce09c9 100644 --- a/tools/src/bin/sst-metadata.rs +++ b/tools/src/bin/sst-metadata.rs @@ -2,7 +2,7 @@ //! A cli to query sst meta data -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, fmt, str::FromStr, sync::Arc}; use analytic_engine::sst::{meta_data::cache::MetaData, parquet::async_reader::ChunkReaderAdapter}; use anyhow::{Context, Result}; @@ -34,6 +34,44 @@ struct Args { /// Print page indexes #[clap(short, long, required(false))] page_indexes: bool, + + /// Which field to sort ssts[valid: seq/time/size/row]. 
+ #[clap(short, long, default_value = "time")] + sort: SortBy, +} + +#[derive(Debug)] +enum SortBy { + /// Max Sequence number + Seq, + /// Time range + Time, + /// File size + Size, + /// Row numbers + Row, +} + +impl fmt::Display for SortBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +impl FromStr for SortBy { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let sort_by = match s { + "seq" => Self::Seq, + "time" => Self::Time, + "size" => Self::Size, + "row" => Self::Row, + _ => return Err(format!("Invalid sort by, value:{s}")), + }; + + Ok(sort_by) + } } #[derive(Default, Debug)] @@ -119,13 +157,24 @@ async fn run(args: Args) -> Result<()> { metas.push(meta); } - // sort by time_range asc - metas.sort_by(|a, b| { - a.1.custom() - .time_range - .inclusive_start() - .cmp(&b.1.custom().time_range.inclusive_start()) - }); + match args.sort { + SortBy::Time => metas.sort_by(|a, b| { + a.1.custom() + .time_range + .inclusive_start() + .cmp(&b.1.custom().time_range.inclusive_start()) + }), + SortBy::Seq => { + metas.sort_by(|a, b| a.1.custom().max_sequence.cmp(&b.1.custom().max_sequence)) + } + SortBy::Size => metas.sort_by(|a, b| a.0.size.cmp(&b.0.size)), + SortBy::Row => metas.sort_by(|a, b| { + a.1.parquet() + .file_metadata() + .num_rows() + .cmp(&b.1.parquet().file_metadata().num_rows()) + }), + }; let mut file_stats = FileStatistics::default(); let mut field_stats_map = HashMap::new();