Skip to content

Commit

Permalink
feat: sst-metadata support sort (apache#1042)
Browse files Browse the repository at this point in the history
## Rationale

When debugging SSTs, it's useful to list them ordered by
time, max sequence number, or size.

## Detailed Changes
- Add a `sort` option (valid values: `seq`, `time`, `size`, `row`).

## Test Plan
  • Loading branch information
jiacai2050 authored and dust1 committed Aug 9, 2023
1 parent 56230b2 commit 9e9c79c
Showing 1 changed file with 57 additions and 8 deletions.
65 changes: 57 additions & 8 deletions tools/src/bin/sst-metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

//! A cli to query sst meta data

use std::{collections::HashMap, sync::Arc};
use std::{collections::HashMap, fmt, str::FromStr, sync::Arc};

use analytic_engine::sst::{meta_data::cache::MetaData, parquet::async_reader::ChunkReaderAdapter};
use anyhow::{Context, Result};
Expand Down Expand Up @@ -34,6 +34,44 @@ struct Args {
/// Print page indexes
#[clap(short, long, required(false))]
page_indexes: bool,

/// Which field to sort ssts[valid: seq/time/size/row].
#[clap(short, long, default_value = "time")]
sort: SortBy,
}

/// Key used to order the collected SST metadata entries before they are
/// processed.
///
/// The value is chosen through the `sort` command-line argument. The type is a
/// plain fieldless tag, so it derives `Clone`/`Copy` and `PartialEq`/`Eq` to
/// be freely passed by value and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SortBy {
    /// Max sequence number (`max_sequence` of the custom metadata).
    Seq,
    /// Time range (ordered by its inclusive start).
    Time,
    /// File size.
    Size,
    /// Row numbers (`num_rows` from the parquet file metadata).
    Row,
}

impl fmt::Display for SortBy {
    /// Writes the variant using its `Debug` rendering, so the displayed text
    /// is the variant identifier itself (for example `Time`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Explicit form of the `write!` macro: build the arguments, then hand
        // them to the formatter.
        f.write_fmt(format_args!("{:?}", self))
    }
}

impl FromStr for SortBy {
    type Err = String;

    /// Parses a sort key from its textual name.
    ///
    /// Accepted names are `seq`, `time`, `size` and `row`, compared
    /// ASCII-case-insensitively. This means the capitalised names produced by
    /// the `Display`/`Debug` rendering of this type (e.g. `Time`) parse back
    /// to the same variant.
    ///
    /// # Errors
    ///
    /// Returns a message containing the offending input when it matches none
    /// of the accepted names.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Normalise case once so a single `match` covers every spelling.
        match s.to_ascii_lowercase().as_str() {
            "seq" => Ok(Self::Seq),
            "time" => Ok(Self::Time),
            "size" => Ok(Self::Size),
            "row" => Ok(Self::Row),
            // Report the input exactly as the user typed it.
            _ => Err(format!("Invalid sort by, value:{s}")),
        }
    }
}

#[derive(Default, Debug)]
Expand Down Expand Up @@ -119,13 +157,24 @@ async fn run(args: Args) -> Result<()> {
metas.push(meta);
}

// sort by time_range asc
metas.sort_by(|a, b| {
a.1.custom()
.time_range
.inclusive_start()
.cmp(&b.1.custom().time_range.inclusive_start())
});
match args.sort {
SortBy::Time => metas.sort_by(|a, b| {
a.1.custom()
.time_range
.inclusive_start()
.cmp(&b.1.custom().time_range.inclusive_start())
}),
SortBy::Seq => {
metas.sort_by(|a, b| a.1.custom().max_sequence.cmp(&b.1.custom().max_sequence))
}
SortBy::Size => metas.sort_by(|a, b| a.0.size.cmp(&b.0.size)),
SortBy::Row => metas.sort_by(|a, b| {
a.1.parquet()
.file_metadata()
.num_rows()
.cmp(&b.1.parquet().file_metadata().num_rows())
}),
};

let mut file_stats = FileStatistics::default();
let mut field_stats_map = HashMap::new();
Expand Down

0 comments on commit 9e9c79c

Please sign in to comment.