Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions rust/timsseek/src/scoring/scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ impl SecondaryQuery {
}
}

pub struct Scorer<I: GenerallyQueriable<IonAnnot>> {
pub struct Scorer<I: GenerallyQueriable<IonAnnot> + ?Sized> {
pub index_cycle_rt_ms: Arc<[u32]>,
pub index: I,
pub index: Box<I>,
pub tolerance: Tolerance,
// The secondary tolerance is used for ...
// the secondary query and is meant to be
Expand All @@ -81,7 +81,7 @@ pub struct Scorer<I: GenerallyQueriable<IonAnnot>> {
pub fragmented_range: IncludedRange<f64>,
}

impl<I: GenerallyQueriable<IonAnnot>> Scorer<I> {
impl<I: GenerallyQueriable<IonAnnot> + ?Sized> Scorer<I> {
// does inlining do anything here?
#[inline]
fn _build_prescore(&self, item: &QueryItemToScore) -> PreScore {
Expand Down Expand Up @@ -224,7 +224,7 @@ impl FromParallelIterator<(Option<IonSearchResults>, ScoreTimings)> for IonSearc
}
}

impl<I: GenerallyQueriable<IonAnnot>> Scorer<I> {
impl<I: GenerallyQueriable<IonAnnot> + ?Sized> Scorer<I> {
/// Scores a single query item by orchestrating the internal steps.
/// Useful for testing or single-item processing scenarios.
pub fn buffered_score(
Expand Down
6 changes: 6 additions & 0 deletions rust/timsseek/tests/test_scorer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// use adder::add_two;

/// Placeholder smoke test: asserts a trivially true equality so the
/// integration-test target has at least one test that builds and runs.
#[test]
fn it_adds_two() {
    let expected = 1;
    let actual = 1;
    assert_eq!(expected, actual);
}
57 changes: 57 additions & 0 deletions rust/timsseek_cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,74 @@ use serde::{
Deserialize,
Serialize,
};
use timsseek::IonAnnot;
use std::path::PathBuf;
use timsquery::Tolerance;
use timsquery::models::indices::{
ExpandedRawFrameIndex,
QuadSplittedTransposedIndex,
};
use timsquery::GenerallyQueriable;
use std::sync::Arc;
use timsquery::IncludedRange;

use crate::cli::Cli;
use crate::errors;

/// Selects which index implementation `IndexType::build_index` constructs.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub enum IndexType {
/// Built via `QuadSplittedTransposedIndex::from_path_centroided`.
/// Marked `#[default]`, so this is the value of `IndexType::default()`.
#[default]
Transposed,
/// Built via `ExpandedRawFrameIndex::from_path`.
Expanded,
}

/// Bundle returned by `IndexType::build_index`: a type-erased index plus two
/// values read from the concrete index before it is boxed.
pub struct IndexElements {
/// The index behind a trait object, so either concrete index type fits.
pub index: Box<dyn GenerallyQueriable<IonAnnot>>,
/// Clone of the concrete index's `cycle_rt_ms` field.
pub index_cycle_rt_ms: Arc<[u32]>,
/// Value returned by `fragmented_range()` on the concrete index.
pub fragmented_range: IncludedRange<f64>,

}

impl IndexType {
    /// Loads the raw data at `raw_file_path` into the index flavour selected
    /// by `self` and returns it as an [`IndexElements`] bundle.
    ///
    /// The `cycle_rt_ms` values and the fragmented range are read from the
    /// concrete index *before* it is boxed, because they are taken from the
    /// concrete type rather than through the `GenerallyQueriable` trait object.
    ///
    /// # Panics
    ///
    /// Panics if the underlying index constructor returns an error. The panic
    /// message names the index flavour, the offending path and the error.
    pub fn build_index(&self, raw_file_path: &str) -> IndexElements {
        match self {
            IndexType::Expanded => {
                // Throughput seems to be ~ 30% better if I use the centroided version
                // But I like the idea of having the full resolution data available +
                // for the high throughput use case, we have the transposed index.
                let index = ExpandedRawFrameIndex::from_path(raw_file_path).unwrap_or_else(|err| {
                    panic!(
                        "failed to build expanded index from {:?}: {:?}",
                        raw_file_path, err
                    )
                });
                let index_cycle_rt_ms = index.cycle_rt_ms.clone();
                let fragmented_range = index.fragmented_range();
                IndexElements {
                    index: Box::new(index),
                    index_cycle_rt_ms,
                    fragmented_range,
                }
            }
            IndexType::Transposed => {
                let index = QuadSplittedTransposedIndex::from_path_centroided(raw_file_path)
                    .unwrap_or_else(|err| {
                        panic!(
                            "failed to build transposed index from {:?}: {:?}",
                            raw_file_path, err
                        )
                    });
                let index_cycle_rt_ms = index.cycle_rt_ms.clone();
                let fragmented_range = index.fragmented_range();
                IndexElements {
                    index: Box::new(index),
                    index_cycle_rt_ms,
                    fragmented_range,
                }
            }
        }
    }
}

/// Top-level CLI configuration, (de)serializable through serde.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Config {
/// Optional input section.
pub input: Option<InputConfig>,
/// Analysis settings.
pub analysis: AnalysisConfig,
/// Optional output section.
pub output: Option<OutputConfig>,
/// Which index implementation to build. `#[serde(default)]` makes this fall
/// back to `IndexType::default()` (`Transposed`) when the field is absent.
#[serde(default)]
pub index_type: IndexType,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
Expand Down
18 changes: 7 additions & 11 deletions rust/timsseek_cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,12 @@ fn main() -> std::result::Result<(), errors::CliError> {
};

let dotd_file_location = &config.analysis.dotd_file;
let index = QuadSplittedTransposedIndex::from_path_centroided(
// let index = QuadSplittedTransposedIndex::from_path(
dotd_file_location

let index_elems = config.index_type.build_index(dotd_file_location
.clone()
.unwrap() // TODO: Error handling
.to_str()
.expect("Path is not convertable to string"),
)
.unwrap();

let fragmented_range = index.fragmented_range();
.expect("Path is not convertable to string"));

// Process based on input type
match config.input {
Expand All @@ -118,14 +113,14 @@ fn main() -> std::result::Result<(), errors::CliError> {
// }
Some(InputConfig::Speclib { path }) => {
let scorer = Scorer {
index_cycle_rt_ms: index.cycle_rt_ms.clone(),
index,
index_cycle_rt_ms: index_elems.index_cycle_rt_ms,
index: index_elems.index,
tolerance: config.analysis.tolerance.clone(),
secondary_tolerance: config
.analysis
.tolerance
.with_rt_tolerance(RtTolerance::Minutes((0.5, 0.5))),
fragmented_range,
fragmented_range: index_elems.fragmented_range,
};
processing::process_speclib(path, &scorer, config.analysis.chunk_size, &output_config)
.unwrap();
Expand All @@ -139,3 +134,4 @@ fn main() -> std::result::Result<(), errors::CliError> {

Ok(())
}

6 changes: 3 additions & 3 deletions rust/timsseek_cli/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use tracing::{
info,
};

pub fn main_loop<I: GenerallyQueriable<IonAnnot>>(
pub fn main_loop<I: GenerallyQueriable<IonAnnot> + ?Sized>(
// query_iterator: impl ExactSizeIterator<Item = QueryItemToScore>,
// # I would like this to be streaming
query_iterator: Speclib,
Expand Down Expand Up @@ -70,9 +70,9 @@ pub fn main_loop<I: GenerallyQueriable<IonAnnot>>(
Ok(())
}

pub fn process_speclib(
pub fn process_speclib<I: GenerallyQueriable<IonAnnot> + ?Sized>(
path: PathBuf,
scorer: &Scorer<QuadSplittedTransposedIndex>,
scorer: &Scorer<I>,
chunk_size: usize,
output: &OutputConfig,
) -> std::result::Result<(), TimsSeekError> {
Expand Down
2 changes: 1 addition & 1 deletion rust/timsseek_rts/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn new_index(

Ok(Scorer {
index_cycle_rt_ms: ref_time_ms,
index,
index: Box::new(index),
tolerance: tolerance.clone(),
secondary_tolerance: tolerance.with_rt_tolerance(
timsquery::models::tolerance::RtTolerance::Minutes((0.5, 0.5)),
Expand Down