Skip to content

Commit e076c19

Browse files
committed
chore: development v0.2.80 - comprehensive testing complete [auto-commit]
1 parent 78fc9be commit e076c19

File tree

11 files changed

+117
-60
lines changed

11 files changed

+117
-60
lines changed

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ exclude = [
3838
# Workspace Package Metadata (inherited by all crates)
3939
# ─────────────────────────────────────────────────────────────────────────────
4040
[workspace.package]
41-
version = "0.2.78"
41+
version = "0.2.80"
4242
edition = "2024"
4343
rust-version = "1.85"
4444
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
2121

2222
**UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
2323

24-
### Benchmark Results (v0.2.78)
24+
### Benchmark Results (v0.2.80)
2525

2626
| Drive Type | Records | Time | Throughput |
2727
|------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
3333

3434
| Comparison | Records | Time | Notes |
3535
|------------|---------|------|-------|
36-
| **UFFS v0.2.78** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
36+
| **UFFS v0.2.80** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
3737
| UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
3838
| Everything | 19 Million | 178 seconds | All disks |
3939
| WizFile | 6.5 Million | 299 seconds | Single HDD |

crates/uffs-cli/src/commands.rs

Lines changed: 63 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,8 @@ use uffs_mft::{MftProgress, MftReader};
271271
///
272272
/// Returns `true` if:
273273
/// - `QueryMode::ForceIndex` is set, OR
274-
/// - `QueryMode::Auto` AND query is simple (no parquet index, single drive, no tree columns)
274+
/// - `QueryMode::Auto` AND query is simple (no parquet index, single drive, no
275+
/// tree columns)
275276
///
276277
/// Returns `false` if:
277278
/// - `QueryMode::ForceDataFrame` is set, OR
@@ -433,12 +434,8 @@ pub async fn search(
433434
let needs_paths = !benchmark && output_config.needs_path_column();
434435

435436
// Decide which query path to use based on mode and query complexity
436-
let use_index_path = should_use_index_path(
437-
mode,
438-
index.as_ref(),
439-
multi_drives.as_ref(),
440-
&output_config,
441-
);
437+
let use_index_path =
438+
should_use_index_path(mode, index.as_ref(), multi_drives.as_ref(), &output_config);
442439

443440
let mut results = if use_index_path {
444441
info!("🚀 Using fast MftIndex query path");
@@ -696,9 +693,11 @@ async fn load_and_filter_data(
696693
}
697694
}
698695

699-
/// Load and filter data using fast `MftIndex` path (no `DataFrame` conversion during search).
696+
/// Load and filter data using fast `MftIndex` path (no `DataFrame` conversion
697+
/// during search).
700698
///
701-
/// This is the fast path for simple queries. Uses cached `MftIndex` when available.
699+
/// This is the fast path for simple queries. Uses cached `MftIndex` when
700+
/// available.
702701
#[cfg(windows)]
703702
#[allow(clippy::single_call_fn, clippy::print_stderr)]
704703
async fn load_and_filter_data_index(
@@ -712,28 +711,31 @@ async fn load_and_filter_data_index(
712711
// Get effective drive
713712
let effective_drive = single_drive.or_else(|| filters.parsed.drive());
714713
let drive_letter = effective_drive.ok_or_else(|| {
715-
anyhow::anyhow!("Index query mode requires a specific drive. Use --drive or include drive in pattern.")
714+
anyhow::anyhow!(
715+
"Index query mode requires a specific drive. Use --drive or include drive in pattern."
716+
)
716717
})?;
717718

718719
let t_load = std::time::Instant::now();
719720

720721
// Try to load from cache first
721-
let index = if let Some((cached_index, header)) = load_cached_index(drive_letter, INDEX_TTL_SECONDS) {
722-
info!(
723-
drive = %drive_letter,
724-
records = cached_index.len(),
725-
volume_serial = header.volume_serial,
726-
"📦 Using cached MftIndex"
727-
);
728-
cached_index
729-
} else {
730-
// Cache miss - read fresh
731-
info!(drive = %drive_letter, "🔄 Cache miss - reading MFT");
732-
let reader = MftReader::open(drive_letter)
733-
.await
734-
.with_context(|| format!("Failed to open drive {drive_letter}:"))?;
735-
reader.read_all_index().await?
736-
};
722+
let index =
723+
if let Some((cached_index, header)) = load_cached_index(drive_letter, INDEX_TTL_SECONDS) {
724+
info!(
725+
drive = %drive_letter,
726+
records = cached_index.len(),
727+
volume_serial = header.volume_serial,
728+
"📦 Using cached MftIndex"
729+
);
730+
cached_index
731+
} else {
732+
// Cache miss - read fresh
733+
info!(drive = %drive_letter, "🔄 Cache miss - reading MFT");
734+
let reader = MftReader::open(drive_letter)
735+
.await
736+
.with_context(|| format!("Failed to open drive {drive_letter}:"))?;
737+
reader.read_all_index().await?
738+
};
737739
let load_ms = t_load.elapsed().as_millis();
738740

739741
// Execute query on index
@@ -744,8 +746,14 @@ async fn load_and_filter_data_index(
744746
if profile {
745747
let total_ms = load_ms + query_ms;
746748
eprintln!("=== PROFILE: Drive {drive_letter} (Index Path): ===");
747-
eprintln!(" Index load: {load_ms:>6} ms ({} records)", index.len());
748-
eprintln!(" Query/filter: {query_ms:>6} ms ({} matches)", results.height());
749+
eprintln!(
750+
" Index load: {load_ms:>6} ms ({} records)",
751+
index.len()
752+
);
753+
eprintln!(
754+
" Query/filter: {query_ms:>6} ms ({} matches)",
755+
results.height()
756+
);
749757
eprintln!(" TOTAL: {total_ms:>6} ms");
750758
}
751759

@@ -828,20 +836,24 @@ fn execute_index_query(
828836
filters: &QueryFilters<'_>,
829837
resolve_paths: bool,
830838
) -> Result<uffs_mft::DataFrame> {
831-
use uffs_core::{IndexQuery, TypeFilter, compile_extensions, compile_parsed_pattern};
839+
use uffs_core::{IndexQuery, TypeFilter, compile_parsed_pattern};
832840

833841
let mut query = IndexQuery::new(index);
834842

835843
// Apply pattern filter
836844
let pattern = compile_parsed_pattern(filters.parsed);
837-
query = query.with_pattern(pattern);
845+
query = query.with_pattern_result(pattern);
838846

839-
// Apply extension filter if specified
847+
// Apply extension filter if specified (extensions are handled via pattern)
840848
if let Some(ext_str) = filters.ext_filter {
841849
let parsed_ext_filter = ExtensionFilter::parse(ext_str)
842850
.map_err(|err| anyhow::anyhow!("Invalid extension filter: {err}"))?;
843-
let ext_pattern = compile_extensions(parsed_ext_filter.extensions());
844-
query = query.with_extension_pattern(ext_pattern);
851+
let exts: Vec<&str> = parsed_ext_filter
852+
.extensions()
853+
.iter()
854+
.map(String::as_str)
855+
.collect();
856+
query = query.extensions(&exts);
845857
}
846858

847859
// Apply type filters
@@ -865,10 +877,10 @@ fn execute_index_query(
865877
}
866878

867879
// Apply case sensitivity
868-
query = query.case_sensitive(filters.parsed.case_sensitive());
880+
query = query.case_sensitive(filters.parsed.is_case_sensitive());
869881

870882
// Apply path resolution
871-
query = query.resolve_paths(resolve_paths);
883+
query = query.with_resolve_paths(resolve_paths);
872884

873885
// Execute and convert to DataFrame
874886
let results = query.collect();
@@ -885,20 +897,22 @@ fn results_to_dataframe(
885897
use uffs_polars::{IntoColumn, NamedFrom, Series};
886898

887899
// Build columns from results
888-
let mut names: Vec<&str> = Vec::with_capacity(results.len());
900+
let mut names: Vec<String> = Vec::with_capacity(results.len());
889901
let mut paths: Vec<String> = Vec::with_capacity(results.len());
890902
let mut sizes: Vec<u64> = Vec::with_capacity(results.len());
891903
let mut is_dirs: Vec<bool> = Vec::with_capacity(results.len());
892904
let mut frs_values: Vec<u64> = Vec::with_capacity(results.len());
893905

894906
for result in results {
895-
names.push(result.name);
907+
names.push(result.name.clone());
896908
paths.push(result.path.clone().unwrap_or_default());
897909
sizes.push(result.size);
898910
is_dirs.push(result.is_directory);
899911
frs_values.push(result.frs);
900912
}
901913

914+
let height = results.len();
915+
902916
// Create DataFrame
903917
let name_series = Series::new("name".into(), names);
904918
let path_series = Series::new("path".into(), paths);
@@ -908,17 +922,20 @@ fn results_to_dataframe(
908922

909923
// Add volume column
910924
let volume_str = format!("{}:", index.volume);
911-
let volumes: Vec<&str> = vec![volume_str.as_str(); results.len()];
925+
let volumes: Vec<&str> = vec![volume_str.as_str(); height];
912926
let volume_series = Series::new("volume".into(), volumes);
913927

914-
uffs_mft::DataFrame::new(vec![
915-
name_series.into_column(),
916-
path_series.into_column(),
917-
size_series.into_column(),
918-
is_dir_series.into_column(),
919-
frs_series.into_column(),
920-
volume_series.into_column(),
921-
])
928+
uffs_mft::DataFrame::new(
929+
height,
930+
vec![
931+
name_series.into_column(),
932+
path_series.into_column(),
933+
size_series.into_column(),
934+
is_dir_series.into_column(),
935+
frs_series.into_column(),
936+
volume_series.into_column(),
937+
],
938+
)
922939
.map_err(|err| anyhow::anyhow!("Failed to create DataFrame: {err}"))
923940
}
924941

0 commit comments

Comments
 (0)