Skip to content

Commit d8cba3c

Browse files
committed
chore: development v0.2.25 - comprehensive testing complete [auto-commit]
1 parent 5bfaaaa commit d8cba3c

File tree

18 files changed

+148
-45
lines changed

18 files changed

+148
-45
lines changed

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ members = [
3232
# Workspace Package Metadata (inherited by all crates)
3333
# ─────────────────────────────────────────────────────────────────────────────
3434
[workspace.package]
35-
version = "0.2.24"
35+
version = "0.2.25"
3636
edition = "2024"
3737
rust-version = "1.85"
3838
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
2121

2222
**UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
2323

24-
### Benchmark Results (v0.2.24)
24+
### Benchmark Results (v0.2.25)
2525

2626
| Drive Type | Records | Time | Throughput |
2727
|------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
3333

3434
| Comparison | Records | Time | Notes |
3535
|------------|---------|------|-------|
36-
| **UFFS v0.2.24** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
36+
| **UFFS v0.2.25** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
3737
| UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
3838
| Everything | 19 Million | 178 seconds | All disks |
3939
| WizFile | 6.5 Million | 299 seconds | Single HDD |

crates/uffs-cli/src/commands.rs

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,8 @@ pub async fn search(
291291
dirs_only: bool,
292292
hide_system: bool,
293293
profile: bool,
294+
benchmark: bool,
295+
no_bitmap: bool,
294296
min_size: Option<u64>,
295297
max_size: Option<u64>,
296298
limit: u32,
@@ -335,8 +337,9 @@ pub async fn search(
335337
.with_neg(neg);
336338

337339
// Streaming mode for multi-drive searches (Windows only)
340+
// Skip streaming in benchmark mode - we want to measure without output overhead
338341
#[cfg(windows)]
339-
{
342+
if !benchmark {
340343
let needs_streaming = index.is_none()
341344
&& (multi_drives.is_some()
342345
|| (single_drive.is_none() && filters.parsed.drive().is_none()));
@@ -350,6 +353,7 @@ pub async fn search(
350353
format,
351354
out,
352355
&output_config,
356+
no_bitmap,
353357
)
354358
.await;
355359
// Print timing after streaming completes
@@ -361,37 +365,52 @@ pub async fn search(
361365

362366
// Non-streaming mode: load all data, then output
363367
// Pass needs_paths so path resolution happens BEFORE filtering loses parent
364-
// directories
365-
let needs_paths = output_config.needs_path_column();
368+
// directories (skip path resolution in benchmark mode for speed)
369+
let needs_paths = !benchmark && output_config.needs_path_column();
366370
let mut results = load_and_filter_data(
367371
index,
368372
multi_drives,
369373
single_drive,
370374
&filters,
371375
needs_paths,
372376
profile,
377+
no_bitmap,
373378
)
374379
.await?;
375380

376-
// Compute tree columns only if specifically requested
381+
// Compute tree columns only if specifically requested (skip in benchmark mode)
377382
let t_tree = std::time::Instant::now();
378-
if output_config.needs_tree_columns() {
383+
if !benchmark && output_config.needs_tree_columns() {
379384
let tree_cols = output_config.get_tree_columns();
380385
info!(columns = tree_cols.len(), "Computing tree metrics");
381386
results =
382387
add_tree_columns(&results, &tree_cols).context("Failed to compute tree columns")?;
383388
}
384389
let tree_ms = t_tree.elapsed().as_millis();
385390

386-
// Output results
391+
// Output results (skip in benchmark mode)
387392
let t_output = std::time::Instant::now();
388-
write_results(&results, format, out, &output_config)?;
393+
if !benchmark {
394+
write_results(&results, format, out, &output_config)?;
395+
}
389396
let output_ms = t_output.elapsed().as_millis();
390397

391398
// Print timing (C++ compatibility: "Finished in X s")
392399
let elapsed = start_time.elapsed();
393400

394-
if profile {
401+
if benchmark {
402+
// Benchmark mode: print summary without output overhead
403+
let row_count = results.height();
404+
let total_ms = elapsed.as_millis();
405+
let secs = elapsed.as_secs_f64();
406+
eprintln!("=== BENCHMARK MODE (no output) ===");
407+
eprintln!(" Records found: {row_count:>10}");
408+
eprintln!(" Total time: {total_ms:>10} ms ({secs:.2} s)");
409+
// Throughput calculation intentionally uses floating-point
410+
#[allow(clippy::cast_precision_loss, clippy::float_arithmetic)]
411+
let throughput = row_count as f64 / secs;
412+
eprintln!(" Throughput: {throughput:>10.0} records/sec");
413+
} else if profile {
395414
let row_count = results.height();
396415
let total_ms = elapsed.as_millis();
397416
eprintln!("=== PROFILE: Output ===");
@@ -419,6 +438,7 @@ async fn search_streaming(
419438
format: &str,
420439
out: &str,
421440
output_config: &OutputConfig,
441+
no_bitmap: bool,
422442
) -> Result<()> {
423443
// Determine drives to search
424444
let drives: Vec<char> = if let Some(drives) = multi_drives {
@@ -452,12 +472,14 @@ async fn search_streaming(
452472

453473
if is_console {
454474
let stdout = std::io::stdout();
455-
search_multi_drive_streaming(&drives, filters, format, stdout, output_config).await
475+
search_multi_drive_streaming(&drives, filters, format, stdout, output_config, no_bitmap)
476+
.await
456477
} else {
457478
let file =
458479
File::create(out).with_context(|| format!("Failed to create output file: {out}"))?;
459480
let writer = BufWriter::new(file);
460-
search_multi_drive_streaming(&drives, filters, format, writer, output_config).await?;
481+
search_multi_drive_streaming(&drives, filters, format, writer, output_config, no_bitmap)
482+
.await?;
461483
info!(file = out, "Results written to file");
462484
Ok(())
463485
}
@@ -475,6 +497,8 @@ async fn search_streaming(
475497
/// built from FULL MFT data BEFORE filtering. This ensures parent directories
476498
/// are available for path resolution.
477499
/// * `profile` - If true, prints detailed timing breakdown to stderr.
500+
/// * `no_bitmap` - If true, disables MFT bitmap optimization (reads all
501+
/// records).
478502
#[allow(clippy::single_call_fn, clippy::print_stderr)]
479503
async fn load_and_filter_data(
480504
index: Option<PathBuf>,
@@ -483,6 +507,7 @@ async fn load_and_filter_data(
483507
filters: &QueryFilters<'_>,
484508
needs_paths: bool,
485509
profile: bool,
510+
no_bitmap: bool,
486511
) -> Result<uffs_mft::DataFrame> {
487512
if let Some(index_path) = index {
488513
// Load from pre-built index and filter
@@ -495,7 +520,7 @@ async fn load_and_filter_data(
495520

496521
if let Some(drives) = multi_drives {
497522
// Multi-drive search with per-drive filtering (memory efficient)
498-
return search_multi_drive_filtered(&drives, filters, needs_paths).await;
523+
return search_multi_drive_filtered(&drives, filters, needs_paths, no_bitmap).await;
499524
}
500525

501526
// Check for single drive: CLI flag overrides pattern-embedded drive
@@ -505,7 +530,8 @@ async fn load_and_filter_data(
505530
let t_open = std::time::Instant::now();
506531
let reader = MftReader::open(drive_letter)
507532
.await
508-
.with_context(|| format!("Failed to open drive {drive_letter}:"))?;
533+
.with_context(|| format!("Failed to open drive {drive_letter}:"))?
534+
.with_use_bitmap(!no_bitmap);
509535
let open_ms = t_open.elapsed().as_millis();
510536

511537
let t_read = std::time::Instant::now();
@@ -575,7 +601,7 @@ async fn load_and_filter_data(
575601
bail!("No NTFS drives found on this system");
576602
}
577603
info!(drives = ?all_drives, count = all_drives.len(), "No drive specified - searching all NTFS drives");
578-
search_multi_drive_filtered(&all_drives, filters, needs_paths).await
604+
search_multi_drive_filtered(&all_drives, filters, needs_paths, no_bitmap).await
579605
}
580606
#[cfg(not(windows))]
581607
{
@@ -793,11 +819,17 @@ struct DriveResult {
793819
/// When `needs_paths` is true, builds a FastPathResolver from the FULL MFT data
794820
/// BEFORE filtering. This ensures parent directories are available for path
795821
/// resolution, fixing the `<unknown>` path bug.
822+
///
823+
/// # Arguments
824+
///
825+
/// * `no_bitmap` - If true, disables MFT bitmap optimization (reads all
826+
/// records).
796827
#[cfg(windows)]
797828
async fn search_multi_drive_filtered(
798829
drives: &[char],
799830
filters: &QueryFilters<'_>,
800831
needs_paths: bool,
832+
no_bitmap: bool,
801833
) -> Result<uffs_mft::DataFrame> {
802834
use std::sync::Arc;
803835

@@ -838,13 +870,14 @@ async fn search_multi_drive_filtered(
838870
let tx = tx.clone();
839871
let filters = Arc::clone(&owned_filters);
840872
let pbs = progress_bars.clone();
873+
let use_bitmap = !no_bitmap; // Capture for the spawned task
841874

842875
tokio::spawn(async move {
843876
let pb = pbs.as_ref().and_then(|p| p.get(&drive_char));
844877

845878
// Open the drive
846879
let reader = match MftReader::open(drive_char).await {
847-
Ok(r) => r,
880+
Ok(r) => r.with_use_bitmap(use_bitmap),
848881
Err(e) => {
849882
if let Some(p) = pb {
850883
p.finish_with_message(format!("Error: {e}"));
@@ -1109,6 +1142,7 @@ async fn search_multi_drive_filtered(
11091142
_drives: &[char],
11101143
_filters: &QueryFilters<'_>,
11111144
_needs_paths: bool,
1145+
_no_bitmap: bool,
11121146
) -> Result<uffs_mft::DataFrame> {
11131147
bail!("Multi-drive search is only supported on Windows")
11141148
}
@@ -1124,6 +1158,7 @@ async fn search_multi_drive_streaming<W: Write + Send + 'static>(
11241158
format: &str,
11251159
writer: W,
11261160
output_config: &OutputConfig,
1161+
no_bitmap: bool,
11271162
) -> Result<()> {
11281163
use tokio::sync::mpsc;
11291164
use uffs_mft::{IntoLazy, col, lit};
@@ -1155,11 +1190,12 @@ async fn search_multi_drive_streaming<W: Write + Send + 'static>(
11551190
for &drive_char in drives {
11561191
let tx = tx.clone();
11571192
let filters = Arc::clone(&owned_filters);
1193+
let use_bitmap = !no_bitmap; // Capture for the spawned task
11581194

11591195
tokio::spawn(async move {
11601196
// Open the drive
11611197
let reader = match MftReader::open(drive_char).await {
1162-
Ok(r) => r,
1198+
Ok(r) => r.with_use_bitmap(use_bitmap),
11631199
Err(e) => {
11641200
let _ = tx
11651201
.send(DriveResult {

crates/uffs-cli/src/main.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,16 @@ struct Cli {
129129
#[arg(long)]
130130
profile: bool,
131131

132+
/// Benchmark mode: skip output, only measure MFT reading and filtering.
133+
/// Use this for profiling without stdout I/O overhead.
134+
#[arg(long)]
135+
benchmark: bool,
136+
137+
/// Disable MFT bitmap optimization (read ALL records).
138+
/// Use this for debugging if records appear to be missing.
139+
#[arg(long)]
140+
no_bitmap: bool,
141+
132142
/// Minimum file size in bytes
133143
#[arg(long)]
134144
min_size: Option<u64>,
@@ -234,6 +244,16 @@ enum Commands {
234244
#[arg(long)]
235245
profile: bool,
236246

247+
/// Benchmark mode: skip output, only measure MFT reading and filtering.
248+
/// Use this for profiling without stdout I/O overhead.
249+
#[arg(long)]
250+
benchmark: bool,
251+
252+
/// Disable MFT bitmap optimization (read ALL records).
253+
/// Use this for debugging if records appear to be missing.
254+
#[arg(long)]
255+
no_bitmap: bool,
256+
237257
/// Minimum file size in bytes
238258
#[arg(long)]
239259
min_size: Option<u64>,
@@ -464,6 +484,8 @@ async fn run() -> Result<()> {
464484
dirs_only,
465485
hide_system,
466486
profile,
487+
benchmark,
488+
no_bitmap,
467489
min_size,
468490
max_size,
469491
limit,
@@ -487,6 +509,8 @@ async fn run() -> Result<()> {
487509
dirs_only,
488510
hide_system,
489511
profile,
512+
benchmark,
513+
no_bitmap,
490514
min_size,
491515
max_size,
492516
limit,
@@ -528,6 +552,8 @@ async fn run() -> Result<()> {
528552
cli.dirs_only,
529553
cli.hide_system,
530554
cli.profile,
555+
cli.benchmark,
556+
cli.no_bitmap,
531557
cli.min_size,
532558
cli.max_size,
533559
cli.limit,

0 commit comments

Comments
 (0)