Skip to content

Commit 952e155

Browse files
committed
chore: development v0.2.87 - comprehensive testing complete [auto-commit]
1 parent f9d1f88 commit 952e155

File tree

13 files changed

+703
-98
lines changed

13 files changed

+703
-98
lines changed

Cargo.lock

Lines changed: 16 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,20 @@ members = [
2626
"crates/uffs-tui", # 📟 Terminal UI
2727
"crates/uffs-gui", # 🪟 Graphical UI (future)
2828
"crates/uffs-legacy", # 📜 Legacy code (reference only)
29+
"crates/uffs-diag", # 🔬 Diagnostic tools (temporarily enabled)
2930
]
3031
exclude = [
3132
# Vendored reference tools / external code (kept out of the main workspace)
3233
"vendor/mft-reader-rs",
3334
# Diagnostic/forensic tools - rarely needed, move to members when required
34-
"crates/uffs-diag",
35+
# "crates/uffs-diag", # Temporarily enabled for analysis
3536
]
3637

3738
# ─────────────────────────────────────────────────────────────────────────────
3839
# Workspace Package Metadata (inherited by all crates)
3940
# ─────────────────────────────────────────────────────────────────────────────
4041
[workspace.package]
41-
version = "0.2.86"
42+
version = "0.2.87"
4243
edition = "2024"
4344
rust-version = "1.85"
4445
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
2121

2222
**UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
2323

24-
### Benchmark Results (v0.2.86)
24+
### Benchmark Results (v0.2.87)
2525

2626
| Drive Type | Records | Time | Throughput |
2727
|------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
3333

3434
| Comparison | Records | Time | Notes |
3535
|------------|---------|------|-------|
36-
| **UFFS v0.2.86** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
36+
| **UFFS v0.2.87** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
3737
| UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
3838
| Everything | 19 Million | 178 seconds | All disks |
3939
| WizFile | 6.5 Million | 299 seconds | Single HDD |

crates/uffs-core/src/index_search.rs

Lines changed: 115 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -397,24 +397,35 @@ pub fn compile_extensions(extensions: &[&str]) -> IndexPattern {
397397
// ============================================================================
398398

399399
/// Result of a search on `MftIndex`.
400+
///
401+
/// Each result represents a unique (record, name, stream) combination.
402+
/// Files with hard links produce multiple results (different paths, same FRS).
403+
/// Files with ADS produce multiple results (same path, different stream names).
400404
#[derive(Debug, Clone)]
401405
pub struct SearchResult {
402406
/// The file/directory name.
403407
pub name: String,
404-
/// The full path (if resolved).
408+
/// The full path (if resolved), including `:stream_name` for ADS.
405409
pub path: Option<String>,
406-
/// File size in bytes.
410+
/// File size in bytes (for this specific stream).
407411
pub size: u64,
408412
/// File Reference Segment number.
409413
pub frs: u64,
410-
/// Parent FRS.
414+
/// Parent FRS (for this specific hard link).
411415
pub parent_frs: u64,
412416
/// Whether this is a directory.
413417
pub is_directory: bool,
418+
/// Stream name (empty for default `$DATA` stream).
419+
pub stream_name: String,
420+
/// Which hard link (0 = primary name).
421+
pub name_index: u16,
422+
/// Which stream (0 = default `$DATA`).
423+
pub stream_index: u16,
414424
}
415425

416426
impl SearchResult {
417-
/// Create a new search result from a file record.
427+
/// Create a new search result from a file record (primary name, default
428+
/// stream).
418429
#[must_use]
419430
pub fn from_record(record: &FileRecord, index: &MftIndex) -> Self {
420431
Self {
@@ -424,6 +435,37 @@ impl SearchResult {
424435
frs: record.frs,
425436
parent_frs: u64::from(record.first_name.parent_frs),
426437
is_directory: record.is_directory(),
438+
stream_name: String::new(),
439+
name_index: 0,
440+
stream_index: 0,
441+
}
442+
}
443+
444+
/// Create a search result for a specific (name, stream) combination.
445+
#[must_use]
446+
pub fn from_expanded(
447+
record: &FileRecord,
448+
index: &MftIndex,
449+
name_idx: u16,
450+
stream_idx: u16,
451+
) -> Self {
452+
let name_info = index
453+
.get_name_at(record, name_idx)
454+
.unwrap_or(&record.first_name);
455+
let stream_info = index
456+
.get_stream_at(record, stream_idx)
457+
.unwrap_or(&record.first_stream);
458+
459+
Self {
460+
name: index.get_name(&name_info.name).to_owned(),
461+
path: None,
462+
size: stream_info.size.length,
463+
frs: record.frs,
464+
parent_frs: u64::from(name_info.parent_frs),
465+
is_directory: record.is_directory(),
466+
stream_name: index.stream_name(stream_info).to_owned(),
467+
name_index: name_idx,
468+
stream_index: stream_idx,
427469
}
428470
}
429471

@@ -433,6 +475,18 @@ impl SearchResult {
433475
self.path = Some(path);
434476
self
435477
}
478+
479+
/// Check if this is an Alternate Data Stream (ADS).
480+
#[must_use]
481+
pub fn is_ads(&self) -> bool {
482+
!self.stream_name.is_empty()
483+
}
484+
485+
/// Check if this is a hard link (not the primary name).
486+
#[must_use]
487+
pub const fn is_hard_link(&self) -> bool {
488+
self.name_index > 0
489+
}
436490
}
437491

438492
// ============================================================================
@@ -453,13 +507,18 @@ pub enum TypeFilter {
453507

454508
/// Query options for `IndexQuery`.
455509
#[derive(Debug, Clone, Copy, Default)]
510+
#[allow(clippy::struct_excessive_bools)] // Configuration struct with boolean flags
456511
pub struct QueryOptions {
457512
/// Type filter (files, dirs, or both).
458513
pub type_filter: TypeFilter,
459514
/// Whether to use case-sensitive matching.
460515
pub case_sensitive: bool,
461516
/// Whether to resolve full paths.
462517
pub resolve_paths: bool,
518+
/// Whether to expand hard links (multiple names per FRS).
519+
pub expand_names: bool,
520+
/// Whether to expand Alternate Data Streams (ADS).
521+
pub expand_streams: bool,
463522
}
464523

465524
/// Fluent query builder for searching `MftIndex` directly.
@@ -492,6 +551,8 @@ impl<'a> IndexQuery<'a> {
492551
type_filter: TypeFilter::All,
493552
case_sensitive: false, // Windows default
494553
resolve_paths: false,
554+
expand_names: true, // Match C++ behavior by default
555+
expand_streams: true, // Match C++ behavior by default
495556
},
496557
min_size: None,
497558
max_size: None,
@@ -606,23 +667,48 @@ impl<'a> IndexQuery<'a> {
606667
self
607668
}
608669

670+
/// Enable/disable hard link expansion (default: true).
671+
///
672+
/// When enabled, files with multiple hard links produce multiple results,
673+
/// one for each path.
674+
#[must_use]
675+
pub const fn with_expand_names(mut self, expand: bool) -> Self {
676+
self.options.expand_names = expand;
677+
self
678+
}
679+
680+
/// Enable/disable ADS expansion (default: true).
681+
///
682+
/// When enabled, files with Alternate Data Streams produce multiple
683+
/// results, one for each stream.
684+
#[must_use]
685+
pub const fn with_expand_streams(mut self, expand: bool) -> Self {
686+
self.options.expand_streams = expand;
687+
self
688+
}
689+
609690
/// Execute the query and collect results.
610691
///
611692
/// Uses Rayon for parallel execution across all records.
612693
/// Filters are applied in optimal order: type → size → pattern.
694+
/// When expansion is enabled, each (name × stream) combination produces a
695+
/// result.
613696
#[must_use]
614697
pub fn collect(self) -> Vec<SearchResult> {
615698
let records = self.index.records();
616699
let case_sensitive = self.options.case_sensitive;
617700
let type_filter = self.options.type_filter;
618701
let resolve_paths = self.options.resolve_paths;
702+
let expand_names = self.options.expand_names;
703+
let expand_streams = self.options.expand_streams;
619704
let pattern = &self.pattern;
620705
let min_size = self.min_size;
621706
let max_size = self.max_size;
622707
let limit = self.limit;
623708
let index = self.index;
624709

625710
// Parallel filter with early termination via take_any
711+
// Then expand (names × streams) for each matching record
626712
let filtered: Vec<SearchResult> = records
627713
.par_iter()
628714
.filter(|record| {
@@ -634,6 +720,7 @@ impl<'a> IndexQuery<'a> {
634720
}
635721

636722
// 2. Size filter (cheap - u64 compare)
723+
// Note: We check the first stream's size here; ADS may have different sizes
637724
let size = record.first_stream.size.length;
638725
if let Some(min) = min_size {
639726
if size < min {
@@ -647,6 +734,7 @@ impl<'a> IndexQuery<'a> {
647734
}
648735

649736
// 3. Pattern filter (expensive - string ops)
737+
// Note: We match against the primary name; hard links may have different names
650738
if let Some(pat) = pattern {
651739
let name = index.record_name(record);
652740
if !pat.matches(name, case_sensitive) {
@@ -657,13 +745,29 @@ impl<'a> IndexQuery<'a> {
657745
true
658746
})
659747
.take_any(limit.unwrap_or(usize::MAX))
660-
.map(|record| {
661-
let mut result = SearchResult::from_record(record, index);
662-
if resolve_paths {
663-
let path = index.build_path(record.frs);
664-
result = result.with_path(path);
665-
}
666-
result
748+
.flat_map_iter(|record| {
749+
// Expand (names × streams) for each matching record
750+
// Fast path: most files have 1 name and 1 stream
751+
let name_count = if expand_names { record.name_count } else { 1 };
752+
let stream_count = if expand_streams {
753+
record.stream_count
754+
} else {
755+
1
756+
};
757+
758+
(0..name_count).flat_map(move |name_idx| {
759+
(0..stream_count).map(move |stream_idx| {
760+
let mut result =
761+
SearchResult::from_expanded(record, index, name_idx, stream_idx);
762+
if resolve_paths {
763+
if let Some(stream) = index.get_stream_at(record, stream_idx) {
764+
let path = index.build_path_with_stream(record, name_idx, stream);
765+
result = result.with_path(path);
766+
}
767+
}
768+
result
769+
})
770+
})
667771
})
668772
.collect();
669773

0 commit comments

Comments
 (0)