Skip to content

Commit 391f739

Browse files
committed
chore: development v0.2.31 - comprehensive testing complete [auto-commit]
1 parent 32e6d17 commit 391f739

File tree

14 files changed

+408
-102
lines changed

14 files changed

+408
-102
lines changed

Cargo.lock

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ members = [
3232
# Workspace Package Metadata (inherited by all crates)
3333
# ─────────────────────────────────────────────────────────────────────────────
3434
[workspace.package]
35-
version = "0.2.29"
35+
version = "0.2.31"
3636
edition = "2024"
3737
rust-version = "1.85"
3838
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
2121

2222
**UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
2323

24-
### Benchmark Results (v0.2.29)
24+
### Benchmark Results (v0.2.31)
2525

2626
| Drive Type | Records | Time | Throughput |
2727
|------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
3333

3434
| Comparison | Records | Time | Notes |
3535
|------------|---------|------|-------|
36-
| **UFFS v0.2.29** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
36+
| **UFFS v0.2.31** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
3737
| UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
3838
| Everything | 19 Million | 178 seconds | All disks |
3939
| WizFile | 6.5 Million | 299 seconds | Single HDD |

crates/uffs-cli/src/commands.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ async fn load_and_filter_data(
541541

542542
// Build path resolver from FULL data BEFORE filtering
543543
let t_resolver = std::time::Instant::now();
544-
let mut path_resolver = if needs_paths {
544+
let path_resolver = if needs_paths {
545545
Some(
546546
uffs_core::FastPathResolver::build(&full_df, drive_letter)
547547
.context("Failed to build path resolver")?,
@@ -557,12 +557,11 @@ async fn load_and_filter_data(
557557
let filter_ms = t_filter.elapsed().as_millis();
558558
let filtered_count = filtered.height();
559559

560-
// Add paths using the pre-built resolver (auto-selects parallel for large
561-
// DataFrames)
560+
// Add paths using the pre-built resolver with directory suffix (C++ parity)
562561
let t_paths = std::time::Instant::now();
563-
if let Some(ref mut resolver) = path_resolver {
562+
if let Some(resolver) = &path_resolver {
564563
filtered = resolver
565-
.add_path_column_auto(&filtered)
564+
.add_path_column_with_dir_suffix(&filtered)
566565
.context("Failed to add path column")?;
567566
// Add path_only column (directory portion of path)
568567
filtered = uffs_core::add_path_only_column(&filtered)
@@ -979,10 +978,9 @@ async fn search_multi_drive_filtered(
979978

980979
let matches = filtered.height();
981980

982-
// Add paths using the pre-built resolver (auto-selects parallel for large
983-
// DataFrames)
984-
let with_paths = if let Some(ref mut resolver) = path_resolver {
985-
match resolver.add_path_column_auto(&filtered) {
981+
// Add paths using the pre-built resolver with directory suffix (C++ parity)
982+
let with_paths = if let Some(resolver) = &path_resolver {
983+
match resolver.add_path_column_with_dir_suffix(&filtered) {
986984
Ok(df) => {
987985
// Add path_only column (directory portion of path)
988986
match uffs_core::add_path_only_column(&df) {
@@ -1272,9 +1270,9 @@ async fn search_multi_drive_streaming<W: Write + Send + 'static>(
12721270

12731271
let matches = filtered.height();
12741272

1275-
// Add paths using the pre-built resolver
1276-
let with_paths = if let Some(ref mut resolver) = path_resolver {
1277-
match resolver.add_path_column_auto(&filtered) {
1273+
// Add paths using the pre-built resolver with directory suffix (C++ parity)
1274+
let with_paths = if let Some(resolver) = &path_resolver {
1275+
match resolver.add_path_column_with_dir_suffix(&filtered) {
12781276
Ok(df) => {
12791277
// Add path_only column (directory portion of path)
12801278
match uffs_core::add_path_only_column(&df) {

crates/uffs-core/src/path_resolver.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,57 @@ impl FastPathResolver {
385385
}
386386
}
387387

388+
/// Add a "path" column with trailing backslashes for directories (C++ parity).
389+
///
390+
/// This method adds a trailing backslash to directory paths to match
391+
/// the C++ `UltraFastFileSearch` output format.
392+
///
393+
/// # Errors
394+
///
395+
/// Returns an error if the `frs` or `is_directory` column is missing or does not have the expected type (`u64` / `bool`).
396+
pub fn add_path_column_with_dir_suffix(&self, df: &DataFrame) -> Result<DataFrame> {
397+
let frs_col = df.column("frs")?.u64()?;
398+
let is_dir_col = df.column("is_directory")?.bool()?;
399+
let stream_name_col = df.column("stream_name").ok().and_then(|col| col.str().ok());
400+
401+
// Collect values for parallel iteration
402+
let frs_values: Vec<Option<u64>> = frs_col.into_iter().collect();
403+
let is_dir_values: Vec<Option<bool>> = is_dir_col.into_iter().collect();
404+
let stream_names: Vec<Option<&str>> = stream_name_col.map_or_else(
405+
|| vec![None; frs_values.len()],
406+
|col| col.into_iter().collect(),
407+
);
408+
409+
// Resolve paths in parallel with directory suffix
410+
let paths: Vec<String> = frs_values
411+
.par_iter()
412+
.zip(is_dir_values.par_iter())
413+
.zip(stream_names.par_iter())
414+
.map(|((frs, is_dir), stream_name)| {
415+
let mut path =
416+
frs.map_or_else(|| "<null>".to_owned(), |frs_val| self.resolve(frs_val));
417+
// Add trailing backslash for directories
418+
if is_dir.unwrap_or(false) && !path.ends_with('\\') {
419+
path.push('\\');
420+
}
421+
// Append stream name for ADS (e.g., "file.txt:Zone.Identifier")
422+
if let Some(stream) = stream_name {
423+
if !stream.is_empty() {
424+
path.push(':');
425+
path.push_str(stream);
426+
}
427+
}
428+
path
429+
})
430+
.collect();
431+
432+
let path_series = Column::new("path".into(), paths);
433+
let mut result = df.clone();
434+
result.with_column(path_series)?;
435+
436+
Ok(result)
437+
}
438+
388439
/// Get statistics about the resolver.
389440
#[must_use]
390441
pub fn stats(&self) -> FastPathResolverStats {

crates/uffs-mft/src/io.rs

Lines changed: 77 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,8 @@ pub struct ParsedColumns {
812812
pub name_count: Vec<u16>,
813813
/// Number of data streams per record.
814814
pub stream_count: Vec<u16>,
815+
/// Stream name (empty for default stream, non-empty for ADS).
816+
pub stream_name: Vec<String>,
815817

816818
// Attribute flags (all boolean columns for C++ parity)
817819
/// Read-only flag.
@@ -876,6 +878,7 @@ impl ParsedColumns {
876878
is_directory: Vec::with_capacity(capacity),
877879
name_count: Vec::with_capacity(capacity),
878880
stream_count: Vec::with_capacity(capacity),
881+
stream_name: Vec::with_capacity(capacity),
879882
is_readonly: Vec::with_capacity(capacity),
880883
is_hidden: Vec::with_capacity(capacity),
881884
is_system: Vec::with_capacity(capacity),
@@ -925,6 +928,7 @@ impl ParsedColumns {
925928
self.is_directory.push(record.is_directory);
926929
self.name_count.push(record.name_count());
927930
self.stream_count.push(record.stream_count());
931+
self.stream_name.push(String::new()); // Default stream (no ADS)
928932
self.is_readonly.push(record.std_info.is_readonly);
929933
self.is_hidden.push(record.std_info.is_hidden);
930934
self.is_system.push(record.std_info.is_system);
@@ -946,56 +950,82 @@ impl ParsedColumns {
946950
self.flags.push(record.std_info.to_raw_flags());
947951
}
948952

949-
/// Pushes a record with hard link expansion.
953+
/// Pushes a record with full expansion (names × streams).
950954
///
951-
/// This matches C++ behavior: one row per hard link (name).
952-
/// If a file has 3 hard links, this creates 3 rows with different
953-
/// name/parent_frs but the same FRS and other attributes.
955+
/// This matches C++ behavior: one row per (hard link × stream) combination.
956+
/// If a file has 2 hard links and 3 streams, this creates 6 rows.
954957
///
955958
/// This is the default behavior for user-facing output, as users
956-
/// expect to see each hard link as a separate entry (matching Explorer).
959+
/// expect to see each hard link and ADS as separate entries.
957960
#[inline]
958961
pub fn push_record_expanded(&mut self, record: &ParsedRecord) {
959-
if record.names.is_empty() {
960-
// No names - use the primary name (fallback)
961-
self.push_record(record);
962-
return;
963-
}
964-
965-
// Create one row per hard link
966-
for name_info in &record.names {
967-
self.frs.push(record.frs);
968-
self.parent_frs.push(name_info.parent_frs);
969-
self.name.push(name_info.name.clone());
970-
self.size.push(record.size);
971-
self.allocated_size.push(record.allocated_size);
972-
self.created.push(record.std_info.created);
973-
self.modified.push(record.std_info.modified);
974-
self.accessed.push(record.std_info.accessed);
975-
self.mft_changed.push(record.std_info.mft_changed);
976-
self.is_directory.push(record.is_directory);
977-
// For expanded records, name_count is always 1 (this row represents one link)
978-
self.name_count.push(1);
979-
self.stream_count.push(record.stream_count());
980-
self.is_readonly.push(record.std_info.is_readonly);
981-
self.is_hidden.push(record.std_info.is_hidden);
982-
self.is_system.push(record.std_info.is_system);
983-
self.is_archive.push(record.std_info.is_archive);
984-
self.is_compressed.push(record.std_info.is_compressed);
985-
self.is_encrypted.push(record.std_info.is_encrypted);
986-
self.is_sparse.push(record.std_info.is_sparse);
987-
self.is_reparse.push(record.std_info.is_reparse);
988-
self.is_offline.push(record.std_info.is_offline);
989-
self.is_not_indexed
990-
.push(record.std_info.is_not_content_indexed);
991-
self.is_temporary.push(record.std_info.is_temporary);
992-
self.is_integrity_stream
993-
.push(record.std_info.is_integrity_stream);
994-
self.is_no_scrub_data.push(record.std_info.is_no_scrub_data);
995-
self.is_pinned.push(record.std_info.is_pinned);
996-
self.is_unpinned.push(record.std_info.is_unpinned);
997-
self.is_virtual.push(record.std_info.is_virtual);
998-
self.flags.push(record.std_info.to_raw_flags());
962+
// Get names to iterate over (use primary name if names is empty)
963+
let names: Vec<_> = if record.names.is_empty() {
964+
vec![NameInfo {
965+
name: record.name.clone(),
966+
parent_frs: record.parent_frs,
967+
namespace: 3, // Win32+DOS
968+
}]
969+
} else {
970+
record.names.clone()
971+
};
972+
973+
// Get streams to iterate over (use empty stream if streams is empty)
974+
let streams: Vec<_> = if record.streams.is_empty() {
975+
vec![StreamInfo {
976+
name: String::new(),
977+
size: record.size,
978+
allocated_size: record.allocated_size,
979+
is_sparse: false,
980+
is_compressed: false,
981+
}]
982+
} else {
983+
record.streams.clone()
984+
};
985+
986+
// Create one row per (name × stream) combination
987+
for name_info in &names {
988+
for stream_info in &streams {
989+
self.frs.push(record.frs);
990+
self.parent_frs.push(name_info.parent_frs);
991+
self.name.push(name_info.name.clone());
992+
// Use stream-specific size for ADS, file size for default stream
993+
let (size, alloc) = if stream_info.name.is_empty() {
994+
(record.size, record.allocated_size)
995+
} else {
996+
(stream_info.size, stream_info.allocated_size)
997+
};
998+
self.size.push(size);
999+
self.allocated_size.push(alloc);
1000+
self.created.push(record.std_info.created);
1001+
self.modified.push(record.std_info.modified);
1002+
self.accessed.push(record.std_info.accessed);
1003+
self.mft_changed.push(record.std_info.mft_changed);
1004+
self.is_directory.push(record.is_directory);
1005+
// For expanded records, counts are 1 (this row = one link + one stream)
1006+
self.name_count.push(1);
1007+
self.stream_count.push(1);
1008+
self.stream_name.push(stream_info.name.clone());
1009+
self.is_readonly.push(record.std_info.is_readonly);
1010+
self.is_hidden.push(record.std_info.is_hidden);
1011+
self.is_system.push(record.std_info.is_system);
1012+
self.is_archive.push(record.std_info.is_archive);
1013+
self.is_compressed.push(record.std_info.is_compressed);
1014+
self.is_encrypted.push(record.std_info.is_encrypted);
1015+
self.is_sparse.push(record.std_info.is_sparse);
1016+
self.is_reparse.push(record.std_info.is_reparse);
1017+
self.is_offline.push(record.std_info.is_offline);
1018+
self.is_not_indexed
1019+
.push(record.std_info.is_not_content_indexed);
1020+
self.is_temporary.push(record.std_info.is_temporary);
1021+
self.is_integrity_stream
1022+
.push(record.std_info.is_integrity_stream);
1023+
self.is_no_scrub_data.push(record.std_info.is_no_scrub_data);
1024+
self.is_pinned.push(record.std_info.is_pinned);
1025+
self.is_unpinned.push(record.std_info.is_unpinned);
1026+
self.is_virtual.push(record.std_info.is_virtual);
1027+
self.flags.push(record.std_info.to_raw_flags());
1028+
}
9991029
}
10001030
}
10011031

@@ -1015,6 +1045,7 @@ impl ParsedColumns {
10151045
self.is_directory.extend(other.is_directory);
10161046
self.name_count.extend(other.name_count);
10171047
self.stream_count.extend(other.stream_count);
1048+
self.stream_name.extend(other.stream_name);
10181049
self.is_readonly.extend(other.is_readonly);
10191050
self.is_hidden.extend(other.is_hidden);
10201051
self.is_system.extend(other.is_system);
@@ -1048,6 +1079,7 @@ impl ParsedColumns {
10481079
self.is_directory.reserve(additional);
10491080
self.name_count.reserve(additional);
10501081
self.stream_count.reserve(additional);
1082+
self.stream_name.reserve(additional);
10511083
self.is_readonly.reserve(additional);
10521084
self.is_hidden.reserve(additional);
10531085
self.is_system.reserve(additional);

0 commit comments

Comments
 (0)