githubrobbi
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 8 additions & 8 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/crates/uffs-mft/src/io.rs b/crates/uffs-mft/src/io.rs
Lines changed: 20 additions & 9 deletions
diff --git a/crates/uffs-mft/src/main.rs b/crates/uffs-mft/src/main.rs
Lines changed: 34 additions & 2 deletions
@@ -38,7 +38,7 @@ exclude = [
 # Workspace Package Metadata (inherited by all crates)
 # ─────────────────────────────────────────────────────────────────────────────
 [workspace.package]
-version = "0.2.65"
+version = "0.2.66"
 edition = "2024"
 rust-version = "1.85"
 license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"
 
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
 
 **UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
 
-### Benchmark Results (v0.2.65)
+### Benchmark Results (v0.2.66)
 
 | Drive Type | Records | Time | Throughput |
 |------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
 
 | Comparison | Records | Time | Notes |
 |------------|---------|------|-------|
-| **UFFS v0.2.65** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
+| **UFFS v0.2.66** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
 | UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
 | Everything | 19 Million | 178 seconds | All disks |
 | WizFile | 6.5 Million | 299 seconds | Single HDD |
 
@@ -4007,11 +4007,21 @@ impl ParallelMftReader {
     ///
     /// This eliminates the separate parse and index build phases, saving ~7s
     /// on large MFTs by overlapping CPU work with I/O.
+    ///
+    /// # Arguments
+    ///
+    /// * `overlapped_handle` - IOCP handle for async I/O
+    /// * `volume` - Volume letter (e.g., 'C')
+    /// * `concurrency` - Number of I/O ops in flight (`None` falls back to 2, a value chosen for HDDs)
+    /// * `io_chunk_size` - Size of each I/O in bytes (`None` falls back to 1 MiB = 1024 * 1024 bytes)
+    /// * `_progress_callback` - Optional progress callback
     #[allow(unsafe_code)]
     pub fn read_all_sliding_window_iocp_to_index<F>(
         &self,
         overlapped_handle: HANDLE,
         volume: char,
+        concurrency: Option<usize>,
+        io_chunk_size: Option<usize>,
         _progress_callback: Option<F>,
     ) -> Result<crate::index::MftIndex>
     where
@@ -4031,13 +4041,14 @@ impl ParallelMftReader {
         let record_size = self.extent_map.bytes_per_record as usize;
         let total_records = self.extent_map.total_records() as usize;
 
-        const CONCURRENCY: usize = 2;
-        const IO_CHUNK_SIZE: usize = 1024 * 1024; // 1MB per read
+        // Use provided values or defaults
+        let concurrency = concurrency.unwrap_or(2);
+        let io_chunk_size = io_chunk_size.unwrap_or(1024 * 1024); // 1MB default
 
         info!(
             total_records,
-            concurrency = CONCURRENCY,
-            io_size_kb = IO_CHUNK_SIZE / 1024,
+            concurrency,
+            io_size_kb = io_chunk_size / 1024,
             "🚀 Starting sliding window IOCP with INLINE parsing (C++ parity)"
         );
 
@@ -4067,7 +4078,7 @@ impl ParallelMftReader {
             let mut frs_offset = 0u64;
 
             while offset_within_chunk < chunk_bytes {
-                let io_size = std::cmp::min(IO_CHUNK_SIZE, chunk_bytes - offset_within_chunk);
+                let io_size = std::cmp::min(io_chunk_size, chunk_bytes - offset_within_chunk);
                 let records_in_io = io_size / record_size;
                 let disk_offset =
                     chunk.disk_offset + skip_begin_bytes as u64 + offset_within_chunk as u64;
@@ -4110,19 +4121,19 @@ impl ParallelMftReader {
             op: IoOp,
         }
 
-        let mut buffer_pool: Vec<AlignedBuffer> = (0..CONCURRENCY)
-            .map(|_| AlignedBuffer::new(IO_CHUNK_SIZE))
+        let mut buffer_pool: Vec<AlignedBuffer> = (0..concurrency)
+            .map(|_| AlignedBuffer::new(io_chunk_size))
             .collect();
 
         let mut in_flight: Vec<Option<Pin<Box<InFlightOp>>>> =
-            (0..CONCURRENCY).map(|_| None).collect();
+            (0..concurrency).map(|_| None).collect();
 
         let mut completed_count = 0usize;
         let mut bytes_read_total = 0u64;
         let mut records_parsed = 0usize;
 
         // Queue initial reads
-        for slot_id in 0..CONCURRENCY {
+        for slot_id in 0..concurrency {
             if let Some(op) = io_ops.pop_front() {
                 let buffer = buffer_pool.pop().unwrap();
                 let mut in_flight_op = Box::pin(InFlightOp {
 
@@ -442,6 +442,18 @@ enum Commands {
         /// paths lazily. Disabling saves ~15% of CPU time.
         #[arg(long)]
         no_placeholders: bool,
+
+        /// Number of concurrent I/O operations (reads in flight).
+        /// Default: 2 for HDD (optimal for sequential), higher for SSD/NVMe.
+        /// Use this to experiment with I/O parallelism.
+        #[arg(long, default_value = "2")]
+        concurrency: usize,
+
+        /// I/O chunk size in KB (e.g., 1024 = 1MB, 2048 = 2MB, 4096 = 4MB).
+        /// Default: 1024 (1MB). Larger chunks reduce syscall overhead but
+        /// increase latency per completion.
+        #[arg(long, default_value = "1024")]
+        io_size_kb: usize,
     },
 }
 
@@ -608,7 +620,19 @@ async fn run() -> Result<()> {
                 mode,
                 no_bitmap,
                 no_placeholders,
-            } => cmd_benchmark_index_lean(drive, &mode, no_bitmap, no_placeholders).await,
+                concurrency,
+                io_size_kb,
+            } => {
+                cmd_benchmark_index_lean(
+                    drive,
+                    &mode,
+                    no_bitmap,
+                    no_placeholders,
+                    concurrency,
+                    io_size_kb,
+                )
+                .await
+            }
         }
     }
 }
@@ -2847,6 +2871,8 @@ async fn cmd_benchmark_index_lean(
     mode_str: &str,
     no_bitmap: bool,
     no_placeholders: bool,
+    concurrency: usize,
+    io_size_kb: usize,
 ) -> Result<()> {
     use std::time::Instant;
 
@@ -2870,6 +2896,8 @@ async fn cmd_benchmark_index_lean(
             "enabled"
         }
     );
+    println!("Concurrency: {} I/O ops in flight", concurrency);
+    println!("I/O Size: {} KB ({} MB)", io_size_kb, io_size_kb / 1024);
     println!("This measures the UFFS indexing pipeline with lean MftIndex (no DataFrame overhead)");
     println!();
 
@@ -2904,12 +2932,16 @@ async fn cmd_benchmark_index_lean(
     // Open reader and read MFT into lean index
     // - no_bitmap: disable bitmap optimization to read entire MFT sequentially
     // - no_placeholders: skip placeholder creation for ~15% speedup
+    // - concurrency: number of I/O ops in flight
+    // - io_size_kb: I/O chunk size in KB
     let reader = MftReader::open(drive_upper)
         .await
         .with_context(|| format!("Failed to open drive {}:", drive_upper))?
         .with_mode(mode)
         .with_use_bitmap(!no_bitmap)
-        .with_add_placeholders(!no_placeholders);
+        .with_add_placeholders(!no_placeholders)
+        .with_concurrency(concurrency)
+        .with_io_size(io_size_kb * 1024);
 
     let index = reader
         .read_all_index()