fix: average multi-channel devices in to Mono for Vector Voice

JSKitty · JSKitty · commit 56d1c71d9ffa · 2025-04-24T17:19:49.000+01:00
This solves a distortion issue on stereo microphone setups, yeehah!
diff --git a/src-tauri/src/voice.rs b/src-tauri/src/voice.rs
@@ -1,17 +1,16 @@
 use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
-use std::sync::{Arc, Mutex};
-use std::sync::atomic::{AtomicBool, Ordering};
 use once_cell::sync::OnceCell;
-use std::sync::mpsc;
 use rubato::{
-    SincInterpolationParameters, SincInterpolationType, Resampler, 
-    SincFixedIn, WindowFunction
+    Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction,
 };
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::mpsc;
+use std::sync::{Arc, Mutex};
 
 static RECORDER: OnceCell<AudioRecorder> = OnceCell::new();
 
 // Standard sample rate for voice recording with good quality-to-size ratio
-const TARGET_SAMPLE_RATE: u32 = 16000;
+const TARGET_SAMPLE_RATE: u32 = 22000;
 
 pub struct AudioRecorder {
     recording: Arc<AtomicBool>,
@@ -43,34 +42,42 @@ impl AudioRecorder {
         *self.stop_tx.lock().unwrap() = Some(tx);
 
         let host = cpal::default_host();
-        let device = host.default_input_device()
-            .ok_or("No input device found")?;
+        let device = host.default_input_device().ok_or("No input device found")?;
+
+        let supported_config = device.default_input_config().map_err(|e| e.to_string())?;
+
+        *self.device_sample_rate.lock().unwrap() = supported_config.sample_rate().0;
+
+        let config: cpal::StreamConfig = supported_config.into();
+        let channels = config.channels as usize;
 
-        let config = device.default_input_config()
-            .map_err(|e| e.to_string())?;
-            
-        *self.device_sample_rate.lock().unwrap() = config.sample_rate().0;
         let samples = Arc::clone(&self.samples);
         let recording = Arc::clone(&self.recording);
-        
+
         self.recording.store(true, Ordering::SeqCst);
-        
+
         std::thread::spawn(move || {
-            let stream = device.build_input_stream(
-                &config.into(),
-                move |data: &[f32], _: &_| {
-                    if recording.load(Ordering::SeqCst) {
-                        if let Ok(mut guard) = samples.lock() {
-                            guard.extend(data.iter().map(|&x| (x * 32768.0) as i16));
+            let stream = device
+                .build_input_stream(
+                    &config,
+                    move |data: &[f32], _: &_| {
+                        if recording.load(Ordering::SeqCst) {
+                            if let Ok(mut guard) = samples.lock() {
+                                guard.extend(data.chunks(channels).map(|chunk| {
+                                    let sum: f32 = chunk.iter().sum();
+                                    let avg = sum / channels as f32;
+                                    (avg.clamp(-1.0, 1.0) * 32767.0) as i16
+                                }));
+                            }
                         }
-                    }
-                },
-                |err| eprintln!("Error: {}", err),
-                None
-            ).unwrap();
+                    },
+                    |err| eprintln!("Error: {}", err),
+                    None,
+                )
+                .unwrap();
 
             stream.play().unwrap();
-            
+
             // Wait for stop signal
             rx.recv().unwrap_or(());
         });
@@ -86,32 +93,29 @@ impl AudioRecorder {
         self.recording.store(false, Ordering::SeqCst);
 
         let wav_buffer = {
-            let samples = self.samples.lock()
-                .map_err(|_| "Failed to get samples")?;
+            let samples = self.samples.lock().map_err(|_| "Failed to get samples")?;
 
             if samples.is_empty() {
                 return Err("No audio data recorded".to_string());
             }
 
             let device_sample_rate = *self.device_sample_rate.lock().unwrap();
-            
+
             // Resample audio to target sample rate to ensure consistent quality
             let resampled_samples = self.resample_audio(&samples, device_sample_rate)?;
-    
+
             let spec = hound::WavSpec {
                 channels: 1,
                 sample_rate: TARGET_SAMPLE_RATE,
                 bits_per_sample: 16,
                 sample_format: hound::SampleFormat::Int,
             };
-    
+
             let mut buffer: Vec<u8> = Vec::new();
             {
-                let mut writer = hound::WavWriter::new(
-                    std::io::Cursor::new(&mut buffer),
-                    spec
-                ).map_err(|e| e.to_string())?;
-    
+                let mut writer = hound::WavWriter::new(std::io::Cursor::new(&mut buffer), spec)
+                    .map_err(|e| e.to_string())?;
+
                 for &sample in resampled_samples.iter() {
                     writer.write_sample(sample).map_err(|e| e.to_string())?;
                 }
@@ -124,22 +128,20 @@ impl AudioRecorder {
 
         Ok(wav_buffer)
     }
-    
+
     /// Resample audio using Rubato's high-quality resampling
     fn resample_audio(&self, samples: &[i16], source_rate: u32) -> Result<Vec<i16>, String> {
         // If sample rates are already the same, return the original samples
         if source_rate == TARGET_SAMPLE_RATE {
             return Ok(samples.to_vec());
         }
-        
+
         // Convert i16 samples to f32 for Rubato
-        let samples_f32: Vec<f32> = samples.iter()
-            .map(|&s| (s as f32) / 32768.0)
-            .collect();
-            
+        let samples_f32: Vec<f32> = samples.iter().map(|&s| (s as f32) / 32768.0).collect();
+
         // Since Rubato works with separate channels, wrap our mono audio in a Vec of Vecs
         let input_frames = vec![samples_f32];
-        
+
         // Create a Sinc resampler with good quality settings for voice
         let params = SincInterpolationParameters {
             sinc_len: 256,
@@ -148,27 +150,27 @@ impl AudioRecorder {
             oversampling_factor: 256,
             window: WindowFunction::BlackmanHarris2,
         };
-        
+
         let mut resampler = SincFixedIn::<f32>::new(
             TARGET_SAMPLE_RATE as f64 / source_rate as f64,
             1.0,
             params,
             samples.len(),
             1, // mono audio (1 channel)
-        ).map_err(|e| format!("Failed to create resampler: {}", e))?;
-        
+        )
+        .map_err(|e| format!("Failed to create resampler: {}", e))?;
+
         // Process the audio
-        let output_frames = resampler.process(
-            &input_frames,
-            None
-        ).map_err(|e| format!("Failed to resample audio: {}", e))?;
-        
+        let output_frames = resampler
+            .process(&input_frames, None)
+            .map_err(|e| format!("Failed to resample audio: {}", e))?;
+
         // Convert back to i16 from f32 (first channel only since we're using mono)
         let resampled_samples = output_frames[0]
             .iter()
             .map(|&s| (s * 32767.0) as i16)
             .collect();
-            
+
         Ok(resampled_samples)
     }
 }