Skip to content

Commit 56d1c71

Browse files
committed
fix: average multi-channel devices into mono for Vector Voice
This solves a distortion issue on stereo microphone setups, yeehah!
1 parent 74c846d commit 56d1c71

File tree

1 file changed

+54
-52
lines changed

1 file changed

+54
-52
lines changed

src-tauri/src/voice.rs

+54-52
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
2-
use std::sync::{Arc, Mutex};
3-
use std::sync::atomic::{AtomicBool, Ordering};
42
use once_cell::sync::OnceCell;
5-
use std::sync::mpsc;
63
use rubato::{
7-
SincInterpolationParameters, SincInterpolationType, Resampler,
8-
SincFixedIn, WindowFunction
4+
Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction,
95
};
6+
use std::sync::atomic::{AtomicBool, Ordering};
7+
use std::sync::mpsc;
8+
use std::sync::{Arc, Mutex};
109

1110
static RECORDER: OnceCell<AudioRecorder> = OnceCell::new();
1211

1312
// Standard sample rate for voice recording with good quality-to-size ratio
14-
const TARGET_SAMPLE_RATE: u32 = 16000;
13+
const TARGET_SAMPLE_RATE: u32 = 22000;
1514

1615
pub struct AudioRecorder {
1716
recording: Arc<AtomicBool>,
@@ -43,34 +42,42 @@ impl AudioRecorder {
4342
*self.stop_tx.lock().unwrap() = Some(tx);
4443

4544
let host = cpal::default_host();
46-
let device = host.default_input_device()
47-
.ok_or("No input device found")?;
45+
let device = host.default_input_device().ok_or("No input device found")?;
46+
47+
let supported_config = device.default_input_config().map_err(|e| e.to_string())?;
48+
49+
*self.device_sample_rate.lock().unwrap() = supported_config.sample_rate().0;
50+
51+
let config: cpal::StreamConfig = supported_config.into();
52+
let channels = config.channels as usize;
4853

49-
let config = device.default_input_config()
50-
.map_err(|e| e.to_string())?;
51-
52-
*self.device_sample_rate.lock().unwrap() = config.sample_rate().0;
5354
let samples = Arc::clone(&self.samples);
5455
let recording = Arc::clone(&self.recording);
55-
56+
5657
self.recording.store(true, Ordering::SeqCst);
57-
58+
5859
std::thread::spawn(move || {
59-
let stream = device.build_input_stream(
60-
&config.into(),
61-
move |data: &[f32], _: &_| {
62-
if recording.load(Ordering::SeqCst) {
63-
if let Ok(mut guard) = samples.lock() {
64-
guard.extend(data.iter().map(|&x| (x * 32768.0) as i16));
60+
let stream = device
61+
.build_input_stream(
62+
&config,
63+
move |data: &[f32], _: &_| {
64+
if recording.load(Ordering::SeqCst) {
65+
if let Ok(mut guard) = samples.lock() {
66+
guard.extend(data.chunks(channels).map(|chunk| {
67+
let sum: f32 = chunk.iter().sum();
68+
let avg = sum / channels as f32;
69+
(avg.clamp(-1.0, 1.0) * 32767.0) as i16
70+
}));
71+
}
6572
}
66-
}
67-
},
68-
|err| eprintln!("Error: {}", err),
69-
None
70-
).unwrap();
73+
},
74+
|err| eprintln!("Error: {}", err),
75+
None,
76+
)
77+
.unwrap();
7178

7279
stream.play().unwrap();
73-
80+
7481
// Wait for stop signal
7582
rx.recv().unwrap_or(());
7683
});
@@ -86,32 +93,29 @@ impl AudioRecorder {
8693
self.recording.store(false, Ordering::SeqCst);
8794

8895
let wav_buffer = {
89-
let samples = self.samples.lock()
90-
.map_err(|_| "Failed to get samples")?;
96+
let samples = self.samples.lock().map_err(|_| "Failed to get samples")?;
9197

9298
if samples.is_empty() {
9399
return Err("No audio data recorded".to_string());
94100
}
95101

96102
let device_sample_rate = *self.device_sample_rate.lock().unwrap();
97-
103+
98104
// Resample audio to target sample rate to ensure consistent quality
99105
let resampled_samples = self.resample_audio(&samples, device_sample_rate)?;
100-
106+
101107
let spec = hound::WavSpec {
102108
channels: 1,
103109
sample_rate: TARGET_SAMPLE_RATE,
104110
bits_per_sample: 16,
105111
sample_format: hound::SampleFormat::Int,
106112
};
107-
113+
108114
let mut buffer: Vec<u8> = Vec::new();
109115
{
110-
let mut writer = hound::WavWriter::new(
111-
std::io::Cursor::new(&mut buffer),
112-
spec
113-
).map_err(|e| e.to_string())?;
114-
116+
let mut writer = hound::WavWriter::new(std::io::Cursor::new(&mut buffer), spec)
117+
.map_err(|e| e.to_string())?;
118+
115119
for &sample in resampled_samples.iter() {
116120
writer.write_sample(sample).map_err(|e| e.to_string())?;
117121
}
@@ -124,22 +128,20 @@ impl AudioRecorder {
124128

125129
Ok(wav_buffer)
126130
}
127-
131+
128132
/// Resample audio using Rubato's high-quality resampling
129133
fn resample_audio(&self, samples: &[i16], source_rate: u32) -> Result<Vec<i16>, String> {
130134
// If sample rates are already the same, return the original samples
131135
if source_rate == TARGET_SAMPLE_RATE {
132136
return Ok(samples.to_vec());
133137
}
134-
138+
135139
// Convert i16 samples to f32 for Rubato
136-
let samples_f32: Vec<f32> = samples.iter()
137-
.map(|&s| (s as f32) / 32768.0)
138-
.collect();
139-
140+
let samples_f32: Vec<f32> = samples.iter().map(|&s| (s as f32) / 32768.0).collect();
141+
140142
// Since Rubato works with separate channels, wrap our mono audio in a Vec of Vecs
141143
let input_frames = vec![samples_f32];
142-
144+
143145
// Create a Sinc resampler with good quality settings for voice
144146
let params = SincInterpolationParameters {
145147
sinc_len: 256,
@@ -148,27 +150,27 @@ impl AudioRecorder {
148150
oversampling_factor: 256,
149151
window: WindowFunction::BlackmanHarris2,
150152
};
151-
153+
152154
let mut resampler = SincFixedIn::<f32>::new(
153155
TARGET_SAMPLE_RATE as f64 / source_rate as f64,
154156
1.0,
155157
params,
156158
samples.len(),
157159
1, // mono audio (1 channel)
158-
).map_err(|e| format!("Failed to create resampler: {}", e))?;
159-
160+
)
161+
.map_err(|e| format!("Failed to create resampler: {}", e))?;
162+
160163
// Process the audio
161-
let output_frames = resampler.process(
162-
&input_frames,
163-
None
164-
).map_err(|e| format!("Failed to resample audio: {}", e))?;
165-
164+
let output_frames = resampler
165+
.process(&input_frames, None)
166+
.map_err(|e| format!("Failed to resample audio: {}", e))?;
167+
166168
// Convert back to i16 from f32 (first channel only since we're using mono)
167169
let resampled_samples = output_frames[0]
168170
.iter()
169171
.map(|&s| (s * 32767.0) as i16)
170172
.collect();
171-
173+
172174
Ok(resampled_samples)
173175
}
174176
}

0 commit comments

Comments
 (0)