Skip to content

Commit 2f9b291

Browse files
committed
add: Audio Resampling for Vector Voice
1 parent 3e8e36d commit 2f9b291

File tree

3 files changed

+145
-7
lines changed

3 files changed

+145
-7
lines changed

src-tauri/Cargo.lock

+80
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ tauri-plugin-clipboard-manager = "2.2.2"
4343
image = { version = "0.25.6", default-features = false, features = ["png"] }
4444
cpal = "0.15.3"
4545
hound = "3.5.1"
46+
rubato = "0.16.2"
4647

4748
# Workaround for a Tauri-4.2.1 dependency issue: pin `deranged` to exactly 0.4.0.
4849
deranged = "=0.4.0"

src-tauri/src/voice.rs

+64-7
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,20 @@ use std::sync::{Arc, Mutex};
33
use std::sync::atomic::{AtomicBool, Ordering};
44
use once_cell::sync::OnceCell;
55
use std::sync::mpsc;
6+
use rubato::{
7+
SincInterpolationParameters, SincInterpolationType, Resampler,
8+
SincFixedIn, WindowFunction
9+
};
610

711
static RECORDER: OnceCell<AudioRecorder> = OnceCell::new();
812

13+
// Standard sample rate for voice recording with good quality-to-size ratio
14+
const TARGET_SAMPLE_RATE: u32 = 16000;
15+
916
/// State for capturing audio from an input device and exporting it as WAV data.
///
/// A single instance is held in the `RECORDER` `OnceCell` static. Every field is wrapped in an
/// `Arc` so it can be cloned into the capture setup code.
pub struct AudioRecorder {
1017
/// Shared flag cloned into the capture setup; presumably `true` while a recording is in
/// progress (it is initialised to `false`) — confirm against the stream callback.
recording: Arc<AtomicBool>,
1118
/// Buffer of 16-bit PCM samples; presumably filled by the capture callback and consumed
/// when the recording is exported — confirm against the stream callback.
samples: Arc<Mutex<Vec<i16>>>,
12-
// Removed by this commit; superseded by `device_sample_rate` below.
sample_rate: Arc<Mutex<u32>>,
19+
/// Sample rate taken from the input device's default input config when recording starts.
/// Initialised to 48000 before any device has been queried; used as the source rate when
/// resampling to `TARGET_SAMPLE_RATE`.
device_sample_rate: Arc<Mutex<u32>>,
1320
/// Optional channel sender, `None` until set; presumably used to signal the capture side
/// to stop — confirm against the start/stop implementations.
stop_tx: Arc<Mutex<Option<mpsc::Sender<()>>>>,
1421
}
1522

@@ -22,7 +29,7 @@ impl AudioRecorder {
2229
AudioRecorder {
2330
recording: Arc::new(AtomicBool::new(false)),
2431
samples: Arc::new(Mutex::new(Vec::new())),
25-
sample_rate: Arc::new(Mutex::new(12000)),
32+
device_sample_rate: Arc::new(Mutex::new(48000)),
2633
stop_tx: Arc::new(Mutex::new(None)),
2734
}
2835
}
@@ -42,7 +49,7 @@ impl AudioRecorder {
4249
let config = device.default_input_config()
4350
.map_err(|e| e.to_string())?;
4451

45-
*self.sample_rate.lock().unwrap() = config.sample_rate().0;
52+
*self.device_sample_rate.lock().unwrap() = config.sample_rate().0;
4653
let samples = Arc::clone(&self.samples);
4754
let recording = Arc::clone(&self.recording);
4855

@@ -86,11 +93,14 @@ impl AudioRecorder {
8693
return Err("No audio data recorded".to_string());
8794
}
8895

89-
let sample_rate = *self.sample_rate.lock().unwrap();
96+
let device_sample_rate = *self.device_sample_rate.lock().unwrap();
97+
98+
// Resample audio to target sample rate to ensure consistent quality
99+
let resampled_samples = self.resample_audio(&samples, device_sample_rate)?;
90100

91101
let spec = hound::WavSpec {
92102
channels: 1,
93-
sample_rate,
103+
sample_rate: TARGET_SAMPLE_RATE,
94104
bits_per_sample: 16,
95105
sample_format: hound::SampleFormat::Int,
96106
};
@@ -102,7 +112,7 @@ impl AudioRecorder {
102112
spec
103113
).map_err(|e| e.to_string())?;
104114

105-
for &sample in samples.iter() {
115+
for &sample in resampled_samples.iter() {
106116
writer.write_sample(sample).map_err(|e| e.to_string())?;
107117
}
108118
writer.finalize().map_err(|e| e.to_string())?;
@@ -114,4 +124,51 @@ impl AudioRecorder {
114124

115125
Ok(wav_buffer)
116126
}
117-
}
127+
128+
/// Resample audio using Rubato's high-quality resampling
129+
fn resample_audio(&self, samples: &[i16], source_rate: u32) -> Result<Vec<i16>, String> {
130+
// If sample rates are already the same, return the original samples
131+
if source_rate == TARGET_SAMPLE_RATE {
132+
return Ok(samples.to_vec());
133+
}
134+
135+
// Convert i16 samples to f32 for Rubato
136+
let samples_f32: Vec<f32> = samples.iter()
137+
.map(|&s| (s as f32) / 32768.0)
138+
.collect();
139+
140+
// Since Rubato works with separate channels, wrap our mono audio in a Vec of Vecs
141+
let input_frames = vec![samples_f32];
142+
143+
// Create a Sinc resampler with good quality settings for voice
144+
let params = SincInterpolationParameters {
145+
sinc_len: 256,
146+
f_cutoff: 0.95,
147+
interpolation: SincInterpolationType::Linear,
148+
oversampling_factor: 256,
149+
window: WindowFunction::BlackmanHarris2,
150+
};
151+
152+
let mut resampler = SincFixedIn::<f32>::new(
153+
TARGET_SAMPLE_RATE as f64 / source_rate as f64,
154+
1.0,
155+
params,
156+
samples.len(),
157+
1, // mono audio (1 channel)
158+
).map_err(|e| format!("Failed to create resampler: {}", e))?;
159+
160+
// Process the audio
161+
let output_frames = resampler.process(
162+
&input_frames,
163+
None
164+
).map_err(|e| format!("Failed to resample audio: {}", e))?;
165+
166+
// Convert back to i16 from f32 (first channel only since we're using mono)
167+
let resampled_samples = output_frames[0]
168+
.iter()
169+
.map(|&s| (s * 32767.0) as i16)
170+
.collect();
171+
172+
Ok(resampled_samples)
173+
}
174+
}

0 commit comments

Comments
 (0)