@@ -3,13 +3,20 @@ use std::sync::{Arc, Mutex};
3
3
use std:: sync:: atomic:: { AtomicBool , Ordering } ;
4
4
use once_cell:: sync:: OnceCell ;
5
5
use std:: sync:: mpsc;
6
+ use rubato:: {
7
+ SincInterpolationParameters , SincInterpolationType , Resampler ,
8
+ SincFixedIn , WindowFunction
9
+ } ;
6
10
7
11
static RECORDER : OnceCell < AudioRecorder > = OnceCell :: new ( ) ;
8
12
13
+ // Standard sample rate for voice recording with good quality-to-size ratio
14
+ const TARGET_SAMPLE_RATE : u32 = 16000 ;
15
+
9
16
// Shared state for microphone capture. Every field is wrapped in `Arc` so it
// can be cloned into other owners (the start-recording code clones `samples`
// and `recording` via `Arc::clone`).
pub struct AudioRecorder {
10
17
// Atomic on/off flag, initialised to `false`; presumably true while a
// capture is in progress — confirm against the start/stop code.
recording : Arc < AtomicBool > ,
11
18
// Captured 16-bit PCM samples; these are resampled and written out as a
// mono WAV when the recording is finalised.
samples : Arc < Mutex < Vec < i16 > > > ,
12
- sample_rate : Arc < Mutex < u32 > > ,
19
// Sample rate reported by the input device's default config. It is stored
// when recording starts and later used as the source rate for resampling.
+ device_sample_rate : Arc < Mutex < u32 > > ,
13
20
// Optional sender half of a unit channel, `None` until set; presumably used
// to signal the capture side to stop — verify against the recording thread.
stop_tx : Arc < Mutex < Option < mpsc:: Sender < ( ) > > > > ,
14
21
}
15
22
@@ -22,7 +29,7 @@ impl AudioRecorder {
22
29
AudioRecorder {
23
30
recording : Arc :: new ( AtomicBool :: new ( false ) ) ,
24
31
samples : Arc :: new ( Mutex :: new ( Vec :: new ( ) ) ) ,
25
- sample_rate : Arc :: new ( Mutex :: new ( 12000 ) ) ,
32
+ device_sample_rate : Arc :: new ( Mutex :: new ( 48000 ) ) ,
26
33
stop_tx : Arc :: new ( Mutex :: new ( None ) ) ,
27
34
}
28
35
}
@@ -42,7 +49,7 @@ impl AudioRecorder {
42
49
let config = device. default_input_config ( )
43
50
. map_err ( |e| e. to_string ( ) ) ?;
44
51
45
- * self . sample_rate . lock ( ) . unwrap ( ) = config. sample_rate ( ) . 0 ;
52
+ * self . device_sample_rate . lock ( ) . unwrap ( ) = config. sample_rate ( ) . 0 ;
46
53
let samples = Arc :: clone ( & self . samples ) ;
47
54
let recording = Arc :: clone ( & self . recording ) ;
48
55
@@ -86,11 +93,14 @@ impl AudioRecorder {
86
93
return Err ( "No audio data recorded" . to_string ( ) ) ;
87
94
}
88
95
89
- let sample_rate = * self . sample_rate . lock ( ) . unwrap ( ) ;
96
+ let device_sample_rate = * self . device_sample_rate . lock ( ) . unwrap ( ) ;
97
+
98
+ // Resample audio to target sample rate to ensure consistent quality
99
+ let resampled_samples = self . resample_audio ( & samples, device_sample_rate) ?;
90
100
91
101
let spec = hound:: WavSpec {
92
102
channels : 1 ,
93
- sample_rate,
103
+ sample_rate : TARGET_SAMPLE_RATE ,
94
104
bits_per_sample : 16 ,
95
105
sample_format : hound:: SampleFormat :: Int ,
96
106
} ;
@@ -102,7 +112,7 @@ impl AudioRecorder {
102
112
spec
103
113
) . map_err ( |e| e. to_string ( ) ) ?;
104
114
105
- for & sample in samples . iter ( ) {
115
+ for & sample in resampled_samples . iter ( ) {
106
116
writer. write_sample ( sample) . map_err ( |e| e. to_string ( ) ) ?;
107
117
}
108
118
writer. finalize ( ) . map_err ( |e| e. to_string ( ) ) ?;
@@ -114,4 +124,51 @@ impl AudioRecorder {
114
124
115
125
Ok ( wav_buffer)
116
126
}
117
- }
127
+
128
+ /// Resample audio using Rubato's high-quality resampling
129
+ fn resample_audio ( & self , samples : & [ i16 ] , source_rate : u32 ) -> Result < Vec < i16 > , String > {
130
+ // If sample rates are already the same, return the original samples
131
+ if source_rate == TARGET_SAMPLE_RATE {
132
+ return Ok ( samples. to_vec ( ) ) ;
133
+ }
134
+
135
+ // Convert i16 samples to f32 for Rubato
136
+ let samples_f32: Vec < f32 > = samples. iter ( )
137
+ . map ( |& s| ( s as f32 ) / 32768.0 )
138
+ . collect ( ) ;
139
+
140
+ // Since Rubato works with separate channels, wrap our mono audio in a Vec of Vecs
141
+ let input_frames = vec ! [ samples_f32] ;
142
+
143
+ // Create a Sinc resampler with good quality settings for voice
144
+ let params = SincInterpolationParameters {
145
+ sinc_len : 256 ,
146
+ f_cutoff : 0.95 ,
147
+ interpolation : SincInterpolationType :: Linear ,
148
+ oversampling_factor : 256 ,
149
+ window : WindowFunction :: BlackmanHarris2 ,
150
+ } ;
151
+
152
+ let mut resampler = SincFixedIn :: < f32 > :: new (
153
+ TARGET_SAMPLE_RATE as f64 / source_rate as f64 ,
154
+ 1.0 ,
155
+ params,
156
+ samples. len ( ) ,
157
+ 1 , // mono audio (1 channel)
158
+ ) . map_err ( |e| format ! ( "Failed to create resampler: {}" , e) ) ?;
159
+
160
+ // Process the audio
161
+ let output_frames = resampler. process (
162
+ & input_frames,
163
+ None
164
+ ) . map_err ( |e| format ! ( "Failed to resample audio: {}" , e) ) ?;
165
+
166
+ // Convert back to i16 from f32 (first channel only since we're using mono)
167
+ let resampled_samples = output_frames[ 0 ]
168
+ . iter ( )
169
+ . map ( |& s| ( s * 32767.0 ) as i16 )
170
+ . collect ( ) ;
171
+
172
+ Ok ( resampled_samples)
173
+ }
174
+ }
0 commit comments