Skip to content

Commit bdd6ab6

Browse files
committed
chores
1 parent 4d26bf7 commit bdd6ab6

File tree

3 files changed

+30
-45
lines changed

3 files changed

+30
-45
lines changed

crates/chunker/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ pub trait VadExt: AsyncSource + Sized {
7070
Self: Unpin,
7171
{
7272
let config = VadConfig {
73-
post_speech_pad: Duration::from_millis(50),
73+
redemption_time: Duration::from_millis(600),
7474
..Default::default()
7575
};
7676

crates/whisper-local/src/model.rs

Lines changed: 28 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -139,33 +139,55 @@ impl Whisper {
139139

140140
let mut segments = Vec::new();
141141
for i in 0..num_segments {
142-
let text = self.state.full_get_segment_text_lossy(i)?;
142+
let text = TRAILING_DOTS
143+
.replace(&self.state.full_get_segment_text_lossy(i)?, "")
144+
.to_string();
145+
143146
let (start, end) = (
144147
self.state.full_get_segment_t0(i)?,
145148
self.state.full_get_segment_t1(i)?,
146149
);
147150
let confidence = self.calculate_segment_confidence(i);
148151

149-
let mut segment = Segment {
152+
segments.push(Segment {
150153
text,
151154
start: start as f32 / 1000.0,
152155
end: end as f32 / 1000.0,
153156
confidence,
154157
..Default::default()
155-
};
156-
segment.trim();
157-
segments.push(segment);
158+
});
158159
}
159160

160-
self.dynamic_prompt = segments
161+
let segments = Self::filter_segments(segments);
162+
163+
let full_text = segments
161164
.iter()
162165
.map(|s| s.text())
163166
.collect::<Vec<&str>>()
164167
.join(" ");
165168

169+
if !full_text.is_empty() {
170+
self.dynamic_prompt = full_text;
171+
}
172+
166173
Ok(segments)
167174
}
168175

176+
/// Removes segments that should not be surfaced to callers.
///
/// A segment is dropped when either of the following holds:
/// - its `confidence` is below `0.005`, or
/// - its text, after trimming surrounding whitespace and lowercasing, is
///   exactly `"you"`, `"thank you"`, or `"🎵"`.
///
/// NOTE(review): these literals look like typical filler output produced on
/// silence or music — confirm the intent with the author.
///
/// Surviving segments are returned in their original order.
fn filter_segments(segments: Vec<Segment>) -> Vec<Segment> {
    segments
        .into_iter()
        .filter(|s| {
            let t = s.text.trim().to_lowercase();

            // Yield the predicate directly rather than `if .. { false } else { true }`.
            // The comparison is deliberately kept as a negated `<` so that a NaN
            // confidence is still retained, exactly as before.
            !(s.confidence < 0.005 || matches!(t.as_str(), "you" | "thank you" | "🎵"))
        })
        .collect()
}
190+
169191
// https://github.com/ggml-org/whisper.cpp/pull/971/files#diff-2d3599a9fad195f2c3c60bd06691bc1815325b3560b5feda41a91fa71194e805R310-R327
170192
fn calculate_segment_confidence(&self, segment_idx: i32) -> f32 {
171193
let n_tokens = self.state.full_n_tokens(segment_idx).unwrap_or(0);
@@ -262,47 +284,13 @@ impl Segment {
262284
/// Returns a clone of this segment's optional `meta` JSON value.
pub fn meta(&self) -> Option<serde_json::Value> {
263285
self.meta.clone()
264286
}
265-
266-
pub fn trim(&mut self) {
267-
self.text = TRAILING_DOTS.replace(&self.text, "").to_string();
268-
}
269287
}
270288

271289
#[cfg(test)]
272290
mod tests {
273291
use super::*;
274292
use futures_util::StreamExt;
275293

276-
#[test]
277-
fn test_trim() {
278-
{
279-
let mut segment = Segment {
280-
text: "Hello...".to_string(),
281-
..Default::default()
282-
};
283-
segment.trim();
284-
assert_eq!(segment.text, "Hello");
285-
}
286-
287-
{
288-
let mut segment = Segment {
289-
text: "Hello".to_string(),
290-
..Default::default()
291-
};
292-
segment.trim();
293-
assert_eq!(segment.text, "Hello");
294-
}
295-
296-
{
297-
let mut segment = Segment {
298-
text: "Hello.".to_string(),
299-
..Default::default()
300-
};
301-
segment.trim();
302-
assert_eq!(segment.text, "Hello.");
303-
}
304-
}
305-
306294
#[test]
307295
fn test_whisper() {
308296
let mut whisper = Whisper::builder()

plugins/local-stt/src/server.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,7 @@ async fn process_transcription_stream(
207207
let duration = chunk.duration() as u64;
208208
let confidence = chunk.confidence();
209209

210-
if confidence < 0.1 {
211-
tracing::warn!(confidence, "skipping_transcript: {}", text);
212-
continue;
213-
}
210+
214211

215212
let source = meta.and_then(|meta|
216213
meta.get("source")

0 commit comments

Comments
 (0)