@@ -139,33 +139,55 @@ impl Whisper {
139139
140140 let mut segments = Vec :: new ( ) ;
141141 for i in 0 ..num_segments {
142- let text = self . state . full_get_segment_text_lossy ( i) ?;
142+ let text = TRAILING_DOTS
143+ . replace ( & self . state . full_get_segment_text_lossy ( i) ?, "" )
144+ . to_string ( ) ;
145+
143146 let ( start, end) = (
144147 self . state . full_get_segment_t0 ( i) ?,
145148 self . state . full_get_segment_t1 ( i) ?,
146149 ) ;
147150 let confidence = self . calculate_segment_confidence ( i) ;
148151
149- let mut segment = Segment {
152+ segments . push ( Segment {
150153 text,
151154 start : start as f32 / 1000.0 ,
152155 end : end as f32 / 1000.0 ,
153156 confidence,
154157 ..Default :: default ( )
155- } ;
156- segment. trim ( ) ;
157- segments. push ( segment) ;
158+ } ) ;
158159 }
159160
160- self . dynamic_prompt = segments
161+ let segments = Self :: filter_segments ( segments) ;
162+
163+ let full_text = segments
161164 . iter ( )
162165 . map ( |s| s. text ( ) )
163166 . collect :: < Vec < & str > > ( )
164167 . join ( " " ) ;
165168
169+ if !full_text. is_empty ( ) {
170+ self . dynamic_prompt = full_text;
171+ }
172+
166173 Ok ( segments)
167174 }
168175
176+ fn filter_segments ( segments : Vec < Segment > ) -> Vec < Segment > {
177+ segments
178+ . into_iter ( )
179+ . filter ( |s| {
180+ let t = s. text . trim ( ) . to_lowercase ( ) ;
181+
182+ if s. confidence < 0.005 || t == "you" || t == "thank you" || t == "🎵" {
183+ false
184+ } else {
185+ true
186+ }
187+ } )
188+ . collect ( )
189+ }
190+
169191 // https://github.com/ggml-org/whisper.cpp/pull/971/files#diff-2d3599a9fad195f2c3c60bd06691bc1815325b3560b5feda41a91fa71194e805R310-R327
170192 fn calculate_segment_confidence ( & self , segment_idx : i32 ) -> f32 {
171193 let n_tokens = self . state . full_n_tokens ( segment_idx) . unwrap_or ( 0 ) ;
@@ -262,47 +284,13 @@ impl Segment {
262284 pub fn meta ( & self ) -> Option < serde_json:: Value > {
263285 self . meta . clone ( )
264286 }
265-
266- pub fn trim ( & mut self ) {
267- self . text = TRAILING_DOTS . replace ( & self . text , "" ) . to_string ( ) ;
268- }
269287}
270288
271289#[ cfg( test) ]
272290mod tests {
273291 use super :: * ;
274292 use futures_util:: StreamExt ;
275293
276- #[ test]
277- fn test_trim ( ) {
278- {
279- let mut segment = Segment {
280- text : "Hello..." . to_string ( ) ,
281- ..Default :: default ( )
282- } ;
283- segment. trim ( ) ;
284- assert_eq ! ( segment. text, "Hello" ) ;
285- }
286-
287- {
288- let mut segment = Segment {
289- text : "Hello" . to_string ( ) ,
290- ..Default :: default ( )
291- } ;
292- segment. trim ( ) ;
293- assert_eq ! ( segment. text, "Hello" ) ;
294- }
295-
296- {
297- let mut segment = Segment {
298- text : "Hello." . to_string ( ) ,
299- ..Default :: default ( )
300- } ;
301- segment. trim ( ) ;
302- assert_eq ! ( segment. text, "Hello." ) ;
303- }
304- }
305-
306294 #[ test]
307295 fn test_whisper ( ) {
308296 let mut whisper = Whisper :: builder ( )
0 commit comments