@@ -30,7 +30,7 @@ def get_wav_file_parameters(input_file: Union[str, os.PathLike]) -> Dict[str, Un
3030 'duration' : nframes / rate ,
3131 'nchannels' : wf .getnchannels (),
3232 'sampwidth' : wf .getsampwidth (),
33- 'data_offset' : wf .getfp ().size_read + wf .getfp ().offset
33+ 'data_offset' : wf .getfp ().size_read + wf .getfp ().offset ,
3434 }
3535 except :
3636 # Not a WAV file
@@ -46,11 +46,11 @@ class AudioChunkFileIterator:
4646 def __init__ (
4747 self ,
4848 input_file : Union [str , os .PathLike ],
49- chunk_n_frames : int ,
49+ chunk_duration_ms : int ,
5050 delay_callback : Optional [Callable [[bytes , float ], None ]] = None ,
5151 ) -> None :
5252 self .input_file : Path = Path (input_file ).expanduser ()
53- self .chunk_n_frames = chunk_n_frames
53+ self .chunk_duration_ms = chunk_duration_ms
5454 self .delay_callback = delay_callback
5555 self .file_parameters = get_wav_file_parameters (self .input_file )
5656 self .file_object : Optional [typing .BinaryIO ] = open (str (self .input_file ), 'rb' )
@@ -75,16 +75,21 @@ def __iter__(self):
7575
7676 def __next__ (self ) -> bytes :
7777 if self .file_parameters :
78- data = self .file_object .read (self .chunk_n_frames * self .file_parameters ['sampwidth' ] * self .file_parameters ['nchannels' ])
78+ num_frames = int (self .chunk_duration_ms * self .file_parameters ['framerate' ] / 1000 )
79+ data = self .file_object .read (
80+ num_frames * self .file_parameters ['sampwidth' ] * self .file_parameters ['nchannels' ]
81+ )
7982 else :
80- data = self .file_object .read (self .chunk_n_frames )
83+ # Fixed chunk size when file_parameters is not available
84+ data = self .file_object .read (8192 )
8185 if not data :
8286 self .close ()
8387 raise StopIteration
8488 if self .delay_callback is not None :
8589 offset = self .file_parameters ['data_offset' ] if self .first_buffer else 0
8690 self .delay_callback (
87- data [offset :], (len (data ) - offset ) / self .file_parameters ['sampwidth' ] / self .file_parameters ['framerate' ]
91+ data [offset :],
92+ (len (data ) - offset ) / self .file_parameters ['sampwidth' ] / self .file_parameters ['framerate' ],
8893 )
8994 self .first_buffer = False
9095 return data
@@ -104,8 +109,7 @@ def add_word_boosting_to_config(
104109
105110
106111def add_audio_file_specs_to_config (
107- config : Union [rasr .StreamingRecognitionConfig , rasr .RecognitionConfig ],
108- audio_file : Union [str , os .PathLike ],
112+ config : Union [rasr .StreamingRecognitionConfig , rasr .RecognitionConfig ], audio_file : Union [str , os .PathLike ],
109113) -> None :
110114 inner_config : rasr .RecognitionConfig = config if isinstance (config , rasr .RecognitionConfig ) else config .config
111115 wav_parameters = get_wav_file_parameters (audio_file )
@@ -114,10 +118,7 @@ def add_audio_file_specs_to_config(
114118 inner_config .audio_channel_count = wav_parameters ['nchannels' ]
115119
116120
117- def add_speaker_diarization_to_config (
118- config : Union [rasr .RecognitionConfig ],
119- diarization_enable : bool ,
120- ) -> None :
121+ def add_speaker_diarization_to_config (config : Union [rasr .RecognitionConfig ], diarization_enable : bool ,) -> None :
121122 inner_config : rasr .RecognitionConfig = config if isinstance (config , rasr .RecognitionConfig ) else config .config
122123 if diarization_enable :
123124 diarization_config = rasr .SpeakerDiarizationConfig (enable_speaker_diarization = True )
@@ -129,6 +130,7 @@ def add_speaker_diarization_to_config(
129130
130131def print_streaming (
131132 responses : Iterable [rasr .StreamingRecognizeResponse ],
133+ input_file : str = None ,
132134 output_file : Optional [Union [Union [os .PathLike , str , TextIO ], List [Union [os .PathLike , str , TextIO ]]]] = None ,
133135 additional_info : str = 'no' ,
134136 word_time_offsets : bool = False ,
@@ -194,6 +196,10 @@ def print_streaming(
194196 output_file [i ] = Path (elem ).expanduser ().open (file_mode )
195197 start_time = time .time () # used in 'time' additional_info
196198 num_chars_printed = 0 # used in 'no' additional_info
199+ final_transcript = "" # for printing best final transcript
200+ if input_file :
201+ for f in output_file :
202+ f .write (f"File: { input_file } \n " )
197203 for response in responses :
198204 if not response .results :
199205 continue
@@ -204,6 +210,7 @@ def print_streaming(
204210 transcript = result .alternatives [0 ].transcript
205211 if additional_info == 'no' :
206212 if result .is_final :
213+ final_transcript += transcript
207214 if show_intermediate :
208215 overwrite_chars = ' ' * (num_chars_printed - len (transcript ))
209216 for i , f in enumerate (output_file ):
@@ -221,6 +228,7 @@ def print_streaming(
221228 partial_transcript += transcript
222229 elif additional_info == 'time' :
223230 if result .is_final :
231+ final_transcript += transcript
224232 for i , alternative in enumerate (result .alternatives ):
225233 for f in output_file :
226234 f .write (
@@ -239,6 +247,7 @@ def print_streaming(
239247 partial_transcript += transcript
240248 else : # additional_info == 'confidence'
241249 if result .is_final :
250+ final_transcript += transcript
242251 for f in output_file :
243252 f .write (f'## { transcript } \n ' )
244253 f .write (f'Confidence: { result .alternatives [0 ].confidence :9.4f} \n ' )
@@ -259,6 +268,9 @@ def print_streaming(
259268 else :
260269 for f in output_file :
261270 f .write ('----\n ' )
271+ for f in output_file :
272+ f .write (f"Final transcripts:\n " )
273+ f .write (f"0 : { final_transcript } \n " )
262274 finally :
263275 for fo , elem in zip (file_opened , output_file ):
264276 if fo :
@@ -284,6 +296,7 @@ def streaming_request_generator(
284296
285297class ASRService :
286298 """Provides streaming and offline recognition services. Calls gRPC stubs with authentication metadata."""
299+
287300 def __init__ (self , auth : Auth ) -> None :
288301 """
289302 Initializes an instance of the class.
0 commit comments