Skip to content

Commit 24e3984

Browse files
b-loved-dreamer, telpirion
authored and committed
fix: deleted a line duplicating the call to the recognizer (#83)
* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* docs: I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* fix: resolved conflicts pick f510e8f chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* fix: conflicts
* fix: migrated to speech 2.0.0
* fix: fixed lint issues
* fix: deleted a duplicate line that calls the recognizer
* docs: repaired region tag mismatch
* chore: formatting
* chore: added ]
1 parent 2ce9b5d commit 24e3984

9 files changed

+178
-142
lines changed

speech/microphone/transcribe_streaming_infinite.py

+40-31
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,9 @@
4141
SAMPLE_RATE = 16000
4242
CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms
4343

44-
RED = '\033[0;31m'
45-
GREEN = '\033[0;32m'
46-
YELLOW = '\033[0;33m'
44+
RED = "\033[0;31m"
45+
GREEN = "\033[0;32m"
46+
YELLOW = "\033[0;33m"
4747

4848

4949
def get_current_time():
@@ -123,12 +123,14 @@ def generator(self):
123123
if self.bridging_offset > self.final_request_end_time:
124124
self.bridging_offset = self.final_request_end_time
125125

126-
chunks_from_ms = round((self.final_request_end_time -
127-
self.bridging_offset) / chunk_time)
126+
chunks_from_ms = round(
127+
(self.final_request_end_time - self.bridging_offset)
128+
/ chunk_time
129+
)
128130

129-
self.bridging_offset = (round((
130-
len(self.last_audio_input) - chunks_from_ms)
131-
* chunk_time))
131+
self.bridging_offset = round(
132+
(len(self.last_audio_input) - chunks_from_ms) * chunk_time
133+
)
132134

133135
for i in range(chunks_from_ms, len(self.last_audio_input)):
134136
data.append(self.last_audio_input[i])
@@ -157,7 +159,7 @@ def generator(self):
157159
except queue.Empty:
158160
break
159161

160-
yield b''.join(data)
162+
yield b"".join(data)
161163

162164

163165
def listen_print_loop(responses, stream):
@@ -203,32 +205,35 @@ def listen_print_loop(responses, stream):
203205

204206
stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))
205207

206-
corrected_time = (stream.result_end_time - stream.bridging_offset
207-
+ (STREAMING_LIMIT * stream.restart_counter))
208+
corrected_time = (
209+
stream.result_end_time
210+
- stream.bridging_offset
211+
+ (STREAMING_LIMIT * stream.restart_counter)
212+
)
208213
# Display interim results, but with a carriage return at the end of the
209214
# line, so subsequent lines will overwrite them.
210215

211216
if result.is_final:
212217

213218
sys.stdout.write(GREEN)
214-
sys.stdout.write('\033[K')
215-
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
219+
sys.stdout.write("\033[K")
220+
sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")
216221

217222
stream.is_final_end_time = stream.result_end_time
218223
stream.last_transcript_was_final = True
219224

220225
# Exit recognition if any of the transcribed phrases could be
221226
# one of our keywords.
222-
if re.search(r'\b(exit|quit)\b', transcript, re.I):
227+
if re.search(r"\b(exit|quit)\b", transcript, re.I):
223228
sys.stdout.write(YELLOW)
224-
sys.stdout.write('Exiting...\n')
229+
sys.stdout.write("Exiting...\n")
225230
stream.closed = True
226231
break
227232

228233
else:
229234
sys.stdout.write(RED)
230-
sys.stdout.write('\033[K')
231-
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
235+
sys.stdout.write("\033[K")
236+
sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
232237

233238
stream.last_transcript_was_final = False
234239

@@ -240,34 +245,38 @@ def main():
240245
config = speech.RecognitionConfig(
241246
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
242247
sample_rate_hertz=SAMPLE_RATE,
243-
language_code='en-US',
244-
max_alternatives=1)
248+
language_code="en-US",
249+
max_alternatives=1,
250+
)
251+
245252
streaming_config = speech.StreamingRecognitionConfig(
246-
config=config,
247-
interim_results=True)
253+
config=config, interim_results=True
254+
)
248255

249256
mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
250257
print(mic_manager.chunk_size)
251258
sys.stdout.write(YELLOW)
252259
sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
253-
sys.stdout.write('End (ms) Transcript Results/Status\n')
254-
sys.stdout.write('=====================================================\n')
260+
sys.stdout.write("End (ms) Transcript Results/Status\n")
261+
sys.stdout.write("=====================================================\n")
255262

256263
with mic_manager as stream:
257264

258265
while not stream.closed:
259266
sys.stdout.write(YELLOW)
260-
sys.stdout.write('\n' + str(
261-
STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
267+
sys.stdout.write(
268+
"\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
269+
)
262270

263271
stream.audio_input = []
264272
audio_generator = stream.generator()
265273

266-
requests = (speech.StreamingRecognizeRequest(
267-
audio_content=content)for content in audio_generator)
274+
requests = (
275+
speech.StreamingRecognizeRequest(audio_content=content)
276+
for content in audio_generator
277+
)
268278

269-
responses = client.streaming_recognize(streaming_config,
270-
requests)
279+
responses = client.streaming_recognize(streaming_config, requests)
271280

272281
# Now, put the transcription responses to use.
273282
listen_print_loop(responses, stream)
@@ -281,11 +290,11 @@ def main():
281290
stream.restart_counter = stream.restart_counter + 1
282291

283292
if not stream.last_transcript_was_final:
284-
sys.stdout.write('\n')
293+
sys.stdout.write("\n")
285294
stream.new_stream = True
286295

287296

288-
if __name__ == '__main__':
297+
if __name__ == "__main__":
289298

290299
main()
291300

speech/microphone/transcribe_streaming_mic.py

+21-14
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343

4444
class MicrophoneStream(object):
4545
"""Opens a recording stream as a generator yielding the audio chunks."""
46+
4647
def __init__(self, rate, chunk):
4748
self._rate = rate
4849
self._chunk = chunk
@@ -57,8 +58,10 @@ def __enter__(self):
5758
format=pyaudio.paInt16,
5859
# The API currently only supports 1-channel (mono) audio
5960
# https://goo.gl/z757pE
60-
channels=1, rate=self._rate,
61-
input=True, frames_per_buffer=self._chunk,
61+
channels=1,
62+
rate=self._rate,
63+
input=True,
64+
frames_per_buffer=self._chunk,
6265
# Run the audio stream asynchronously to fill the buffer object.
6366
# This is necessary so that the input device's buffer doesn't
6467
# overflow while the calling thread makes network requests, etc.
@@ -103,7 +106,7 @@ def generator(self):
103106
except queue.Empty:
104107
break
105108

106-
yield b''.join(data)
109+
yield b"".join(data)
107110

108111

109112
def listen_print_loop(responses):
@@ -141,10 +144,10 @@ def listen_print_loop(responses):
141144
#
142145
# If the previous result was longer than this one, we need to print
143146
# some extra spaces to overwrite the previous result
144-
overwrite_chars = ' ' * (num_chars_printed - len(transcript))
147+
overwrite_chars = " " * (num_chars_printed - len(transcript))
145148

146149
if not result.is_final:
147-
sys.stdout.write(transcript + overwrite_chars + '\r')
150+
sys.stdout.write(transcript + overwrite_chars + "\r")
148151
sys.stdout.flush()
149152

150153
num_chars_printed = len(transcript)
@@ -154,8 +157,8 @@ def listen_print_loop(responses):
154157

155158
# Exit recognition if any of the transcribed phrases could be
156159
# one of our keywords.
157-
if re.search(r'\b(exit|quit)\b', transcript, re.I):
158-
print('Exiting..')
160+
if re.search(r"\b(exit|quit)\b", transcript, re.I):
161+
print("Exiting..")
159162
break
160163

161164
num_chars_printed = 0
@@ -164,28 +167,32 @@ def listen_print_loop(responses):
164167
def main():
165168
# See http://g.co/cloud/speech/docs/languages
166169
# for a list of supported languages.
167-
language_code = 'en-US' # a BCP-47 language tag
170+
language_code = "en-US" # a BCP-47 language tag
168171

169172
client = speech.SpeechClient()
170173
config = speech.RecognitionConfig(
171174
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
172175
sample_rate_hertz=RATE,
173-
language_code=language_code)
176+
language_code=language_code,
177+
)
178+
174179
streaming_config = speech.StreamingRecognitionConfig(
175-
config=config,
176-
interim_results=True)
180+
config=config, interim_results=True
181+
)
177182

178183
with MicrophoneStream(RATE, CHUNK) as stream:
179184
audio_generator = stream.generator()
180-
requests = (speech.StreamingRecognizeRequest(audio_content=content)
181-
for content in audio_generator)
185+
requests = (
186+
speech.StreamingRecognizeRequest(audio_content=content)
187+
for content in audio_generator
188+
)
182189

183190
responses = client.streaming_recognize(streaming_config, requests)
184191

185192
# Now, put the transcription responses to use.
186193
listen_print_loop(responses)
187194

188195

189-
if __name__ == '__main__':
196+
if __name__ == "__main__":
190197
main()
191198
# [END speech_transcribe_streaming_mic]

speech/microphone/transcribe_streaming_mic_test.py

+11-8
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import mock
2020

21-
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
21+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
2222

2323

2424
class MockPyAudio(object):
@@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
3232
self.rate = rate
3333
self.closed = threading.Event()
3434
self.stream_thread = threading.Thread(
35-
target=self.stream_audio, args=(
36-
self.audio_filename, stream_callback, self.closed))
35+
target=self.stream_audio,
36+
args=(self.audio_filename, stream_callback, self.closed),
37+
)
3738
self.stream_thread.start()
3839
return self
3940

@@ -47,23 +48,25 @@ def terminate(self):
4748
pass
4849

4950
def stream_audio(self, audio_filename, callback, closed, num_frames=512):
50-
with open(audio_filename, 'rb') as audio_file:
51+
with open(audio_filename, "rb") as audio_file:
5152
while not closed.is_set():
5253
# Approximate realtime by sleeping for the appropriate time for
5354
# the requested number of frames
5455
time.sleep(num_frames / float(self.rate))
5556
# audio is 16-bit samples, whereas python byte is 8-bit
5657
num_bytes = 2 * num_frames
57-
chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
58+
chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
5859
callback(chunk, None, None, None)
5960

6061

61-
@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
62-
PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
62+
@mock.patch.dict(
63+
"sys.modules",
64+
pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
65+
)
6366
def test_main(capsys):
6467
import transcribe_streaming_mic
6568

6669
transcribe_streaming_mic.main()
6770
out, err = capsys.readouterr()
6871

69-
assert re.search(r'quit', out, re.DOTALL | re.I)
72+
assert re.search(r"quit", out, re.DOTALL | re.I)

0 commit comments

Comments
 (0)