-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbing2.py
86 lines (75 loc) · 2.82 KB
/
bing2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
# import httplib
import http.client
import uuid
import json
import speech_recognition as sr
class Microsoft_ASR():
def __init__(self):
self.sub_key = '74c16290bd574b09bb279d6f2224fd1d'
self.token = "4cf3f63793cc4c518648eb2c5ba11ece"
pass
def get_speech_token(self):
FetchTokenURI = "/sts/v1.0/issueToken"
header = {'Ocp-Apim-Subscription-Key': self.sub_key}
conn = http.client.HTTPSConnection('api.cognitive.microsoft.com')
body = ""
conn.request("POST", FetchTokenURI, body, header)
response = conn.getresponse()
str_data = response.read()
conn.close()
self.token = str_data
print("Got Token: ", self.token)
return True
def transcribe(self,speech_file):
# Grab the token if we need it
if self.token is None:
print("No Token... Getting one")
self.get_speech_token()
# endpoint = 'https://speech.platform.bing.com/recognize'
endpoint = "https://api.cognitive.microsoft.com/sts/v1.0"
request_id = uuid.uuid4()
# Params form Microsoft Example
params = {'scenarios': 'ulm',
'appid': 'cc219f0a-2124-47bd-beb8-cc185bbddf68',
'locale': 'en-US',
'version': '3.0',
'format': 'json',
# 'instanceid': '565D69FF-E928-4B7E-87DA-9A750B96D9E3',
'requestid': uuid.uuid4(),
'device.os': 'linux'}
content_type = "audio/wav; codec=""audio/pcm""; samplerate=16000"
# def stream_audio_file(speech_file, chunk_size=1024):
# with open(speech_file, 'rb') as f:
# while 1:
# data = f.read(1024)
# if not data:
# break
# yield data
headers = {'Authorization': 'Bearer ' + self.token,
'Content-Type': content_type}
resp = requests.post(endpoint,
params=params,
data=speech_file,
headers=headers)
print(resp.status_code)
val = json.loads(resp)
print(val)
return val["results"][0]["name"], val["results"][0]["confidence"]
if __name__ == "__main__":
ms_asr = Microsoft_ASR()
# ms_asr.get_speech_token()
r = sr.Recognizer()
mic = sr.Microphone(device_index=0)
print("Say something...")
with mic as source:
r.adjust_for_ambient_noise(source, duration=1.0)
audio = r.listen(source)
print("Audio recorded!")
# client = speech.SpeechClient()
audio = audio.get_wav_data()
text, confidence = ms_asr.transcribe(audio)
print("Text: ", text)
print("Confidence: ", confidence)