feat: access the azure voice system
liuxinqi committed Mar 29, 2023
1 parent f21db55 commit 8daad6c
Showing 5 changed files with 74 additions and 107 deletions.
6 changes: 6 additions & 0 deletions .env.example
@@ -1,2 +1,8 @@
 # OpenAI Key
 VITE_OPENAI_API_KEY=xxx
+# Network Proxy
+VITE_SERVE_PROXY=xxx
+# Azure Key
+VITE_SCRIPTION_KEY=xxx
+# Azure Region
+VITE_REGION=xxx
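
The two new Azure entries are the Speech resource's subscription key and region (the variable name `VITE_SCRIPTION_KEY` is kept exactly as the code reads it). A minimal sketch of how they are consumed, mirroring the Content.vue change further down in this commit:

```ts
// Sketch only, based on the Content.vue diff below.
// Vite exposes VITE_-prefixed variables on import.meta.env at build time.
import { SpeechService } from '@/utils'

const { VITE_SCRIPTION_KEY, VITE_REGION } = import.meta.env
const speechService = new SpeechService(VITE_SCRIPTION_KEY, VITE_REGION)
```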
2 changes: 1 addition & 1 deletion index.html
@@ -4,7 +4,7 @@
   <meta charset="UTF-8" />
   <link rel="icon" type="image/x-icon" href="/favicon.ico" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-  <meta http-equiv="Content-Security-Policy" content="script-src 'self' 'unsafe-inline';" />
+  <!-- <meta http-equiv="Content-Security-Policy" content="script-src 'self' 'unsafe-inline';" /> -->
   <title>Polyglot</title>
 </head>
 <body class="font-sans dark:text-white">
1 change: 1 addition & 0 deletions package.json
@@ -25,6 +25,7 @@
"dependencies": {
"@vueuse/core": "^9.13.0",
"eventsource-parser": "^0.1.0",
"microsoft-cognitiveservices-speech-sdk": "^1.26.0",
"unocss": "^0.50.4",
"vue": "^3.2.47"
},
40 changes: 22 additions & 18 deletions src/components/Content.vue
@@ -2,29 +2,24 @@
 import Button from '@/components/widgets/Button.vue'
 import { generateText } from '@/server/api'
 import { useScroll } from '@/hooks'
-import { Recognition, getKey, verifyKey } from '@/utils'
+import { SpeechService, getKey, verifyKey } from '@/utils'
+const { VITE_REGION, VITE_SCRIPTION_KEY } = import.meta.env

 // states
 const chatMessages = ref<ChatMessage[]>([])
 const message = ref('')
 const loading = ref(false)
 const text = ref('')
-const recognition = new Recognition('en-US')
+const speechService = new SpeechService(VITE_SCRIPTION_KEY, VITE_REGION)

 // hooks
 const { el, scrollToBottom } = useScroll()
-const speech = useSpeechSynthesis(text)
-const { start } = useSpeechRecognition()

 // effects
 watch(chatMessages.value, () => nextTick(() => scrollToBottom()))

 // methods
-function play(content: string) {
-  text.value = content
-  speech.speak()
-}

 const roleClass = (role: string) => {
   switch (role) {
     case 'user':
@@ -36,11 +31,21 @@ const roleClass = (role: string) => {
   }
 }

-const startTalking = () => {
-  recognition.start()
-  recognition.onResult((value) => {
-    console.log('value', value)
-  })
-}
+const speak = (content: string) => {
+  text.value = content
+  speechService.textToSpeak(content)
+}
+
+const recognize = async () => {
+  loading.value = true
+  try {
+    const text = await speechService.recognizeSpeech()
+    console.log(text)
+    loading.value = false
+  }
+  catch (error) {
+    loading.value = false
+  }
+}

 const onSubmit = async () => {
@@ -82,7 +87,7 @@ const onSubmit = async () => {
         {{ item.content }}
       </p>
       <p v-if="item.role === 'assistant'" flex>
-        <span class="bg-gray-100/20 rounded-lg w-4 py-1 px-3 center" @click="play(item.content)">
+        <span class="bg-gray-100/20 rounded-lg w-4 py-1 px-3 center" @click="speak(item.content)">
           <i icon-btn rotate-90 i-ic:sharp-wifi />
         </span>
         <!-- <span
@@ -99,8 +104,7 @@ const onSubmit = async () => {
<div class="flex h-10 w-[-webkit-fill-available] mt-1">
<Button
mr-1
i-carbon:microphon
@click="startTalking()"
@click="recognize()"
>
<i i-carbon:microphone />
</Button>
@@ -112,7 +116,7 @@ const onSubmit = async () => {
         input-box p-3 flex-1
       >
       <div v-else class="loading-btn">
-        AI Is Thinking...
+        loading...
       </div>
       <Button
         :disabled="loading"
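
As committed, `recognize()` only logs the transcript and duplicates the `loading` reset across the `try` and `catch` branches. A hedged alternative sketch (not part of this commit) that feeds the result into the component's existing `message` ref and uses `finally` for the reset:

```ts
// Hypothetical variant, not in this commit: push the recognized
// transcript into the chat input instead of logging it.
const recognize = async () => {
  loading.value = true
  try {
    message.value = await speechService.recognizeSpeech()
  }
  finally {
    loading.value = false // runs on success and on failure alike
  }
}
```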
132 changes: 44 additions & 88 deletions src/utils/speaker.ts
@@ -1,90 +1,46 @@
-export class Speaker {
-  public utter: SpeechSynthesisUtterance
-  public voices: SpeechSynthesisVoice[] = []
-
-  constructor(option: { lang?: string; pitch?: number; rate?: number; volume?: number; text?: string }) {
-    const {
-      lang = 'zh-CN',
-      pitch = 1,
-      rate = 1,
-      volume = 1,
-      text = '',
-    } = option
-    this.utter = new window.SpeechSynthesisUtterance()
-    this.utter.lang = lang
-    this.utter.pitch = pitch
-    this.utter.rate = rate
-    this.utter.volume = volume
-    this.utter.text = text
-    this.getVoices()
-  }
-
-  getVoices() {
-    window.speechSynthesis.onvoiceschanged = () => {
-      this.voices = window.speechSynthesis.getVoices()
-      if (this.voices.length > 0)
-        this.utter.voice = this.voices[0] // set the voice source
-    }
-  }
-
-  // start speaking the current utterance
-  start() {
-    window.speechSynthesis.speak(this.utter)
-  }
-
-  // pause playback
-  pause() {
-    window.speechSynthesis.pause()
-  }
-
-  // resume playback after a pause
-  resume() {
-    window.speechSynthesis.resume()
-  }
-
-  // cancel all queued speech
-  cancel() {
-    window.speechSynthesis.cancel()
-  }
-
-  // switch the utterance text and speak it
-  change(text: string) {
-    this.utter.text = text
-    window.speechSynthesis.speak(this.utter)
-  }
-}
-
-export class Recognition {
-  public recognition: any
-  public isListening: boolean
-  public result: string
-
-  constructor(lang = 'zh-CN') {
-    this.recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition || window.mozSpeechRecognition || window.msSpeechRecognition)()
-    this.isListening = false
-    this.result = ''
-    this.recognition.lang = lang
-  }
-
-  // start speech recognition
-  start() {
-    this.isListening = true
-    this.recognition.start()
-  }
-
-  // stop speech recognition
-  stop() {
-    this.isListening = false
-    this.recognition.stop()
-  }
-
-  // listen for the recognition result
-  onResult(callback: (result: string) => void) {
-    this.recognition.onresult = (e: any) => {
-      const result = e.results[0][0].transcript
-      this.result = result
-      callback(result)
-    }
-  }
-}
+import type { VoiceInfo } from 'microsoft-cognitiveservices-speech-sdk'
+import {
+  AudioConfig,
+  SpeechConfig,
+  SpeechRecognizer,
+  SpeechSynthesizer,
+} from 'microsoft-cognitiveservices-speech-sdk'
+
+export class SpeechService {
+  private recognizer: SpeechRecognizer
+  private synthesizer: SpeechSynthesizer
+  private speechConfig: SpeechConfig
+
+  constructor(subscriptionKey: string, region: string) {
+    const speechConfig = SpeechConfig.fromSubscription(subscriptionKey, region)
+    speechConfig.speechRecognitionLanguage = 'en-US'
+    speechConfig.speechSynthesisLanguage = 'en-US'
+    speechConfig.speechSynthesisVoiceName = 'en-US-GuyNeural'
+
+    this.speechConfig = speechConfig
+
+    const audioConfig = AudioConfig.fromDefaultMicrophoneInput()
+    this.recognizer = new SpeechRecognizer(this.speechConfig, audioConfig)
+    this.synthesizer = new SpeechSynthesizer(this.speechConfig)
+  }
+
+  public recognizeSpeech(): Promise<string> {
+    return new Promise((resolve, reject) => {
+      this.recognizer.recognizeOnceAsync((result) => {
+        if (result.text)
+          resolve(result.text)
+        else
+          reject(new Error('Speech recognition failed'))
+      })
+    })
+  }
+
+  public textToSpeak(text: string, voice?: string) {
+    this.speechConfig.speechSynthesisVoiceName = voice || this.speechConfig.speechSynthesisVoiceName
+    this.synthesizer.speakTextAsync(text)
+  }
+
+  public async getVoices(): Promise<VoiceInfo[]> {
+    const res = await this.synthesizer.getVoicesAsync()
+    return res.voices
+  }
+}
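
For orientation, a minimal usage sketch of the new `SpeechService` under the assumptions the class itself sets up (default microphone input, `en-US` defaults); `en-US-JennyNeural` is merely an example of another stock Azure voice, not something this commit uses:

```ts
import { SpeechService } from '@/utils'

// The key and region come from the .env entries added in this commit.
const service = new SpeechService(
  import.meta.env.VITE_SCRIPTION_KEY,
  import.meta.env.VITE_REGION,
)

// One-shot recognition from the default microphone.
const text = await service.recognizeSpeech()

// Speak the transcript back, optionally overriding the default voice.
service.textToSpeak(text, 'en-US-JennyNeural')

// List the voices available in the configured region.
const voices = await service.getVoices()
console.log(voices.map(v => v.shortName))
```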
