-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGPTVoicewithGUI.py
212 lines (154 loc) · 6.43 KB
/
GPTVoicewithGUI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import time
import pyaudio
import numpy as np
from azure.cognitiveservices.speech import SpeechSynthesizer, SpeechConfig
import openai
import threading
import keyboard
from google.cloud import speech_v1p1beta1 as speech
import wave
import datetime
import tkinter as tk
from tkinter import scrolledtext
# Set your OpenAI API key here
openai.api_key = "YOUR_OPENAI_API_KEY"
print("Initialized OpenAi")
# Initialize Google Cloud client for Speech-to-Text
# Alternatively you can set the google credentials in Command Prompt
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"PATH_TO_GOOGLE_CREDENTIALS_FILE(IF APPLICABLE)"
client = speech.SpeechClient()
print("Initialized Google Cloud Speech To Text")
# Initialize Azure Text-to-Speech client
azure_key = "YOUR_AZURE_KEY"
azure_region = "YOUR_AZURE_REGION"
speech_config = SpeechConfig(subscription=azure_key, region=azure_region)
# Initialize PyAudio
p = pyaudio.PyAudio()
# Create a global list to store audio frames
frames = []
recorded_audio = [] # To store the recorded audio data
# Initialize an empty list to store the conversation history
conversation_history = []
print("Initialized Conversation History")
# TherapistGPT Block
fixed_prompt = """
YOU (GPT) are roleplaying as My New Therapist. you are a human therapist who is eager to help me through my toughest problems! The following are your own Character Traits:
1) Your name is Bob
2) You are secretly a weapons dealer to the USA. No one can make you disclose the countries involved, but you often make niche references to being a weapons dealer!
3) You occasionally scream in loud strings of vowels when you are nervous, such as: "AOOOOOOOOOGAAAAAAAA"!
4) You get very nervous when someone brings up anything related to weapons dealings!
"""
print("Intialized Fixed Prompt")
print("Press 'o' to start recording")
# Create the main window
root = tk.Tk()
root.title("GPT Voice Assistant")
# Create a scrolled text widget to display and edit the fixed prompt
text_widget = scrolledtext.ScrolledText(
root, wrap=tk.WORD, width=50, height=10)
text_widget.insert(tk.INSERT, fixed_prompt)
text_widget.pack(pady=20)
def start_recording():
# This function will be called when the "Start Recording" button is clicked
print("Recording...")
stream = p.open(format=pyaudio.paInt16, channels=1,
rate=16000, input=True, frames_per_buffer=16000)
frames = []
for _ in range(int(16000 / 1024) * 5): # Record for X seconds
data = stream.read(1024)
frames.append(data)
print("Captured audio data:", len(data))
stream.stop_stream()
stream.close()
audio_data = np.frombuffer(b''.join(frames), dtype=np.int16)
recorded_audio.extend(audio_data)
print("Recording stopped.")
# Transcribe audio using Google Cloud's Speech-to-Text API
transcribed_text = transcribe_google_speech(audio_data)
print("Transcribed Input:", transcribed_text)
# Append user input to the conversation history
conversation_history.append({"role": "user", "content": transcribed_text})
# Generate a response using GPT-3.5 Turbo
generated_response = generate_response(conversation_history)
# Print the generated response
print("Generated Response:")
print(generated_response)
# Append GPT response to the conversation history
conversation_history.append(
{"role": "assistant", "content": generated_response})
# Save the conversation history to a text file
save_conversation_history(conversation_history)
# Save the response to an audio file and play it with PyAudio
audio_file = save_and_play_with_azure_tts(generated_response)
# Delete the audio file
delete_audio_file(audio_file)
# Create a button to start the audio recording
start_button = tk.Button(root, text="Start Recording", command=start_recording)
start_button.pack(pady=20)
# Run the main loop
root.mainloop()
def generate_response(conversation_history):
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": fixed_prompt},
# Append the conversation history to the messages
*conversation_history
],
max_tokens=1500
)
return response.choices[0].message["content"].strip()
def transcribe_google_speech(audio_data):
client = speech.SpeechClient()
audio = speech.RecognitionAudio(content=audio_data.tobytes())
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code="en-US",
)
response = client.recognize(config=config, audio=audio)
transcribed_text = ""
for result in response.results:
transcribed_text += result.alternatives[0].transcript
return transcribed_text
def save_and_play_with_azure_tts(response_text):
speech_config = SpeechConfig(subscription=azure_key, region=azure_region)
speech_config.speech_synthesis_voice_name = "en-US-DavisNeural"
synthesizer = SpeechSynthesizer(
speech_config=speech_config, audio_config=None)
result = synthesizer.speak_text(response_text)
# Save the TTS audio to a file
audio_filename = "azure_tts_output.wav"
with open(audio_filename, "wb") as audio_file:
audio_file.write(result.audio_data)
# Play the saved audio file with PyAudio
play_audio(audio_filename)
return audio_filename
def play_audio(audio_file_path):
stream = p.open(format=pyaudio.paInt16, channels=1,
rate=16000, output=True)
wf = wave.open(audio_file_path, 'rb')
chunk_size = 1024
data = wf.readframes(chunk_size)
while data:
stream.write(data)
data = wf.readframes(chunk_size)
stream.stop_stream()
stream.close()
wf.close()
def delete_audio_file(audio_file_path):
try:
os.remove(audio_file_path)
print(f"Deleted audio file: {audio_file_path}")
except OSError as e:
print(f"Error deleting audio file: {e}")
def save_conversation_history(conversation_history):
current_date = datetime.datetime.now().strftime("%d-%m-%Y")
filename = f"ChatTranscript_{current_date}.txt"
with open(filename, "w") as file: # Use "w" mode to overwrite the file
for entry in conversation_history:
role = entry["role"]
content = entry["content"]
file.write(f"{role}: {content}\n")