-
Notifications
You must be signed in to change notification settings - Fork 0
/
inputtovoice.py
133 lines (99 loc) · 4.34 KB
/
inputtovoice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from openai import OpenAI
import os
import time
import pyaudio
import numpy as np
from azure.cognitiveservices.speech import SpeechSynthesizer, SpeechConfig
import wave
# --- OpenAI configuration ---------------------------------------------------
# The OpenAI API key is read from the environment so it never lives in source.
api_key = os.getenv("OPENAI_API_KEY")

# Fail fast at start-up instead of on the first chat request.
if not api_key:
    raise ValueError("The OpenAI API key is not set in the environment variables.")

# Client used by generate_response() for chat completions.
client = OpenAI(api_key=api_key)

# --- Azure Text-to-Speech configuration -------------------------------------
# Credentials are taken from the SPEECH_KEY / SPEECH_REGION environment
# variables when they are set (same approach as the OpenAI key above). The
# original placeholders remain as fallbacks, so a copy of this file with the
# placeholders edited in place keeps working when the variables are unset.
azure_key = os.getenv("SPEECH_KEY") or "YOUR_AZURE_KEY"
azure_region = os.getenv("SPEECH_REGION") or "YOUR_AZURE_REGION"
speech_config = SpeechConfig(subscription=azure_key, region=azure_region)

# --- Audio ------------------------------------------------------------------
# Single PyAudio instance shared by play_audio().
p = pyaudio.PyAudio()

# Global list intended to hold recorded audio frames (no function in this file
# currently writes to it).
recorded_audio = []

# --- Conversation state -----------------------------------------------------
# Chat messages exchanged so far, as {"role": ..., "content": ...} dicts.
conversation_history = []

# Persona prompt sent with every request by generate_response().
fixed_prompt = """
YOU (GPT) are roleplaying as My New Therapist. you are a human therapist who is eager to help me through my toughest problems! The following are your own Character Traits:
1) Your name is Bob
2) You are secretly a weapons dealer to the USA. No one can make you disclose the countries involved, but you often make niche references to being a weapons dealer!
3) You occasionally scream in loud strings of vowels when you are nervous, such as: "AOOOOOOOOOGAAAAAAAA"!
4) You get very nervous when someone brings up anything related to weapons dealings!
"""
def generate_response(conversation_history, model_name="gpt-4-1106-preview"):
    """Request the next reply from the chat model, with model fallback.

    Every request starts with a generic system message and the module-level
    ``fixed_prompt`` (sent as a user message), followed by the supplied
    conversation messages.

    Args:
        conversation_history: Either a sequence of chat messages (dicts with
            ``"role"`` and ``"content"`` keys) or a plain string, which is
            treated as a single user message.
        model_name: Model to try first. ``"gpt-4-1106-preview"`` falls back
            to ``"gpt-4"``, which falls back to ``"gpt-3.5-turbo"``. Any
            other model name has no fallback.

    Returns:
        The reply text with leading and trailing whitespace removed.

    Raises:
        Exception: Whatever the last attempted model raised, once no further
            fallback is available.
    """
    # A bare string would otherwise be unpacked one character at a time into
    # the message list by the ``*`` below, producing an invalid request.
    if isinstance(conversation_history, str):
        history = [{"role": "user", "content": conversation_history}]
    else:
        history = list(conversation_history)

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": fixed_prompt},
        *history,
    ]

    # model -> (model to try next, notice printed when falling back)
    fallbacks = {
        "gpt-4-1106-preview": (
            "gpt-4",
            "Failed to access GPT-4-1106-preview, falling back to GPT-4.",
        ),
        "gpt-4": (
            "gpt-3.5-turbo",
            "Failed to access GPT-4, falling back to GPT-3.5-turbo.",
        ),
    }

    current_model = model_name
    while True:
        try:
            completion = client.chat.completions.create(
                model=current_model,
                messages=messages,
            )
            return completion.choices[0].message.content.strip()
        except Exception:
            # Deliberately broad: any failure with this model moves on to the
            # next one; the error propagates once the chain is exhausted.
            if current_model not in fallbacks:
                raise
            current_model, notice = fallbacks[current_model]
            print(notice)
def save_and_play_with_azure_tts(response_text):
    """Synthesize text with Azure TTS, save it to a WAV file and play it.

    Args:
        response_text: Text to speak, using the "en-US-DavisNeural" voice.

    Returns:
        The path of the audio file that was written
        ("azure_tts_output.wav"). The file is left on disk; the caller is
        expected to remove it.

    Raises:
        RuntimeError: If Azure reports that synthesis did not complete. No
            file is written in that case.
    """
    # Local import: the file-level import only brings in SpeechSynthesizer
    # and SpeechConfig.
    from azure.cognitiveservices.speech import ResultReason

    tts_config = SpeechConfig(subscription=azure_key, region=azure_region)
    tts_config.speech_synthesis_voice_name = "en-US-DavisNeural"

    # audio_config=None: the audio is taken from the result object and played
    # back by play_audio() below rather than by the SDK itself.
    synthesizer = SpeechSynthesizer(speech_config=tts_config, audio_config=None)
    result = synthesizer.speak_text(response_text)

    # A failed or cancelled synthesis carries no usable audio; writing it
    # out would only surface later as an obscure WAV parsing error.
    if result.reason != ResultReason.SynthesizingAudioCompleted:
        cancellation = getattr(result, "cancellation_details", None)
        error_details = getattr(cancellation, "error_details", None)
        raise RuntimeError(
            f"Azure speech synthesis failed ({result.reason}): "
            f"{error_details or 'no error details available'}"
        )

    audio_filename = "azure_tts_output.wav"
    with open(audio_filename, "wb") as audio_file:
        audio_file.write(result.audio_data)

    play_audio(audio_filename)
    return audio_filename
def play_audio(audio_file_path):
    """Play a WAV file through PyAudio, blocking until playback finishes.

    The output stream is configured from the WAV header (sample width,
    channel count, frame rate), so files that are not 16 kHz mono 16-bit
    also play at the correct speed and pitch.

    Args:
        audio_file_path: Path to the WAV file to play.

    Raises:
        wave.Error, EOFError, OSError: If the file cannot be opened or parsed
            as WAV, or the output stream cannot be opened.
    """
    frames_per_chunk = 1024

    # Open the file first so that an unreadable file never leaves an output
    # stream open behind it.
    with wave.open(audio_file_path, "rb") as wav_file:
        stream = p.open(
            format=p.get_format_from_width(wav_file.getsampwidth()),
            channels=wav_file.getnchannels(),
            rate=wav_file.getframerate(),
            output=True,
        )
        try:
            # readframes() returns b"" once the file is exhausted.
            for chunk in iter(lambda: wav_file.readframes(frames_per_chunk), b""):
                stream.write(chunk)
        finally:
            # Always release the stream, even if playback fails midway.
            stream.stop_stream()
            stream.close()
def delete_audio_file(audio_file_path):
    """Remove an audio file and report the outcome on standard output.

    Deletion is best-effort: an ``OSError`` (for example, the file does not
    exist) is reported with a printed message and not re-raised.

    Args:
        audio_file_path: Path of the file to delete.
    """
    try:
        os.remove(audio_file_path)
    except OSError as err:
        outcome = f"Error deleting audio file: {err}"
    else:
        outcome = f"Deleted audio file: {audio_file_path}"
    print(outcome)
def save_conversation_history(conversation_history):
    """Write the conversation to a dated transcript file.

    The transcript goes to ``ChatTranscript_<DD-MM-YYYY>.txt`` in the current
    working directory, one ``role: content`` line per message. An existing
    file with the same name is overwritten.

    Args:
        conversation_history: Iterable of message dicts, each with ``"role"``
            and ``"content"`` keys.

    Raises:
        KeyError: If a message lacks ``"role"`` or ``"content"``.
        OSError: If the transcript file cannot be written.
    """
    current_date = time.strftime("%d-%m-%Y")
    filename = f"ChatTranscript_{current_date}.txt"

    # Explicit UTF-8: replies can contain non-ASCII characters, which the
    # platform default encoding may not be able to represent.
    with open(filename, "w", encoding="utf-8") as transcript:
        transcript.writelines(
            f"{entry['role']}: {entry['content']}\n"
            for entry in conversation_history
        )
# Interactive text chat: read a line from the user, fetch the model's reply,
# print it and speak it aloud. Typing "exit" (any letter case) ends the session.
try:
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        conversation_history.append({"role": "user", "content": user_input})

        # Only the latest user message is sent to the model. It is passed as
        # a one-element message list because generate_response() unpacks its
        # argument into the request's messages; passing the bare content
        # string would be split into single characters.
        generated_response = generate_response(conversation_history[-1:])

        # NOTE: this label is fixed text; the model that actually answered
        # depends on the fallback chain in generate_response().
        print("GPT-3.5 Turbo: " + generated_response)
        conversation_history.append({"role": "assistant", "content": generated_response})

        # Speak the reply, then remove the temporary audio file.
        audio_file = save_and_play_with_azure_tts(generated_response)
        delete_audio_file(audio_file)
finally:
    # Release the audio resources held by the module-level PyAudio instance,
    # whether the loop ended normally or through an error.
    p.terminate()