# backup_app.py
from flask import Flask, request, jsonify, send_from_directory, render_template
from flask_cors import CORS
from openai import OpenAI
from datetime import datetime
import os
import numpy as np
import librosa  # Import librosa for audio analysis
import json
from python_speech_features import mfcc
from dotenv import load_dotenv
from flask_socketio import SocketIO, emit
import tempfile  # Import tempfile for temporary file storage
from pathlib import Path  # Import Path for handling file paths
import webrtcvad
import boto3  # Import boto3 for Amazon Polly

# Load environment variables from .env file
load_dotenv()

# Initialize Flask app and SocketIO
app = Flask(__name__, static_folder='static', template_folder='templates')
CORS(app)  # Enable CORS for all routes of the Flask app
socketio = SocketIO(app)  # Initialize Flask-SocketIO before using it

# AWS Polly setup
polly_client = boto3.client('polly', region_name=os.getenv("AWS_REGION"))

# Retrieve the API key from the environment variable
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("No OpenAI API key found. Please set the OPENAI_API_KEY environment variable.")

client = OpenAI(api_key=api_key)
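# Assumed local setup for this backup build: OPENAI_API_KEY and AWS_REGION come from the
# .env file loaded above, and boto3 resolves AWS credentials from its usual sources
# (e.g. AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables or a shared
# credentials file); only the region is passed explicitly here.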


def get_chatbot_response(user_input):
    # Comprehensive system message in Japanese with the essential project details
    system_content = """
あなたは、TOKYO BEASTプロジェクトに関する知識豊富なアシスタントです。このプロジェクトは、2024年にリリース予定のWeb3エンターテインメントで、2124年の未来の東京を舞台にしています。回答は実際のアシスタントのように、短い文で分かりやすく伝えてください。長すぎないようにしましょう。
主な概要:
- **TOKYO BEAST**は、サイバーパンク風の未来の東京を舞台にしたゲームです。
- 2023年9月発表、2024年リリース予定。**gumi**が開発し、**Turingum**が技術・財務アドバイザリーを担当。
- ゲーム内では、プレイヤーは**BEAST NFT**と**$TBZトークン**を使って相互作用します。
ゲームの特徴と機能:
- **$TBZトークン**: ゲーム内通貨で、**BASE**モジュールでの投資に使用。
- **BEAST NFT**: プレイヤーが所有、育成、訓練するデジタルアセット。**FARM**モジュールで育成可能。
- **TRIALS**: BEASTのコピーを使用してアリーナでバトルし、トッププレイヤーはチャンピオンシップへ進出。
- **BETTING**: 暗号資産を使ったベッティング機能。チャンピオンシップの勝敗予想が可能。
コミュニティと法的対応:
- 暗号通貨を使ったギャンブル機能があり、法的レビューを受けて開発中。
- 世界配信される試合とグローバルなベッティングが予定されています。
拡張モジュール:
- **CLASH**: 日々のベッティング機会。
- **FUSION**: 人気NFTやゲームとのコラボレーション。
- **ITEMIZE**: フィジカルグッズやアパレル販売。
- **ANIMATION**: ゲームの世界観を拡張するアニメーション展開。
早期参加キャンペーン:
- 2023年10月31日まで**Early Entry Campaign**が実施され、NFTや$TBZトークンが報酬として提供される可能性があります。
    """
    # Create a completion request with system context and user input
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_content},  # System message with full content in Japanese
            {"role": "user", "content": user_input}  # User's input or question
        ]
    )
    return response.choices[0].message.content.strip()


# # Function to convert text to speech using OpenAI's TTS API
# def text_to_speech(text):
#     try:
#         # Use OpenAI TTS API to generate speech
#         response = client.audio.speech.create(
#             model="tts-1",
#             voice="alloy",
#             input=text
#         )
#         # Create a temporary file to store the audio data
#         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
#         # Save the generated speech to the specified file path
#         response.stream_to_file(Path(temp_file.name))
#         return temp_file.name  # Return the path to the audio file
#     except Exception as e:
#         print(f"Error generating speech: {e}")
#         return None


# Function to convert text to speech using Amazon Polly
def text_to_speech(text):
    try:
        # Request speech synthesis from Amazon Polly
        response = polly_client.synthesize_speech(
            Text=text,
            VoiceId="Mizuki",  # Use the 'Mizuki' voice for Japanese; change this if needed
            OutputFormat="mp3"
        )
        # Create a temporary file to store the audio data
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        with open(temp_file.name, "wb") as audio_file:
            audio_file.write(response['AudioStream'].read())
        return temp_file.name  # Return the path to the temporary audio file
    except Exception as e:
        print(f"Error generating speech with Polly: {e}")
        return None


def analyze_audio(audio_path, aggressiveness=0, voiced_threshold=0.3):
    """
    Analyze audio using WebRTC VAD to determine if it contains human voice.

    Args:
        audio_path (str): Path to the audio file to be analyzed.
        aggressiveness (int): Aggressiveness level of VAD (0-3). Lower values are less aggressive.
        voiced_threshold (float): Proportion of voiced frames required to consider the audio human voice.

    Returns:
        bool: True if human voice is detected, otherwise False.
    """
    try:
        # Initialize the VAD object with the specified aggressiveness level
        vad = webrtcvad.Vad(aggressiveness)  # 0: least aggressive, 3: most aggressive

        # Load audio with librosa, resampled to 16 kHz (a sample rate supported by VAD)
        y, sr = librosa.load(audio_path, sr=16000)

        # Check if the loaded audio has any content
        if len(y) == 0:
            print("Error: Loaded audio has no content.")
            return False

        # Convert the audio signal to 16-bit PCM as required by VAD
        # (scale the float samples into the int16 range; 32767 avoids overflow at +1.0)
        samples = (y * 32767).astype(np.int16).tobytes()

        # Define parameters for chunking the audio
        sample_width = 2  # Each sample is 2 bytes (16-bit)
        frame_duration = 30  # Frame duration of 30 ms
        frame_size = int(sr * (frame_duration / 1000.0))  # Number of samples per frame

        # Split the audio into 30 ms frames (webrtcvad accepts 10 ms, 20 ms, or 30 ms frames)
        frames = [samples[i:i + frame_size * sample_width]
                  for i in range(0, len(samples), frame_size * sample_width)]

        # Check if any frames were created
        if len(frames) == 0:
            print("Error: No frames created for analysis.")
            return False

        # Keep only full-length frames (webrtcvad requires frames of a consistent, exact length)
        valid_frames = [frame for frame in frames if len(frame) == frame_size * sample_width]
        if len(valid_frames) == 0:
            print("Error: No valid frames found for analysis.")
            return False

        # Count the number of voiced frames using webrtcvad
        num_voiced_frames = sum(1 for frame in valid_frames if vad.is_speech(frame, sr))

        # Calculate the proportion of voiced frames
        voiced_proportion = num_voiced_frames / len(valid_frames)

        # Consider it human voice if the proportion of voiced frames exceeds the threshold
        if voiced_proportion > voiced_threshold:
            print(f"Detected human voice. Voiced proportion: {voiced_proportion:.2f}")
            return True
        else:
            print(f"No human voice detected. Voiced proportion: {voiced_proportion:.2f}")
            return False
    except Exception as e:
        print(f"Error analyzing audio with VAD: {e}")
        return False


@app.route('/')
def index():
    return render_template('index.html')


# @socketio.on('connect')
# def handle_connect():
#     """Send a greeting message when the user first connects to the chat."""
#     initial_message = "Hi there! Need help or just a friendly chat?"
#     # Convert the initial greeting to speech
#     audio_path = text_to_speech(initial_message)
#     if audio_path:
#         with open(audio_path, 'rb') as audio_file:
#             audio_data = audio_file.read()
#         # Emit the initial message and audio to the frontend
#         emit('response_with_audio', {'message': initial_message, 'audio': audio_data})
#     else:
#         emit('response_with_audio', {'message': initial_message, 'error': 'Failed to generate initial audio.'})


@socketio.on('message')
def handle_message(msg):
    # Get chatbot response
    bot_message = get_chatbot_response(msg)

    # Convert response to speech
    audio_path = text_to_speech(bot_message)

    if audio_path:  # Check if the audio path is valid before proceeding
        with open(audio_path, 'rb') as audio_file:
            audio_data = audio_file.read()
        # Emit both the message and audio data to the frontend
        emit('response_with_audio', {'message': bot_message, 'audio': audio_data})
    else:
        emit('response_with_audio', {'message': bot_message, 'error': 'Failed to generate audio.'})


@socketio.on('speech_to_text')
def handle_speech_to_text(data):
    try:
        # Create a temporary file to save the incoming audio data
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_audio_file.write(data['audio'])
        temp_audio_file.close()

        # Analyze the audio to detect if it contains human voice
        contains_human_voice = analyze_audio(temp_audio_file.name)

        if contains_human_voice:
            # Call the OpenAI API for transcription if human voice is detected
            with open(temp_audio_file.name, 'rb') as audio_file:
                transcription = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                )
            emit('stt_response', {'text': transcription.text})
        else:
            emit('stt_response', {'text': None, 'error': 'No human voice detected.'})
    except Exception as e:
        print(f"Error transcribing speech: {e}")
        emit('stt_response', {'text': None, 'error': str(e)})


if __name__ == '__main__':
    socketio.run(app, debug=True)
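
# To run this backup build locally (assumed setup): install the dependencies imported
# above, provide OPENAI_API_KEY, AWS_REGION and AWS credentials via the environment or
# a .env file, place index.html under templates/, then run `python backup_app.py` and
# open the default Flask-SocketIO development address (http://127.0.0.1:5000/).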