-
-
Notifications
You must be signed in to change notification settings - Fork 135
/
modelname_settings_page.py
290 lines (258 loc) · 21.2 KB
/
modelname_settings_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import os
import json
import requests
import gradio as gr
from tqdm import tqdm
from pathlib import Path
from .help_content import AllTalkHelpContent
# Folder containing THIS script; the engine's JSON files (model_settings.json, available_models.json) are read from here.
this_dir = Path(__file__).parent.resolve()
# Folder three levels above this script's folder: the folder AllTalk is running in, which holds "voices" and "models".
main_dir = Path(__file__).parent.parent.parent.parent.resolve()
##########################################################################
# REQUIRED CHANGE #
# Populate the voices list, using the method specific to your TTS engine #
##########################################################################
# This function is responsible for populating the list of available voices for your TTS engine.
# You need to modify this function to use the appropriate method for your engine to retrieve the voice list.
#
# The current implementation lists all the WAV files in a "voices" directory, which may not be suitable for your engine.
# You should replace the `xxxx_voices_file_list` function name to match your engine name. For example, if your engine
# is named "mytts", the function should be named `mytts_voices_file_list`.
#
# You will also need to update the code with your own implementation that retrieves the voice list according to your
# engine's specific requirements. Typically this is the same code as will be in your model_engine.py file.
#
# For example, if your engine has a dedicated API or configuration file for managing voices, you should modify this
# function to interact with that API or read from that configuration file.
#
# After making the necessary changes, this function should return a list of available voices that can be used
# in your TTS engine's settings page.
def xtts_voices_file_list(directory=None):
    """Return the names of the ``.wav`` voice samples available to the engine.

    Args:
        directory: Folder to scan. When omitted, the ``voices`` folder inside
            the AllTalk main directory (``main_dir / "voices"``) is used.

    Returns:
        Alphabetically sorted list of file names (not full paths) of the
        regular files in ``directory`` whose name ends with ``.wav``
        (case-sensitive). An empty list is returned when the folder does not
        exist, so building the settings page does not fail on a missing folder.
    """
    if directory is None:
        directory = main_dir / "voices"
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # No voices folder: offer no voices instead of crashing the page build.
        return []
    # os.listdir order is arbitrary; sort so the dropdowns are stable between runs.
    return sorted(
        name
        for name in entries
        if name.endswith(".wav") and os.path.isfile(os.path.join(directory, name))
    )
######################################################
# REQUIRED CHANGE #
# Imports and saves the TTS engine-specific settings #
######################################################
# This function is responsible for importing and saving the settings specific to your TTS engine.
# You need to make the following change:
#
# 1. Change the name of the function `xxxx_model_update_settings` to match your engine's name.
# For example, if your engine is named "mytts", the function should be named `mytts_model_update_settings`.
#
# After making this change, the function will load the model settings from a JSON file, update the settings and voice
# dictionaries with the values provided as arguments, and save the updated settings back to the JSON file.
#
# You do not need to modify the function's logic or any other part of the code.
def xtts_model_update_settings(def_character_voice_gr, def_narrator_voice_gr, lowvram_enabled_gr, deepspeed_enabled_gr, temperature_set_gr, repetitionpenalty_set_gr, pitch_set_gr, generationspeed_set_gr, alloy_gr, echo_gr, fable_gr, nova_gr, onyx_gr, shimmer_gr):
    """Persist the values chosen on the settings page to ``model_settings.json``.

    The JSON file next to this script is read, its ``settings`` and
    ``openai_voices`` sections are overwritten with the supplied values, and
    the whole document is written back with a 4-space indent.

    Args:
        def_character_voice_gr: Default/character voice to store.
        def_narrator_voice_gr: Narrator voice to store.
        lowvram_enabled_gr: Radio value; stored as ``True`` only when it
            equals ``"Enabled"``.
        deepspeed_enabled_gr: Radio value; stored as ``True`` only when it
            equals ``"Enabled"``.
        temperature_set_gr: Value stored as ``temperature_set``.
        repetitionpenalty_set_gr: Value stored as ``repetitionpenalty_set``.
        pitch_set_gr: Value stored as ``pitch_set``.
        generationspeed_set_gr: Value stored as ``generationspeed_set``.
        alloy_gr, echo_gr, fable_gr, nova_gr, onyx_gr, shimmer_gr: Voices
            stored under the matching ``openai_voices`` keys.

    Returns:
        The confirmation message shown in the page's output box.
    """
    settings_file = os.path.join(this_dir, "model_settings.json")

    # Read the current configuration from disk.
    with open(settings_file, "r") as handle:
        config = json.load(handle)

    # Default voices come first so that any newly created keys keep their order.
    config["settings"].update(
        {
            "def_character_voice": def_character_voice_gr,
            "def_narrator_voice": def_narrator_voice_gr,
        }
    )

    # Voice used for each OpenAI-style voice name.
    config["openai_voices"].update(
        {
            "alloy": alloy_gr,
            "echo": echo_gr,
            "fable": fable_gr,
            "nova": nova_gr,
            "onyx": onyx_gr,
            "shimmer": shimmer_gr,
        }
    )

    # The radio buttons deliver a label; the file stores booleans.
    config["settings"].update(
        {
            "lowvram_enabled": lowvram_enabled_gr == "Enabled",
            "deepspeed_enabled": deepspeed_enabled_gr == "Enabled",
            "temperature_set": temperature_set_gr,
            "repetitionpenalty_set": repetitionpenalty_set_gr,
            "pitch_set": pitch_set_gr,
            "generationspeed_set": generationspeed_set_gr,
        }
    )

    # Write the modified configuration back to the same file.
    with open(settings_file, "w") as handle:
        json.dump(config, handle, indent=4)

    return "Settings updated successfully!"
#######################################################
# REQUIRED CHANGE #
# Sets up the engine-specific settings page in Gradio #
#######################################################
# This function sets up the Gradio interface for the settings page specific to your TTS engine.
# You need to make the following changes:
#
# 1. Change the name of the function `xxxx_model_alltalk_settings` to match your engine's name.
# For example, if your engine is named "mytts", the function should be named `mytts_model_alltalk_settings`.
#
# 2. Change the name of the `submit_button.click` function call to match the name you gave to the function
# that imports and saves your engine's settings (the function you modified above).
#
# 3. Change the name of the `voice_list` function call to match the name of the function that lists
# the available voices for your TTS engine.
#
# 4. Change the 'title' of the `gr.Blocks` to match your engine's name e.g. title="mytts TTS"
#
# After making these changes, this function will create and return the Gradio interface for your TTS engine's
# settings page, allowing users to configure various options and voice selections.
def xtts_model_alltalk_settings(model_config_data):
    """Build the Gradio settings page for the XTTS engine.

    The page has four tabs:

    * "Default Settings" - engine defaults and voice mappings; the
      "Update Settings" button saves them via ``xtts_model_update_settings``.
    * "Engine Information" - read-only summary of the engine capabilities.
    * "Models/Voices Download" - downloads the files listed in
      ``available_models.json`` (read from this script's folder while the
      page is being built) into ``<main_dir>/models/xtts/<folder_path>``.
    * "Engine Help" - static help text.

    Args:
        model_config_data: Parsed engine configuration. The sections
            ``model_capabilties`` (spelled as in the configuration),
            ``settings``, ``openai_voices`` and ``model_details`` are read.

    Returns:
        The ``gr.Blocks`` application containing the settings page.
    """
    features_list = model_config_data['model_capabilties']
    voice_list = xtts_voices_file_list()
    with gr.Blocks(title="Xtts TTS", analytics_enabled=False) as app:
        with gr.Tab("Default Settings"):
            with gr.Row():
                # Controls for features the engine does not support are labelled "N/A" and made non-interactive.
                # NOTE(review): `choices` is passed as a dict on the two radios below; the value is handled as the
                # label "Enabled"/"Disabled" (see `value=` here and the comparison in xtts_model_update_settings) -
                # confirm the dict form is intended for the installed Gradio version.
                lowvram_enabled_gr = gr.Radio(choices={"Enabled": "true", "Disabled": "false"}, label="Low VRAM" if model_config_data["model_capabilties"]["lowvram_capable"] else "Low VRAM N/A", value="Enabled" if model_config_data["settings"]["lowvram_enabled"] else "Disabled", interactive=model_config_data["model_capabilties"]["lowvram_capable"])
                deepspeed_enabled_gr = gr.Radio(choices={"Enabled": "true", "Disabled": "false"}, label="DeepSpeed Activate" if model_config_data["model_capabilties"]["deepspeed_capable"] else "DeepSpeed N/A", value="Enabled" if model_config_data["settings"]["deepspeed_enabled"] else "Disabled", interactive=model_config_data["model_capabilties"]["deepspeed_capable"])
                temperature_set_gr = gr.Slider(value=float(model_config_data["settings"]["temperature_set"]), minimum=0, maximum=1, step=0.05, label="Temperature" if model_config_data["model_capabilties"]["temperature_capable"] else "Temperature N/A", interactive=model_config_data["model_capabilties"]["temperature_capable"])
                repetitionpenalty_set_gr = gr.Slider(value=float(model_config_data["settings"]["repetitionpenalty_set"]), minimum=1, maximum=20, step=1, label="Repetition Penalty" if model_config_data["model_capabilties"]["repetitionpenalty_capable"] else "Repetition N/A", interactive=model_config_data["model_capabilties"]["repetitionpenalty_capable"])
                pitch_set_gr = gr.Slider(value=float(model_config_data["settings"]["pitch_set"]), minimum=-10, maximum=10, step=1, label="Pitch" if model_config_data["model_capabilties"]["pitch_capable"] else "Pitch N/A", interactive=model_config_data["model_capabilties"]["pitch_capable"])
                generationspeed_set_gr = gr.Slider(value=float(model_config_data["settings"]["generationspeed_set"]), minimum=0.25, maximum=2.00, step=0.25, label="Speed" if model_config_data["model_capabilties"]["generationspeed_capable"] else "Speed N/A", interactive=model_config_data["model_capabilties"]["generationspeed_capable"])
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### OpenAI Voice Mappings")
                    with gr.Group():
                        with gr.Row():
                            alloy_gr = gr.Dropdown(value=model_config_data["openai_voices"]["alloy"], label="Alloy", choices=voice_list, allow_custom_value=True)
                            echo_gr = gr.Dropdown(value=model_config_data["openai_voices"]["echo"], label="Echo", choices=voice_list, allow_custom_value=True)
                        with gr.Row():
                            fable_gr = gr.Dropdown(value=model_config_data["openai_voices"]["fable"], label="Fable", choices=voice_list, allow_custom_value=True)
                            nova_gr = gr.Dropdown(value=model_config_data["openai_voices"]["nova"], label="Nova", choices=voice_list, allow_custom_value=True)
                        with gr.Row():
                            onyx_gr = gr.Dropdown(value=model_config_data["openai_voices"]["onyx"], label="Onyx", choices=voice_list, allow_custom_value=True)
                            shimmer_gr = gr.Dropdown(value=model_config_data["openai_voices"]["shimmer"], label="Shimmer", choices=voice_list, allow_custom_value=True)
                with gr.Column():
                    gr.Markdown("### Default Voices")
                    with gr.Row():
                        def_character_voice_gr = gr.Dropdown(value=model_config_data["settings"]["def_character_voice"], label="Default/Character Voice", choices=voice_list, allow_custom_value=True)
                        def_narrator_voice_gr = gr.Dropdown(value=model_config_data["settings"]["def_narrator_voice"], label="Narrator Voice", choices=voice_list, allow_custom_value=True)
                    with gr.Group():
                        with gr.Row():
                            details_text = gr.Textbox(label="Details", show_label=False, lines=5, interactive=False, value="Configure default settings and voice mappings for the selected TTS engine. Unavailable options are grayed out based on engine capabilities. See the Help section below for detailed information about each setting.")
            with gr.Row():
                submit_button = gr.Button("Update Settings")
                output_message = gr.Textbox(label="Output Message", interactive=False, show_label=False)
            with gr.Accordion("HELP - 🔊 Understanding TTS Engine Default Settings Page", open=False):
                with gr.Row():
                    gr.Markdown(AllTalkHelpContent.DEFAULT_SETTINGS, elem_classes="custom-markdown")
                with gr.Row():
                    gr.Markdown(AllTalkHelpContent.DEFAULT_SETTINGS1, elem_classes="custom-markdown")
                    gr.Markdown(AllTalkHelpContent.DEFAULT_SETTINGS2, elem_classes="custom-markdown")
            # The input order must match the parameter order of xtts_model_update_settings.
            submit_button.click(xtts_model_update_settings, inputs=[def_character_voice_gr, def_narrator_voice_gr, lowvram_enabled_gr, deepspeed_enabled_gr, temperature_set_gr, repetitionpenalty_set_gr, pitch_set_gr, generationspeed_set_gr, alloy_gr, echo_gr, fable_gr, nova_gr, onyx_gr, shimmer_gr], outputs=output_message)

        ###########################################################################################
        # Do not change this section apart from "TTS Engine Name" value to match your engine name #
        ###########################################################################################
        with gr.Tab("Engine Information"):
            with gr.Row():
                with gr.Group():
                    gr.Textbox(label="Manufacturer Name", value=model_config_data['model_details']['manufacturer_name'], interactive=False)
                    gr.Textbox(label="Manufacturer Website/TTS Engine Support", value=model_config_data['model_details']['manufacturer_website'], interactive=False)
                    gr.Textbox(label="Engine/Model Description", value=model_config_data['model_details']['model_description'], interactive=False, lines=13)
                with gr.Column():
                    with gr.Row():
                        gr.Textbox(label="DeepSpeed Capable", value='Yes' if features_list['deepspeed_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Pitch Capable", value='Yes' if features_list['pitch_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Generation Speed Capable", value='Yes' if features_list['generationspeed_capable'] else 'No', interactive=False)
                    with gr.Row():
                        gr.Textbox(label="Repetition Penalty Capable", value='Yes' if features_list['repetitionpenalty_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Multi Languages Capable", value='Yes' if features_list['languages_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Streaming Capable", value='Yes' if features_list['streaming_capable'] else 'No', interactive=False)
                    with gr.Row():
                        gr.Textbox(label="Low VRAM Capable", value='Yes' if features_list['lowvram_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Temperature Capable", value='Yes' if features_list['temperature_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Multi Model Capable Engine", value='Yes' if features_list['multimodel_capable'] else 'No', interactive=False)
                    with gr.Row():
                        gr.Textbox(label="Multi Voice Capable Models", value='Yes' if features_list['multivoice_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Default Audio output format", value=model_config_data['model_capabilties']['audio_format'], interactive=False)
                        gr.Textbox(label="TTS Engine Name", value="XTTS", interactive=False)
                    with gr.Row():
                        gr.Textbox(label="Windows Support", value='Yes' if features_list['windows_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Linux Support", value='Yes' if features_list['linux_capable'] else 'No', interactive=False)
                        gr.Textbox(label="Mac Support", value='Yes' if features_list['mac_capable'] else 'No', interactive=False)
            with gr.Row():
                with gr.Accordion("HELP - 🔊 Understanding TTS Engine Capabilities", open=False):
                    with gr.Row():
                        gr.Markdown(AllTalkHelpContent.ENGINE_INFORMATION, elem_classes="custom-markdown")
                    with gr.Row():
                        gr.Markdown(AllTalkHelpContent.ENGINE_INFORMATION1, elem_classes="custom-markdown")
                        gr.Markdown(AllTalkHelpContent.ENGINE_INFORMATION2, elem_classes="custom-markdown")

        with gr.Tab("Models/Voices Download"):
            with gr.Row():
                # Load the available models from the JSON file
                with open(os.path.join(this_dir, "available_models.json"), "r") as f:
                    available_models = json.load(f)
                # Extract the model names for the dropdown
                model_names = [model["model_name"] for model in available_models["models"]]
                # Create the dropdown; an empty model list yields an empty dropdown instead of an IndexError
                model_dropdown = gr.Dropdown(choices=sorted(model_names), label="Select Model", value=model_names[0] if model_names else None)
                download_button = gr.Button("Download Model/Missing Files")
            with gr.Row():
                download_status = gr.Textbox(label="Download Status")

            def fetch_file(url, file_path):
                """Stream ``url`` to ``file_path``, replacing it only after a complete transfer."""
                block_size = 64 * 1024  # 64 KiB per chunk
                # Write to a side file first so an interrupted transfer never leaves a
                # truncated file that a later "already downloaded" check would accept.
                partial_path = file_path + ".part"
                try:
                    # (connect, read) timeouts so a stalled server cannot hang the handler forever.
                    with requests.get(url, stream=True, timeout=(10, 60)) as response:
                        # Do not save an HTTP error page as if it were the requested file.
                        response.raise_for_status()
                        total_size_in_bytes = int(response.headers.get("content-length", 0))
                        with tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) as progress_bar:
                            with open(partial_path, "wb") as out_file:
                                for data in response.iter_content(block_size):
                                    progress_bar.update(len(data))
                                    out_file.write(data)
                    os.replace(partial_path, file_path)
                finally:
                    # Still present only when the transfer did not complete.
                    if os.path.exists(partial_path):
                        os.remove(partial_path)

            def download_model(model_name, force=False):
                """Download the files of ``model_name`` and return a status message.

                Files that already exist are skipped unless ``force`` is true.
                """
                # Find the selected model in the available models
                selected_model = next((model for model in available_models["models"] if model["model_name"] == model_name), None)
                if selected_model is None:
                    return f"Unknown model: {model_name}"
                # Get the folder path and files to download
                folder_path = os.path.join(main_dir, "models", "xtts", selected_model["folder_path"])
                files_to_download = selected_model["files_to_download"]
                # Check if all files are already downloaded
                all_files_exists = all(os.path.exists(os.path.join(folder_path, file_name)) for file_name in files_to_download)
                if all_files_exists and not force:
                    return "All files are already downloaded. No need to download again."
                # Create the folder if it doesn't exist
                os.makedirs(folder_path, exist_ok=True)
                # Download the missing files (every file when forced)
                for file_name, url in files_to_download.items():
                    file_path = os.path.join(folder_path, file_name)
                    if force or not os.path.exists(file_path):
                        print(f"Downloading {file_name}...")
                        try:
                            fetch_file(url, file_path)
                        except (requests.RequestException, OSError) as err:
                            # Report the failure in the status box instead of claiming success.
                            return f"Download of {file_name} failed: {err}"
                return "Model downloaded successfully!"

            download_button.click(download_model, inputs=model_dropdown, outputs=download_status)

            def show_confirm_cancel(model_name):
                """Reveal the confirm/cancel buttons when every file of the model already exists."""
                all_files_exists = all(os.path.exists(os.path.join(main_dir, "models", "xtts", model["folder_path"], file)) for model in available_models["models"] if model["model_name"] == model_name for file in model["files_to_download"])
                if all_files_exists:
                    return [gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
                else:
                    return [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)]

            def confirm_download(model_name):
                """Re-download every file of the model, then hide the confirm/cancel buttons."""
                # "Download Anyway" is only offered when the files exist, so the existence check must be bypassed.
                status = download_model(model_name, force=True)
                return [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), status]

            with gr.Row():
                confirm_button = gr.Button("Download Anyway", visible=False)
                cancel_button = gr.Button("Cancel", visible=False)
            download_button.click(show_confirm_cancel, model_dropdown, [confirm_button, download_button, cancel_button])
            confirm_button.click(confirm_download, model_dropdown, [confirm_button, download_button, cancel_button, download_status])
            cancel_button.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, [confirm_button, download_button, cancel_button])

        ####################################################################################################
        #                                         REQUIRED CHANGE                                          #
        # Add any engine specific help, bugs, issues, operating system specific requirements/setup in here #
        # Please use Markdown format, so gr.Markdown() with your markdown inside it.                       #
        ####################################################################################################
        with gr.Tab("Engine Help"):
            with gr.Row():
                gr.Markdown(AllTalkHelpContent.HELP_PAGE, elem_classes="custom-markdown")
            with gr.Row():
                gr.Markdown(AllTalkHelpContent.HELP_PAGE1, elem_classes="custom-markdown")
                gr.Markdown(AllTalkHelpContent.HELP_PAGE2, elem_classes="custom-markdown")
    return app
################################
# REQUIRED CHANGE #
# Sets up the Gradio interface #
################################
# This function sets up the Gradio interface for your TTS engine's settings page.
# You need to change the name of the function calls to match the names you set in the functions above.
#
# Specifically, you need to update the following:
#
# 1. The name of the function `xxxx_at_gradio_settings_page` to match your engine's name.
# For example, if your engine is named "mytts", the function should be named `mytts_at_gradio_settings_page`.
#
# 2. The name of the function call `xxxx_model_alltalk_settings(model_config_data)`.
# This should match the name you gave to the function that sets up the engine-specific settings page in Gradio.
# If you named that function `mytts_model_alltalk_settings`, then the call should be:
# `mytts_model_alltalk_settings(model_config_data)`
#
# After making these changes, this function will create and return the Gradio app for your TTS engine's settings page.
def xtts_at_gradio_settings_page(model_config_data):
    """Return the Gradio app for the XTTS settings page.

    Thin entry point that passes ``model_config_data`` straight to
    ``xtts_model_alltalk_settings`` and hands back whatever it builds.
    """
    return xtts_model_alltalk_settings(model_config_data)