Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to play first voiceline immediately #515

Merged
2 changes: 2 additions & 0 deletions src/config/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ def __update_config_values_from_current_state(self):
self.xtts_server_path = ""

self.lip_generation = self.__definitions.get_string_value("lip_generation").strip().lower()
self.fast_response_mode = self.__definitions.get_bool_value("fast_response_mode")
self.fast_response_mode_volume = self.__definitions.get_int_value("fast_response_mode_volume")

#Added from xTTS implementation
self.xtts_default_model = self.__definitions.get_string_value("xtts_default_model")
Expand Down
12 changes: 12 additions & 0 deletions src/config/definitions/tts_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ def get_lip_generation_config_value() -> ConfigValue:
Set to 'Lazy' to skip lip syncing only for the first sentence spoken of every response."""
return ConfigValueSelection("lip_generation","Lip File Generation",description,"Enabled",["Enabled","Lazy","Disabled"],tags=[ConfigValueTag.advanced])

@staticmethod
def get_fast_response_mode_config_value() -> ConfigValue:
    """Build the 'fast_response_mode' boolean config value.

    The value is tagged as advanced and share_row. False is passed right after
    the description (presumably the default value; confirm against ConfigValueBool).
    """
    tags = [ConfigValueTag.advanced,ConfigValueTag.share_row]
    help_text = """Whether to play the first voiceline of every response directly from the Mantella exe instead of in-game (Skyrim only).
Enable this setting to improve response times.
Disable this setting to play all voicelines natively in-game."""
    return ConfigValueBool(
        "fast_response_mode",
        "Fast Response Mode",
        help_text,
        False,
        tags=tags,
    )

@staticmethod
def get_fast_response_mode_volume_config_value() -> ConfigValue:
    """Build the 'fast_response_mode_volume' integer config value.

    The value is tagged as advanced and share_row. 40, 1 and 100 are passed right
    after the description (presumably default, minimum and maximum; confirm
    against ConfigValueInt).
    """
    tags = [ConfigValueTag.advanced,ConfigValueTag.share_row]
    help_text = """Adjust the volume of the first delivered voiceline (from 1-100) if Fast Response Mode is enabled."""
    return ConfigValueInt(
        "fast_response_mode_volume",
        "Fast Response Mode Volume",
        help_text,
        40,
        1,
        100,
        tags=tags,
    )

# XTTS Section

@staticmethod
Expand Down
4 changes: 3 additions & 1 deletion src/config/mantella_config_value_definitions_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ def get_config_values(is_integrated: bool, actions: list[action], on_value_chang
tts_category.add_config_value(TTSDefinitions.get_piper_folder_config_value(is_integrated))
tts_category.add_config_value(TTSDefinitions.get_lipgen_folder_config_value(is_integrated))
tts_category.add_config_value(TTSDefinitions.get_facefx_folder_config_value(is_integrated))
tts_category.add_config_value(TTSDefinitions.get_lip_generation_config_value())
tts_category.add_config_value(TTSDefinitions.get_number_words_tts_config_value())
tts_category.add_config_value(TTSDefinitions.get_lip_generation_config_value())
tts_category.add_config_value(TTSDefinitions.get_fast_response_mode_config_value())
tts_category.add_config_value(TTSDefinitions.get_fast_response_mode_volume_config_value())
tts_category.add_config_value(TTSDefinitions.get_xtts_url_config_value())
tts_category.add_config_value(TTSDefinitions.get_xtts_default_model_config_value())
tts_category.add_config_value(TTSDefinitions.get_xtts_device_config_value())
Expand Down
10 changes: 8 additions & 2 deletions src/game_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, game: gameable, chat_manager: ChatManager, config: ConfigLoad
self.__stt_api_file: str = stt_api_file
self.__api_file: str = api_file
self.__stt: Transcriber | None = None
self.__first_line: bool = True

###### react to calls from the game #######
@utils.time_it
Expand Down Expand Up @@ -95,15 +96,17 @@ def continue_conversation(self, input_json: dict[str, Any]) -> dict[str, Any]:
if replyType == comm_consts.KEY_REQUESTTYPE_TTS:
# if player input is detected mid-response, immediately process the player input
reply = self.player_input({"mantella_context": {}, "mantella_player_input": "", "mantella_request_type": "mantella_player_input"})
self.__first_line = False # since the NPC is already speaking in-game, setting this to True would just cause two voicelines to play at once
continue # continue conversation with new player input (ie call self.__talk.continue_conversation() again)
else:
reply: dict[str, Any] = {comm_consts.KEY_REPLYTYPE: replyType}
break

if sentence_to_play:
if not sentence_to_play.error_message:
self.__game.prepare_sentence_for_game(sentence_to_play, self.__talk.context, self.__config, topicInfoID)
self.__game.prepare_sentence_for_game(sentence_to_play, self.__talk.context, self.__config, topicInfoID, self.__first_line)
reply[comm_consts.KEY_REPLYTYPE_NPCTALK] = self.sentence_to_json(sentence_to_play, topicInfoID)
self.__first_line = False
else:
self.__talk.end()
return self.error_message(sentence_to_play.error_message)
Expand All @@ -114,6 +117,8 @@ def player_input(self, input_json: dict[str, Any]) -> dict[str, Any]:
if(not self.__talk ):
return self.error_message("No running conversation.")

self.__first_line = True

player_text: str = input_json.get(comm_consts.KEY_REQUESTTYPE_PLAYERINPUT, '')
self.__update_context(input_json)
updated_player_text, update_events, player_spoken_sentence = self.__talk.process_player_input(player_text)
Expand All @@ -136,7 +141,8 @@ def player_input(self, input_json: dict[str, Any]) -> dict[str, Any]:
# if the player response is not an action command, return a regular player reply type
if player_spoken_sentence:
topicInfoID: int = int(input_json.get(comm_consts.KEY_CONTINUECONVERSATION_TOPICINFOFILE,1))
self.__game.prepare_sentence_for_game(player_spoken_sentence, self.__talk.context, self.__config, topicInfoID)
self.__game.prepare_sentence_for_game(player_spoken_sentence, self.__talk.context, self.__config, topicInfoID, self.__first_line)
self.__first_line = False
return {comm_consts.KEY_REPLYTYPE: comm_consts.KEY_REPLYTYPE_NPCTALK, comm_consts.KEY_REPLYTYPE_NPCTALK: self.sentence_to_json(player_spoken_sentence, topicInfoID)}
else:
return {comm_consts.KEY_REPLYTYPE: comm_consts.KEY_REPLYTYPE_NPCTALK}
Expand Down
2 changes: 1 addition & 1 deletion src/games/fallout4.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def load_unnamed_npc(self, name: str, actor_race: str, actor_sex: int, ingame_vo
return character_info

@utils.time_it
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstVoiceLine: bool):
audio_file = queue_output.voice_file
fuz_file = audio_file.replace(".wav",".fuz")
speaker = queue_output.speaker
Expand Down
50 changes: 49 additions & 1 deletion src/games/gameable.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from src.llm.sentence import sentence
from src.games.external_character_info import external_character_info
import src.utils as utils
import sounddevice as sd
import soundfile as sf
import threading
import wave

class gameable(ABC):
"""Abstract class for different implementations of games to support.
Expand Down Expand Up @@ -98,13 +102,15 @@ def load_external_character_info(self, base_id: str, name: str, race: str, gende
pass

@abstractmethod
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstLine: bool):
    """Do whatever is needed to play a sentence in-game.

    This abstract method has no behavior of its own; each game implementation provides the actual logic.

    Args:
        queue_output (sentence): the sentence to play
        context_of_conversation (context): the context of the conversation
        config (ConfigLoader): the current config
        topicID (int): the Mantella dialogue line to write to
        isFirstLine (bool): whether this is the first voiceline of a given response
    """
    pass

Expand Down Expand Up @@ -291,3 +297,45 @@ def get_string_from_df(iloc, column_name: str) -> str:
if pd.isna(entry): entry = ""
elif not isinstance(entry, str): entry = str(entry)
return entry

@staticmethod
@utils.time_it
def play_audio_async(filename: str, volume: float = 0.5):
    """
    Play audio file asynchronously with volume control

    The file is read and handed to the sound device on a separate thread, so the
    caller does not wait for the file to be loaded.

    Args:
        filename (str): Path to audio file
        volume (float): Volume multiplier (0.0 to 1.0); values outside this range are clamped
    """
    import logging

    # Keep the multiplier inside the documented range so an out-of-range value
    # can neither invert the signal nor amplify it above the source level
    gain = min(max(volume, 0.0), 1.0)

    def audio_thread():
        try:
            data, samplerate = sf.read(filename)
            sd.play(data * gain, samplerate)
        except Exception:
            # Playback is best-effort: log the failure instead of letting it
            # surface as an unhandled exception on the worker thread
            logging.exception("Failed to play audio file '%s'", filename)

    thread = threading.Thread(target=audio_thread)
    thread.start()

@staticmethod
@utils.time_it
def send_muted_voiceline_to_game_folder(audio_file: str, filename: str, voice_folder_path: str):
    """
    Save muted voiceline to game folder, keeping the audio duration of the original file

    The muted file reuses the WAV parameters of the original and contains the same
    number of audio bytes, with every sample set to that format's silence level.

    Args:
        audio_file (str): Path to the audio file
        filename (str): Name of the audio file to save in the game folder (without the .wav extension)
        voice_folder_path (str): Game path to save the muted audio file to
    """
    # Read the original parameters and audio data so the muted copy matches its format and length
    with wave.open(audio_file, 'rb') as wav_file:
        params = wav_file.getparams()
        frames = wav_file.readframes(wav_file.getnframes())

    # 8-bit WAV samples are unsigned, so silence is 0x80 rather than 0x00;
    # wider sample widths are signed and silent at zero
    silence_byte = b'\x80' if params.sampwidth == 1 else b'\x00'
    muted_frames = silence_byte * len(frames)

    # Save muted wav file to game folder
    with wave.open(os.path.join(voice_folder_path, f"{filename}.wav"), 'wb') as muted_wav:
        muted_wav.setparams(params)
        muted_wav.writeframes(muted_frames)
13 changes: 10 additions & 3 deletions src/games/skyrim.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,21 +156,28 @@ def load_unnamed_npc(self, name: str, actor_race: str, actor_sex: int, ingame_vo
return character_info

@utils.time_it
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstLine: bool = False):
"""Save voicelines and subtitles to the correct game folders"""

audio_file = queue_output.voice_file
if not os.path.exists(audio_file):
return

mod_folder = config.mod_path
# subtitle = queue_output.sentence
speaker: Character = queue_output.speaker
voice_folder_path = os.path.join(mod_folder,"MantellaVoice00")
os.makedirs(voice_folder_path, exist_ok=True)

filename = self.DIALOGUELINE1_FILENAME
if topicID == 2:
filename = self.DIALOGUELINE2_FILENAME
shutil.copyfile(audio_file, os.path.join(voice_folder_path, f"{filename}.wav"))

if config.fast_response_mode and isFirstLine:
self.play_audio_async(audio_file, volume=config.fast_response_mode_volume/100)
self.send_muted_voiceline_to_game_folder(audio_file, filename, voice_folder_path)
else:
shutil.copyfile(audio_file, os.path.join(voice_folder_path, f"{filename}.wav"))

try:
shutil.copyfile(audio_file.replace(".wav", ".lip"), os.path.join(voice_folder_path, f"{filename}.lip"))
except Exception as e:
Expand Down