art-from-the-machine · art-from-the-machine · Feb 13, 2025 · Feb 12, 2025 · Feb 12, 2025 · Feb 12, 2025
diff --git a/src/config/config_loader.py b/src/config/config_loader.py
@@ -196,6 +196,8 @@ def __update_config_values_from_current_state(self):
                 self.xtts_server_path = ""
 
             self.lip_generation = self.__definitions.get_string_value("lip_generation").strip().lower()
+            self.fast_response_mode = self.__definitions.get_bool_value("fast_response_mode")
+            self.fast_response_mode_volume = self.__definitions.get_int_value("fast_response_mode_volume")
 
             #Added from xTTS implementation
             self.xtts_default_model = self.__definitions.get_string_value("xtts_default_model")

diff --git a/src/config/definitions/tts_definitions.py b/src/config/definitions/tts_definitions.py
@@ -65,6 +65,18 @@ def get_lip_generation_config_value() -> ConfigValue:
                         Set to 'Lazy' to skip lip syncing only for the first sentence spoken of every response."""
         return ConfigValueSelection("lip_generation","Lip File Generation",description,"Enabled",["Enabled","Lazy","Disabled"],tags=[ConfigValueTag.advanced])
 
+    @staticmethod
+    def get_fast_response_mode_config_value() -> ConfigValue: # defines the boolean "fast_response_mode" setting
+        description = """Whether to play the first voiceline of every response directly from the Mantella exe instead of in-game (Skyrim only).
+                        Enable this setting to improve response times.
+                        Disable this setting to play all voicelines natively in-game."""
+        return ConfigValueBool("fast_response_mode","Fast Response Mode", description, False, tags=[ConfigValueTag.advanced,ConfigValueTag.share_row]) # False is presumably the default (off) - confirm against ConfigValueBool
+
+    @staticmethod
+    def get_fast_response_mode_volume_config_value() -> ConfigValue: # defines the integer "fast_response_mode_volume" setting
+        description = """Adjust the volume of the first delivered voiceline (from 1-100) if Fast Response Mode is enabled."""
+        return ConfigValueInt("fast_response_mode_volume","Fast Response Mode Volume", description, 40, 1, 100, tags=[ConfigValueTag.advanced,ConfigValueTag.share_row]) # 40, 1, 100 are presumably default, min, max - confirm against ConfigValueInt
+
     # XTTS Section
 
     @staticmethod

diff --git a/src/config/mantella_config_value_definitions_new.py b/src/config/mantella_config_value_definitions_new.py
@@ -57,8 +57,10 @@ def get_config_values(is_integrated: bool, actions: list[action], on_value_chang
         tts_category.add_config_value(TTSDefinitions.get_piper_folder_config_value(is_integrated))
         tts_category.add_config_value(TTSDefinitions.get_lipgen_folder_config_value(is_integrated))
         tts_category.add_config_value(TTSDefinitions.get_facefx_folder_config_value(is_integrated))
-        tts_category.add_config_value(TTSDefinitions.get_lip_generation_config_value())
         tts_category.add_config_value(TTSDefinitions.get_number_words_tts_config_value())
+        tts_category.add_config_value(TTSDefinitions.get_lip_generation_config_value())
+        tts_category.add_config_value(TTSDefinitions.get_fast_response_mode_config_value())
+        tts_category.add_config_value(TTSDefinitions.get_fast_response_mode_volume_config_value())
         tts_category.add_config_value(TTSDefinitions.get_xtts_url_config_value())
         tts_category.add_config_value(TTSDefinitions.get_xtts_default_model_config_value())
         tts_category.add_config_value(TTSDefinitions.get_xtts_device_config_value())

diff --git a/src/game_manager.py b/src/game_manager.py
@@ -41,6 +41,7 @@ def __init__(self, game: gameable, chat_manager: ChatManager, config: ConfigLoad
         self.__stt_api_file: str = stt_api_file
         self.__api_file: str = api_file
         self.__stt: Transcriber | None = None
+        self.__first_line: bool = True
 
     ###### react to calls from the game #######
     @utils.time_it
@@ -95,15 +96,17 @@ def continue_conversation(self, input_json: dict[str, Any]) -> dict[str, Any]:
             if replyType == comm_consts.KEY_REQUESTTYPE_TTS:
                 # if player input is detected mid-response, immediately process the player input
                 reply = self.player_input({"mantella_context": {}, "mantella_player_input": "", "mantella_request_type": "mantella_player_input"})
+                self.__first_line = False # since the NPC is already speaking in-game, setting this to True would just cause two voicelines to play at once
                 continue # continue conversation with new player input (ie call self.__talk.continue_conversation() again)
             else:
                 reply: dict[str, Any] = {comm_consts.KEY_REPLYTYPE: replyType}
                 break
 
         if sentence_to_play:
             if not sentence_to_play.error_message:
-                self.__game.prepare_sentence_for_game(sentence_to_play, self.__talk.context, self.__config, topicInfoID)            
+                self.__game.prepare_sentence_for_game(sentence_to_play, self.__talk.context, self.__config, topicInfoID, self.__first_line)            
                 reply[comm_consts.KEY_REPLYTYPE_NPCTALK] = self.sentence_to_json(sentence_to_play, topicInfoID)
+                self.__first_line = False
             else:
                 self.__talk.end()
                 return self.error_message(sentence_to_play.error_message)
@@ -114,6 +117,8 @@ def player_input(self, input_json: dict[str, Any]) -> dict[str, Any]:
         if(not self.__talk ):
             return self.error_message("No running conversation.")
 
+        self.__first_line = True
+
         player_text: str = input_json.get(comm_consts.KEY_REQUESTTYPE_PLAYERINPUT, '')
         self.__update_context(input_json)
         updated_player_text, update_events, player_spoken_sentence = self.__talk.process_player_input(player_text)
@@ -136,7 +141,8 @@ def player_input(self, input_json: dict[str, Any]) -> dict[str, Any]:
         # if the player response is not an action command, return a regular player reply type
         if player_spoken_sentence:
             topicInfoID: int = int(input_json.get(comm_consts.KEY_CONTINUECONVERSATION_TOPICINFOFILE,1))
-            self.__game.prepare_sentence_for_game(player_spoken_sentence, self.__talk.context, self.__config, topicInfoID)
+            self.__game.prepare_sentence_for_game(player_spoken_sentence, self.__talk.context, self.__config, topicInfoID, self.__first_line)
+            self.__first_line = False
             return {comm_consts.KEY_REPLYTYPE: comm_consts.KEY_REPLYTYPE_NPCTALK, comm_consts.KEY_REPLYTYPE_NPCTALK: self.sentence_to_json(player_spoken_sentence, topicInfoID)}
         else:
             return {comm_consts.KEY_REPLYTYPE: comm_consts.KEY_REPLYTYPE_NPCTALK}

diff --git a/src/games/fallout4.py b/src/games/fallout4.py
@@ -187,7 +187,7 @@ def load_unnamed_npc(self, name: str, actor_race: str, actor_sex: int, ingame_vo
         return character_info
 
     @utils.time_it
-    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
+    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstVoiceLine: bool):
         audio_file = queue_output.voice_file
         fuz_file = audio_file.replace(".wav",".fuz")
         speaker = queue_output.speaker

diff --git a/src/games/gameable.py b/src/games/gameable.py
@@ -11,6 +11,10 @@
 from src.llm.sentence import sentence
 from src.games.external_character_info import external_character_info
 import src.utils as utils
+import sounddevice as sd
+import soundfile as sf
+import threading
+import wave
 
 class gameable(ABC):
     """Abstract class for different implementations of games to support. 
@@ -98,13 +102,15 @@ def load_external_character_info(self, base_id: str, name: str, race: str, gende
         pass    
 
     @abstractmethod
-    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
+    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstLine: bool):
         """Does what ever is needed to play a sentence ingame
 
         Args:
             queue_output (sentence): the sentence to play
             context_of_conversation (context): the context of the conversation
             config (ConfigLoader): the current config
+            topicID (int): the Mantella dialogue line to write to
+            isFirstLine (bool): whether this is the first voiceline of a given response
         """
         pass
 
@@ -291,3 +297,45 @@ def get_string_from_df(iloc, column_name: str) -> str:
         if pd.isna(entry): entry = ""
         elif not isinstance(entry, str): entry = str(entry)
         return entry        
+
+    @staticmethod
+    @utils.time_it
+    def play_audio_async(filename: str, volume: float = 0.5):
+        """
+        Play an audio file on a background thread with volume control
+
+        Args:
+            filename (str): Path to the audio file to read with soundfile
+            volume (float): Volume multiplier, clamped to the range 0.0 to 1.0
+        """
+        gain = min(max(volume, 0.0), 1.0) # a gain above 1.0 would scale samples past full scale and distort
+        def audio_thread():
+            data, samplerate = sf.read(filename)
+            sd.play(data * gain, samplerate) # returns immediately; playback continues in the background
+
+        # daemon thread so a slow file read can never hold up interpreter shutdown
+        threading.Thread(target=audio_thread, daemon=True).start()
+
+    @staticmethod
+    @utils.time_it
+    def send_muted_voiceline_to_game_folder(audio_file: str, filename: str, voice_folder_path: str):
+        """
+        Save a silent copy of a voiceline to the game folder, keeping the original's format and duration
+
+        Args:
+            audio_file (str): Path to the source wav file
+            filename (str): Name (without extension) of the wav file to write in the game folder
+            voice_folder_path (str): Game folder to save the silent wav file to
+        """
+        # Only the header is needed: the silent payload is sized from the frame count, not the samples
+        with wave.open(audio_file, 'rb') as wav_file:
+            params = wav_file.getparams()
+
+        # 8-bit wav samples are unsigned (silence is 0x80); wider samples are signed (silence is 0x00)
+        silent_byte = b'\x80' if params.sampwidth == 1 else b'\x00'
+        muted_frames = silent_byte * (params.nframes * params.nchannels * params.sampwidth)
+
+        # Save muted wav file to game folder
+        with wave.open(os.path.join(voice_folder_path, f"{filename}.wav"), 'wb') as muted_wav:
+            muted_wav.setparams(params)
+            muted_wav.writeframes(muted_frames)
diff --git a/src/games/skyrim.py b/src/games/skyrim.py
@@ -156,21 +156,28 @@ def load_unnamed_npc(self, name: str, actor_race: str, actor_sex: int, ingame_vo
         return character_info
 
     @utils.time_it
-    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int):
+    def prepare_sentence_for_game(self, queue_output: sentence, context_of_conversation: context, config: ConfigLoader, topicID: int, isFirstLine: bool = False):
         """Save voicelines and subtitles to the correct game folders"""
 
         audio_file = queue_output.voice_file
         if not os.path.exists(audio_file):
             return
+
         mod_folder = config.mod_path
-        # subtitle = queue_output.sentence
         speaker: Character = queue_output.speaker
         voice_folder_path = os.path.join(mod_folder,"MantellaVoice00")
         os.makedirs(voice_folder_path, exist_ok=True)
+
         filename = self.DIALOGUELINE1_FILENAME
         if topicID == 2:
             filename = self.DIALOGUELINE2_FILENAME
-        shutil.copyfile(audio_file, os.path.join(voice_folder_path, f"{filename}.wav"))
+
+        if config.fast_response_mode and isFirstLine:
+            self.play_audio_async(audio_file, volume=config.fast_response_mode_volume/100)
+            self.send_muted_voiceline_to_game_folder(audio_file, filename, voice_folder_path)
+        else:
+            shutil.copyfile(audio_file, os.path.join(voice_folder_path, f"{filename}.wav"))    
+
         try:
             shutil.copyfile(audio_file.replace(".wav", ".lip"), os.path.join(voice_folder_path, f"{filename}.lip"))
         except Exception as e: