Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed voice change function call for multi NPC conversations #427

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions src/output_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from src.tts.synthesization_options import SynthesizationOptions

class ChatManager:
@utils.time_it
def __init__(self, game: gameable, config: ConfigLoader, tts: ttsable, client: openai_client):
self.loglevel = 28
self.__game: gameable = game
Expand All @@ -37,6 +38,11 @@ def __init__(self, game: gameable, config: ConfigLoader, tts: ttsable, client: o
self.__end_of_sentence_chars = ['.', '?', '!', ':', ';', '。', '?', '!', ';', ':']
self.__end_of_sentence_chars = [unicodedata.normalize('NFKC', char) for char in self.__end_of_sentence_chars]

@property
def tts(self) -> ttsable:
    """Return the `ttsable` object stored in this manager's private `__tts` attribute."""
    current_tts = self.__tts
    return current_tts

@utils.time_it
def generate_sentence(self, text: str, character_to_talk: Character, is_system_generated_sentence: bool = False) -> mantella_sentence:
"""Generates the audio for a text and returns the corresponding sentence

Expand All @@ -58,6 +64,7 @@ def generate_sentence(self, text: str, character_to_talk: Character, is_system_g
return mantella_sentence(character_to_talk, text, "", 0, True, error_text)
return mantella_sentence(character_to_talk, text, audio_file, self.get_audio_duration(audio_file), is_system_generated_sentence)

@utils.time_it
def num_tokens(self, content_to_measure: message | str | message_thread | list[message]) -> int:
"""Measures the length of an input in tokens

Expand All @@ -72,6 +79,7 @@ def num_tokens(self, content_to_measure: message | str | message_thread | list[m
else:
return openai_client.num_tokens_from_message(content_to_measure, None)

@utils.time_it
def generate_response(self, messages: message_thread, characters: Characters, blocking_queue: sentence_queue, actions: list[action]):
"""Starts generating responses by the LLM for the current state of the input messages

Expand All @@ -87,6 +95,7 @@ def generate_response(self, messages: message_thread, characters: Characters, bl

asyncio.run(self.process_response(characters.last_added_character, blocking_queue, messages, characters, actions))

@utils.time_it
def stop_generation(self):
"""Stops the current generation and only returns once this stop has been successful
"""
Expand All @@ -99,6 +108,7 @@ def stop_generation(self):
self.__stop_generation = False
return

@utils.time_it
def get_audio_duration(self, audio_file: str):
"""Check if the external software has finished playing the audio file"""

Expand All @@ -110,6 +120,7 @@ def get_audio_duration(self, audio_file: str):
duration = frames / float(rate) + self.__config.wait_time_buffer
return duration

@utils.time_it
def clean_sentence(self, sentence: str) -> str:
def remove_as_a(sentence: str) -> str:
"""Remove 'As an XYZ,' from beginning of sentence"""
Expand Down Expand Up @@ -160,19 +171,22 @@ def parse_asterisks_brackets(sentence: str) -> str:
sentence = sentence.strip() + " "
return sentence

@utils.time_it
def __matching_action_keyword(self, keyword: str, actions: list[action]) -> action | None:
    """Look up the action whose keyword equals `keyword`, ignoring case.

    Args:
        keyword: text compared against the `keyword` attribute of each action
        actions: candidate actions, scanned in list order

    Returns:
        The first action whose keyword matches case-insensitively, or None if none does
    """
    # Lazy generator: both sides are lowercased per candidate, so nothing is
    # evaluated at all when `actions` is empty.
    matches = (candidate for candidate in actions if keyword.lower() == candidate.keyword.lower())
    return next(matches, None)

@utils.time_it
def __character_switched_to(self, extracted_keyword: str, charaters_in_conversation: Characters) -> Character | None:
    """Find the character whose name starts with the extracted keyword, ignoring case.

    Args:
        extracted_keyword: text to match against the beginning of each character's name
        charaters_in_conversation: collection queried through `get_all_characters()`

    Returns:
        The first character (in the order yielded by `get_all_characters()`) whose
        lowercased name starts with the lowercased keyword, or None if the keyword
        is empty or no name matches
    """
    if not extracted_keyword:
        # str.startswith("") is True for every string, so an empty keyword would
        # otherwise "match" whichever character happens to be listed first.
        return None

    # Lowercase once instead of on every loop iteration.
    prefix = extracted_keyword.lower()
    for actor in charaters_in_conversation.get_all_characters():
        if actor.name.lower().startswith(prefix):
            return actor
    return None

@utils.time_it
async def process_response(self, active_character: Character, blocking_queue: sentence_queue, messages : message_thread, characters: Characters, actions: list[action]):
"""Stream response from LLM one sentence at a time"""

Expand All @@ -186,6 +200,7 @@ async def process_response(self, active_character: Character, blocking_queue: se
cumulative_sentence_bool = False
current_sentence: str = ""
actions_in_sentence: list[action] = []
first_token = True
while True:
try:
start_time = time.time()
Expand All @@ -194,6 +209,10 @@ async def process_response(self, active_character: Character, blocking_queue: se
break
if not content:
continue

if first_token:
logging.log(self.loglevel, f"LLM took {round(time.time() - start_time, 5)} seconds to respond")
first_token = False

sentence += content
# Check for the last occurrence of sentence-ending punctuation within first 150 chars
Expand Down Expand Up @@ -242,7 +261,7 @@ async def process_response(self, active_character: Character, blocking_queue: se
logging.log(28, f"Switched to {character_switched_to.name}")
active_character = character_switched_to
full_reply += f"{keyword_extraction}: "
self.__tts.change_voice(active_character.tts_voice_model, voice_accent=active_character.voice_accent)
self.__tts.change_voice(active_character.tts_voice_model, active_character.in_game_voice_model, active_character.csv_in_game_voice_model, active_character.advanced_voice_model, voice_accent=active_character.voice_accent)
else:
action_to_take: action | None = self.__matching_action_keyword(keyword_extraction, actions)
if action_to_take:
Expand All @@ -266,7 +285,6 @@ async def process_response(self, active_character: Character, blocking_queue: se
logging.log(28, f'Skipping voiceline that is too short: {sentence}')
break

logging.log(self.loglevel, f"LLM returned sentence took {time.time() - start_time} seconds to execute")
# Generate the audio and return the audio file path
# Put the audio file path in the sentence_queue

Expand Down