Skip to content

Commit

Permalink
audio transcription tool
Browse files Browse the repository at this point in the history
  • Loading branch information
fatihozturkh2o committed Sep 19, 2024
1 parent 77bf097 commit 7ff6764
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 5 deletions.
29 changes: 25 additions & 4 deletions openai_server/agent_prompting.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,9 +514,10 @@ def get_image_generation_helper():
* For an image generation, you are recommended to use the existing pre-built python code, E.g.:
```sh
# filename: my_image_generation.sh
# execution: true
python {cwd}/openai_server/agent_tools/image_generation.py --prompt "PROMPT" --file_name "image.png"
```
* usage: python {cwd}/openai_server/agent_tools/image_generation.py [-h] --prompt PROMPT --file_name FILE
* usage: python {cwd}/openai_server/agent_tools/image_generation.py [-h] --prompt PROMPT --file_name FILE_NAME
* If you make an image, ensure you use python or shell code properly to generate the image file.
* By default the image will be saved in the base directory: {base_path}, you can read the image file from there.
"""
Expand All @@ -527,6 +528,26 @@ def get_image_generation_helper():
)
return image_generation

def get_audio_transcription_helper():
    """Build the prompt snippet that describes the audio transcription tool.

    The tool is advertised only when the ``STT_OPENAI_BASE_URL`` environment
    variable holds a non-empty value; otherwise the returned text states that
    no audio transcription tool is available.

    Returns:
        str: Prompt text describing the tool (or its absence).
    """
    endpoint = os.environ.get("STT_OPENAI_BASE_URL")
    if not endpoint:
        # Unset or empty endpoint: tell the agent the capability is missing.
        return "There is no available audio transcription tool, so you can not transcribe audio. "

    # Absolute working directory, so the advertised command uses a full path.
    cwd = os.path.abspath(os.getcwd())
    return f"""\n* Audio transcription using python. Use for transcribing audio files to text.
* For an audio transcription, you are recommended to use the existing pre-built python code, E.g.:
```sh
# filename: my_audio_transcription.sh
# execution: true
python {cwd}/openai_server/agent_tools/audio_transcription.py --file_path "./audio.wav"
```
* usage: python {cwd}/openai_server/agent_tools/audio_transcription.py [-h] --file_path FILE_PATH
* If you make an audio transcription, ensure you use python or shell code properly to generate the text file.
"""

def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_packages, system_prompt, base_url,
api_key, model, text_context_list, image_file, temp_dir, query):
agent_code_writer_system_message = agent_system_prompt(agent_code_writer_system_message,
Expand All @@ -535,7 +556,7 @@ def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_p
image_query_helper = get_image_query_helper(base_url, api_key, model)
mermaid_renderer_helper = get_mermaid_renderer_helper()
image_generation_helper = get_image_generation_helper()
print("image_generation_helper: ", image_generation_helper)
audio_transcription_helper = get_audio_transcription_helper()

chat_doc_query, internal_file_names = get_chat_doc_context(text_context_list, image_file,
temp_dir,
Expand All @@ -552,6 +573,6 @@ def get_full_system_prompt(agent_code_writer_system_message, agent_system_site_p

agent_tools_note = f"\nDo not hallucinate agent_tools tools. The only files in the {path_agent_tools} directory are as follows: {list_dir}\n"

system_message = agent_code_writer_system_message + image_query_helper + mermaid_renderer_helper + image_generation_helper + agent_tools_note + chat_doc_query
# TODO: Also return image_generation_helper ?
system_message = agent_code_writer_system_message + image_query_helper + mermaid_renderer_helper + image_generation_helper + audio_transcription_helper + agent_tools_note + chat_doc_query
# TODO: Also return image_generation_helper and audio_transcription_helper ?
return system_message, internal_file_names, chat_doc_query, image_query_helper, mermaid_renderer_helper
28 changes: 28 additions & 0 deletions openai_server/agent_tools/audio_transcription.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
import argparse

from openai import OpenAI

def main():
    """Transcribe an audio file via an OpenAI-compatible speech-to-text endpoint.

    Command-line arguments:
        --model: Model name passed to the transcription API (default ``whisper-1``).
        --file_path: Path to the audio file to transcribe (required).

    Environment variables:
        STT_OPENAI_BASE_URL: Base URL of the speech-to-text endpoint (required).
        STT_OPENAI_API_KEY: API key for that endpoint (defaults to ``EMPTY``).

    Raises:
        RuntimeError: If ``STT_OPENAI_BASE_URL`` is not set.

    The transcribed text is printed to standard output.
    """
    parser = argparse.ArgumentParser(description="Get transcription of an audio file")
    parser.add_argument("--model", type=str, default="whisper-1", help="Model name")
    parser.add_argument("--file_path", type=str, required=True, help="Path to the audio file")
    args = parser.parse_args()

    stt_url = os.getenv("STT_OPENAI_BASE_URL")
    if stt_url is None:
        # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
        raise RuntimeError("STT_OPENAI_BASE_URL environment variable is not set")
    stt_api_key = os.getenv("STT_OPENAI_API_KEY", "EMPTY")

    # The context manager guarantees the file handle is closed even if the
    # request fails part-way through.
    with open(args.file_path, "rb") as audio_file:
        client = OpenAI(base_url=stt_url, api_key=stt_api_key)
        transcription = client.audio.transcriptions.create(
            model=args.model,
            file=audio_file,
        )
    print(f"Audio file successfully transcribed: '{transcription.text}'")


if __name__ == "__main__":
    main()
4 changes: 3 additions & 1 deletion openai_server/autogen_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,9 @@ def output_guardrail(ret: CommandLineCodeResult) -> CommandLineCodeResult:
'SLACK_API_TOKEN', 'MISTRAL_API_KEY', 'TOGETHERAI_API_TOKEN', 'GITHUB_TOKEN', 'SECRET_KEY',
'GOOGLE_API_KEY', 'REPLICATE_API_TOKEN', 'GOOGLE_CLIENT_SECRET', 'GROQ_API_KEY',
'AWS_SERVER_SECRET_KEY', 'H2OGPT_OPENAI_BASE_URL', 'H2OGPT_OPENAI_API_KEY',
'H2OGPT_MAIN_KWARGS', 'GRADIO_H2OGPT_H2OGPT_KEY', 'IMAGEGEN_OPENAI_BASE_URL', 'IMAGEGEN_OPENAI_API_KEY']
'H2OGPT_MAIN_KWARGS', 'GRADIO_H2OGPT_H2OGPT_KEY', 'IMAGEGEN_OPENAI_BASE_URL', 'IMAGEGEN_OPENAI_API_KEY',
'STT_OPENAI_BASE_URL', 'STT_OPENAI_API_KEY'
]

# Get the values of these environment variables
set_api_key_names = set(api_key_names)
Expand Down

0 comments on commit 7ff6764

Please sign in to comment.