From 7e8ad6ac9d54124bb5ffccb5d0dbb36b5324c657 Mon Sep 17 00:00:00 2001
From: "Jonathan C. McKinney"
Date: Thu, 19 Sep 2024 13:53:36 -0700
Subject: [PATCH] Fix markdown generated by LLM if didn't follow rule that should only have base name in markdown

---
Review notes (not part of the commit message):
- This patch text had lost its line breaks and indentation; leading whitespace
  and the position of blank context lines were reconstructed from the Python
  syntax and the hunk line counts. Verify context against the target tree
  before applying.
- uuid.uuid4() returns a UUID object, which cannot be sliced; it is converted
  with str() before taking the 6-character prefix.
- The image tool writes binary image data, so its fallback file name uses
  ".jpg" (the previous default extension) rather than ".txt".
- Reference-definition regexes use [ \t]* instead of \s* so a match cannot
  swallow blank lines before a definition or run onto the following line.
- The bare "except:" is narrowed to "except Exception" and reports the error.
- A docstring was added to fix_markdown_image_paths (hunk counts adjusted).

 openai_server/agent_prompting.py              |  1 -
 .../agent_tools/audio_transcription.py        |  4 +-
 openai_server/agent_tools/image_generation.py |  4 +-
 openai_server/agent_utils.py                  | 61 +++++++++++++++++++
 src/version.py                                |  2 +-
 5 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/openai_server/agent_prompting.py b/openai_server/agent_prompting.py
index 6fe6b3351..ff62a6b1e 100644
--- a/openai_server/agent_prompting.py
+++ b/openai_server/agent_prompting.py
@@ -451,7 +451,6 @@ def get_image_query_helper(base_url, api_key, model):
     model_list = client.models.list()
     image_models = [x.id for x in model_list if x.model_extra['actually_image']]
     we_are_vision_model = len([x for x in model_list if x.id == model]) > 0
-    image_query_helper = ''
     if we_are_vision_model:
         vision_model = model
     elif not we_are_vision_model and len(image_models) > 0:
diff --git a/openai_server/agent_tools/audio_transcription.py b/openai_server/agent_tools/audio_transcription.py
index c3bcef730..14944f988 100644
--- a/openai_server/agent_tools/audio_transcription.py
+++ b/openai_server/agent_tools/audio_transcription.py
@@ -10,7 +10,7 @@ def main():
     # Model
     parser.add_argument("--model", type=str, required=False, help="Model name")
     # File name
-    parser.add_argument("--output", type=str, required=False, help="Path (ensure unique) to the audio file")
+    parser.add_argument("--output", type=str, default='', required=False, help="Path (ensure unique) to the audio file")
     args = parser.parse_args()
     ##
     stt_url = os.getenv("STT_OPENAI_BASE_URL", None)
@@ -31,7 +31,7 @@ def main():
     )
     # Save the image to a file
     if not args.output:
-        args.output = f"transcription_{uuid.uuid4()}.txt"
+        args.output = f"transcription_{str(uuid.uuid4())[:6]}.txt"
     # Write the transcription to a file
     with open(args.output, "wt") as txt_file:
         txt_file.write(transcription.text)
diff --git a/openai_server/agent_tools/image_generation.py b/openai_server/agent_tools/image_generation.py
index 153ef9567..2ad33b6b9 100644
--- a/openai_server/agent_tools/image_generation.py
+++ b/openai_server/agent_tools/image_generation.py
@@ -9,7 +9,7 @@ def main():
     parser = argparse.ArgumentParser(description="Generate images from text prompts")
     parser.add_argument("--prompt", type=str, required=True, help="User prompt")
     parser.add_argument("--model", type=str, required=False, help="Model name")
-    parser.add_argument("--output", type=str, required=False, default="output.jpg", help="Name of the output file")
+    parser.add_argument("--output", type=str, required=False, default="", help="Name (unique) of the output file")
     parser.add_argument("--quality", type=str, required=False, choices=['standard', 'hd', 'quick', 'manual'],
                         default='standard', help="Image quality")
     parser.add_argument("--size", type=str, required=False, default="1024x1024", help="Image size (height x width)")
@@ -120,7 +120,7 @@ def main():
 
     # Save the image to a file
     if not args.output:
-        args.output = f"transcription_{uuid.uuid4()}.txt"
+        args.output = f"image_{str(uuid.uuid4())[:6]}.jpg"
 
     # Write the image data to a file
     with open(args.output, "wb") as img_file:
diff --git a/openai_server/agent_utils.py b/openai_server/agent_utils.py
index 1d97ee2ac..50ddabd8c 100644
--- a/openai_server/agent_utils.py
+++ b/openai_server/agent_utils.py
@@ -1,6 +1,7 @@
 import functools
 import inspect
 import os
+import re
 import shutil
 import sys
 import time
@@ -139,6 +140,61 @@ def set_dummy_term():
     plt.ioff()
 
 
+def fix_markdown_image_paths(text):
+    """Rewrite markdown image links so they use only the file's base name.
+
+    Handles inline images (``![alt](dir/img.png)``) and reference-style images
+    (``![alt][ref]`` whose ``[ref]: dir/img.png`` definition is in ``text``).
+
+    :param text: markdown text to fix
+    :return: text with each image path replaced by ``os.path.basename`` of it
+    """
+    def replace_path(match):
+        alt_text = match.group(1)
+        full_path = match.group(2)
+        base_name = os.path.basename(full_path)
+        return f"![{alt_text}]({base_name})"
+
+    # Pattern for inline images: ![alt text](path/to/image.jpg)
+    inline_pattern = r'!\[(.*?)\]\s*\((.*?)\)'
+    text = re.sub(inline_pattern, replace_path, text)
+
+    # Pattern for reference-style images: ![alt text][ref]
+    ref_pattern = r'!\[(.*?)\]\s*\[(.*?)\]'
+
+    def collect_references(text):
+        ref_dict = {}
+        ref_def_pattern = r'^[ \t]*\[(.*?)\]:[ \t]*(.*?)$'
+        for match in re.finditer(ref_def_pattern, text, re.MULTILINE):
+            ref_dict[match.group(1)] = match.group(2)
+        return ref_dict
+
+    ref_dict = collect_references(text)
+
+    def replace_ref_image(match):
+        alt_text = match.group(1)
+        ref = match.group(2)
+        if ref in ref_dict:
+            full_path = ref_dict[ref]
+            base_name = os.path.basename(full_path)
+            ref_dict[ref] = base_name  # Update reference
+            return f"![{alt_text}][{ref}]"
+        return match.group(0)  # If reference not found, leave unchanged
+
+    text = re.sub(ref_pattern, replace_ref_image, text)
+
+    # Update reference definitions
+    def replace_ref_def(match):
+        ref = match.group(1)
+        if ref in ref_dict:
+            return f"[{ref}]: {ref_dict[ref]}"
+        return match.group(0)
+
+    text = re.sub(r'^[ \t]*\[(.*?)\]:[ \t]*(.*?)$', replace_ref_def, text, flags=re.MULTILINE)
+
+    return text
+
+
 def get_ret_dict_and_handle_files(chat_result, temp_dir, agent_verbose, internal_file_names, authorization,
                                   autogen_run_code_in_docker, autogen_stop_docker_executor, executor,
                                   agent_venv_dir, agent_code_writer_system_message, agent_system_site_packages,
@@ -230,6 +286,11 @@ def get_ret_dict_and_handle_files(chat_result, temp_dir, agent_verbose, internal
         latest_image_file = image_files[-1] if image_files else None
         if latest_image_file:
             chat_result.summary += f'\n![image]({os.path.basename(latest_image_file)})'
+    else:
+        try:
+            chat_result.summary = fix_markdown_image_paths(chat_result.summary)
+        except Exception as e:
+            print(f"Failed to fix markdown image paths: {e}", file=sys.stderr)
     ret_dict.update(dict(summary=chat_result.summary))
 
     if agent_venv_dir is not None:
diff --git a/src/version.py b/src/version.py
index 5a961c6fb..fd61b0c09 100644
--- a/src/version.py
+++ b/src/version.py
@@ -1 +1 @@
-__version__ = "ba14e9a2ec86ead64ab3cf0d770ef2d3a5b545c9"
+__version__ = "c457e918e1fdf8554355d877220b23f4af23a0b7"