From 66d96dd887c3a56ffcb2cf2cc82d685db9fdf857 Mon Sep 17 00:00:00 2001 From: Wael Karkoub Date: Tue, 26 Mar 2024 19:46:44 +0100 Subject: [PATCH] Parse Any HTML-esh Style Tags (#2046) * tried implementing my own regex * improves tests * finally works * removes prints * fixed test * adds start and end * delete unused imports * refactored to use new tool * significantly improved algo * tag content -> tag attr * fix tests + adds new field * return full match * return remove start and end * update docstrings * update docstrings * update docstrings --------- Co-authored-by: Beibin Li Co-authored-by: Chi Wang --- autogen/agentchat/contrib/img_utils.py | 17 ++-- autogen/agentchat/utils.py | 109 ++++++++++++++++++++++++- test/agentchat/test_agentchat_utils.py | 76 +++++++++++++++++ 3 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 test/agentchat/test_agentchat_utils.py diff --git a/autogen/agentchat/contrib/img_utils.py b/autogen/agentchat/contrib/img_utils.py index 2d2592418747..a389c74b064d 100644 --- a/autogen/agentchat/contrib/img_utils.py +++ b/autogen/agentchat/contrib/img_utils.py @@ -1,14 +1,15 @@ import base64 import copy -import mimetypes import os import re from io import BytesIO -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Tuple, Union import requests from PIL import Image +from autogen.agentchat import utils + def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image: """ @@ -179,13 +180,9 @@ def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dic last_index = 0 image_count = 0 - # Regular expression pattern for matching <img> tags - img_tag_pattern = re.compile(r"<img ([^>]+)>") - # Find all image tags - for match in img_tag_pattern.finditer(prompt): - image_location = match.group(1) - + for parsed_tag in utils.parse_tags_from_content("img", prompt): + image_location = parsed_tag["attr"]["src"] try: if img_format == "pil": img_data = get_pil_image(image_location) @@
-202,12 +199,12 @@ def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dic continue # Add text before this image tag to output list - output.append({"type": "text", "text": prompt[last_index : match.start()]}) + output.append({"type": "text", "text": prompt[last_index : parsed_tag["match"].start()]}) # Add image data to output list output.append({"type": "image_url", "image_url": {"url": img_data}}) - last_index = match.end() + last_index = parsed_tag["match"].end() image_count += 1 # Add remaining text to output list diff --git a/autogen/agentchat/utils.py b/autogen/agentchat/utils.py index fde0b0b88b3a..eef3741605d8 100644 --- a/autogen/agentchat/utils.py +++ b/autogen/agentchat/utils.py @@ -1,4 +1,6 @@ -from typing import Any, List, Dict, Tuple, Callable +import re +from typing import Any, Callable, Dict, List, Tuple, Union + from .agent import Agent @@ -76,3 +78,108 @@ def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, An aggregate_summary(actual_usage_summary, agent.client.actual_usage_summary) return total_usage_summary, actual_usage_summary + + +def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Dict[str, str]]]: + """Parses HTML style tags from message contents. + + The parsing is done by looking for patterns in the text that match the format of HTML tags. The tag to be parsed is + specified as an argument to the function. The function looks for this tag in the text and extracts its content. The + content of a tag is everything that is inside the tag, between the opening and closing angle brackets. The content + can be a single string or a set of attribute-value pairs. + + Examples: + <img http://example.com/image.png> -> [{"tag": "img", "attr": {"src": "http://example.com/image.png"}, "match": re.Match}] +