⚒️ agent execution related utils

Leolty · Dec 20, 2023 · 445a332 · 445a332
1 parent d090ddb
commit 445a332
Showing 1 changed file with 239 additions and 0 deletions.
diff --git a/utils/execute.py b/utils/execute.py
@@ -0,0 +1,239 @@
+import pandas as pd
+import ast
+import csv
+from contextlib import redirect_stdout
+from io import StringIO
+import unicodedata
+import re
+import sqlite3
+from typing import List, Tuple, Any, Union
+import numpy as np
+from datetime import datetime
+
+###################################
+### Dataframe related functions ###
+###################################
+
+def remove_merged_suffixes(df):
+    """
+    Strip " .N" suffixes from cell values when the bare value is already known.
+
+    A cell whose string form looks like "<text> .<digits>" is rewritten to
+    "<text>" (whitespace-stripped), but only if "<text>" also appears in another
+    row of the same column or is equal to the column name. Cells that do not
+    meet this condition are left untouched.
+
+    Parameters:
+    df (pd.DataFrame): The dataframe to clean. It is modified in place.
+
+    Returns:
+    pd.DataFrame: The same dataframe object that was passed in.
+    """
+    suffix_re = re.compile(r'^(.*) \.\d+$')
+
+    for col in df.columns:
+        for idx, value in df[col].items():
+            hit = suffix_re.match(str(value))
+            if hit is None:
+                # no " .N" suffix on this cell
+                continue
+
+            base = hit.group(1).strip()
+            # The column is re-read on every hit, so replacements made earlier
+            # in this pass count as existing values for later rows.
+            others = df[col].drop(idx).values
+            if base in others or base == col:
+                df.at[idx, col] = base
+    return df
+
+def markdown_to_df(markdown_string):
+    """
+    Parse a markdown table to a pandas dataframe.
+
+    The first line is used as the header. If the second line consists only of
+    '-', '|', ':' and spaces it is treated as the markdown delimiter row and
+    dropped; otherwise it is kept as a data row. Column names and string cells
+    are whitespace-stripped, and string cells are NFKD-normalized.
+
+    Parameters:
+    markdown_string (str): The markdown table string. Both "\n" and "\r\n"
+        line endings are accepted.
+
+    Returns:
+    pd.DataFrame: The parsed markdown table as a pandas dataframe.
+
+    Raises:
+    ValueError: If the input has fewer than two lines.
+    """
+
+    # Split into lines. Trailing whitespace (including the "\r" of CRLF input)
+    # is removed first, otherwise the closing '|' is not at the end of the line,
+    # survives the strip below, and read_csv reports an extra unnamed column.
+    # Leading whitespace is left alone on purpose: in a table written without
+    # outer pipes it can stand for an empty first cell.
+    lines = [line.rstrip().strip('|') for line in markdown_string.strip().split("\n")]
+
+    # A header plus at least one more line is required
+    if len(lines) < 2:
+        raise ValueError("Markdown string should contain at least a header, delimiter and one data row.")
+
+    # Remove the delimiter line if the second line is one; if it is not,
+    # the second line is kept as regular data
+    if set(lines[1].strip()) <= set('-| :'):
+        del lines[1]
+
+    # Join the lines back into a single string, and use StringIO to make it file-like
+    markdown_file_like = StringIO("\n".join(lines))
+
+    # Use pandas to read the "file", assuming the first row is the header and the separator is '|'
+    df = pd.read_csv(markdown_file_like, sep='|', skipinitialspace=True, quoting=csv.QUOTE_NONE)
+
+    # Strip whitespace from column names
+    df.columns = df.columns.str.strip()
+
+    def clean_cell(cell):
+        # Only strings are touched, so the cleanup does not depend on the
+        # column dtype and non-string cells pass through unchanged.
+        if isinstance(cell, str):
+            return unicodedata.normalize("NFKD", cell.strip())
+        return cell
+
+    # Strip whitespace from values and normalize unicode characters
+    return df.map(clean_cell)
+
+def print_partial_markdown(df, keep: int=3):
+    """
+    Render a shortened markdown view of a dataframe.
+
+    The first `keep` and last `keep` rows are rendered with
+    `DataFrame.to_markdown(index=True)` and a "..." line is placed between
+    them. If the dataframe has at most `2 * keep` rows, nothing would be
+    elided, so the whole table is rendered without a separator instead of
+    repeating the overlapping rows.
+
+    Despite the name, nothing is printed; the markdown text is returned.
+
+    Parameters:
+    df (pd.DataFrame): The dataframe to render.
+    keep (int): Number of rows to keep from each end.
+
+    Returns:
+    str: The markdown table, with a "..." line where rows were left out.
+    """
+    # Head and tail would overlap: show everything once, no separator
+    if len(df) <= 2 * keep:
+        return df.to_markdown(index=True)
+
+    head = df.head(keep)
+
+    # Concatenate the first `keep` and last `keep` rows of the dataframe
+    combined_df = pd.concat([head, df.tail(keep)])
+    markdown_lines = combined_df.to_markdown(index=True).split('\n')
+
+    # The head rendered on its own has exactly as many lines as the head part
+    # of the combined table, which gives the position for the separator
+    separator_index = len(head.to_markdown(index=True).split('\n'))
+    markdown_lines.insert(separator_index, '...')
+
+    # Join the lines to form the final markdown string
+    return '\n'.join(markdown_lines)
+
+def convert_cells_to_numbers(df):
+    """
+    Convert number-like cells of a dataframe to numeric values, cell by cell.
+
+    String cells have their commas removed; if `float()` accepts the result,
+    the comma-free text is passed to `pd.to_numeric(..., errors='coerce')`,
+    otherwise the original string is kept unchanged. Non-string cells are
+    passed to `pd.to_numeric(..., errors='coerce')` directly.
+
+    Parameters:
+    df (pd.DataFrame): The dataframe to convert.
+
+    Returns:
+    pd.DataFrame: The result of mapping the conversion over every cell.
+    """
+    def _cell_to_number(cell):
+        # Anything that is not text goes straight to pandas
+        if not isinstance(cell, str):
+            return pd.to_numeric(cell, errors='coerce')
+
+        digits = cell.replace(',', '')  # drop thousands separators
+        try:
+            # float() is only used as a check that the text is number-like
+            float(digits)
+            return pd.to_numeric(digits, errors='coerce')
+        except ValueError:
+            # Not a number: keep the original text, commas included
+            return cell
+
+    return df.map(_cell_to_number)
+
+def infer_dtype(df):
+    """
+    Attempt to convert columns in a DataFrame to a more appropriate data type.
+
+    Each column is first tried as numeric. A column that is still text
+    afterwards is tried as datetime, and the datetime version is adopted only
+    if every non-missing value could be parsed. Columns that fit neither type
+    are left exactly as they were, so plain text columns are not replaced by
+    NaT values.
+
+    The dataframe is modified in place.
+
+    :param df: Input DataFrame
+    :return: DataFrame with updated dtypes (the same object that was passed in)
+    """
+
+    for col in df.columns:
+        # Try converting to numeric; on failure keep the column as it is
+        try:
+            df[col] = pd.to_numeric(df[col])
+        except (ValueError, TypeError):
+            pass
+
+        column = df[col]
+        is_text = pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)
+        if not is_text:
+            continue
+
+        # Best effort: a column pandas cannot process at all is left alone
+        try:
+            as_dates = pd.to_datetime(column, errors='coerce')
+        except (ValueError, TypeError, OverflowError):
+            continue
+
+        # errors='coerce' turns every unparseable value into NaT. Accept the
+        # conversion only when it introduced no new missing values, i.e. when
+        # the column really was made of dates.
+        if as_dates.notna().equals(column.notna()):
+            df[col] = as_dates
+
+    return df
+
+# def parse_code_from_string(input_string):
+#     """
+#     Parse executable code from a string.
+
+#     Parameters:
+#     input_string (str): The input string.
+
+#     Returns:
+#     str: The parsed code.
+#     """
+
+#     # Removes `, whitespace & python from start
+#     input_string = re.sub(r"^(\s|`)*(?i:python)?\s*", "", input_string, flags=re.IGNORECASE)
+#     # Removes whitespace & ` from end
+#     input_string = re.sub(r"(\s|`)*$", "", input_string)
+
+#     return input_string
+def parse_code_from_string(input_string):
+    """
+    Parse executable code from a string, handling various markdown-like code block formats.
+
+    The first matching rule wins:
+    1. A block wrapped in triple backticks, with an optional language tag.
+    2. An opening triple-backtick fence that is never closed (for example a
+       truncated response): everything after the fence is used.
+    3. A span wrapped in single backticks.
+    4. Otherwise the whole input.
+
+    A language tag is only removed when it cannot be mistaken for code: either
+    a word that fills the rest of the fence line ("```sql" + newline), or the
+    word "python" (any case) followed by code on the same line. This way
+    "```print(1)```" yields "print(1)" rather than losing its first word.
+
+    Parameters:
+    input_string (str): The input string.
+
+    Returns:
+    str: The parsed code, with surrounding whitespace stripped.
+    """
+
+    # Optional language tag directly after the opening fence (see docstring)
+    fence_tag = r"(?:python\b[ \t]*|[\w+-]*[ \t]*(?=\r?\n))?"
+    fence_flags = re.DOTALL | re.IGNORECASE
+
+    # Code block wrapped in triple backticks
+    match = re.search(r"```" + fence_tag + r"(.*?)```", input_string, flags=fence_flags)
+    if match:
+        return match.group(1).strip()
+
+    # Opening fence without a closing one: take everything that follows.
+    # Without this step the single-backtick rule would match the first two
+    # backticks of the fence and return an empty string.
+    match = re.search(r"```" + fence_tag + r"(.*)", input_string, flags=fence_flags)
+    if match:
+        return match.group(1).strip()
+
+    # Code wrapped in single backticks
+    match = re.search(r"`(.*?)`", input_string, flags=re.DOTALL)
+    if match:
+        return match.group(1).strip()
+
+    # Default return if no code block patterns are matched
+    return input_string.strip()
+
+
+def python_repl_ast(code, custom_globals=None, custom_locals=None, memory=None):
+    """
+    Run command with own globals/locals and return the result of its last statement.
+
+    All statements except the last are executed first. If the last statement is
+    an expression it is evaluated exactly once: a non-None value is returned as
+    `str(value)`, otherwise whatever that expression printed is returned. If
+    the last statement is not an expression it is executed and whatever it
+    printed is returned. Output printed by the earlier statements is not
+    captured.
+
+    Any exception (including a syntax error in `code`) is reported as the
+    string "<ExceptionName>: <message>" instead of being raised; in that case
+    `memory` is returned without being updated.
+
+    NOTE: this executes arbitrary code via exec/eval with this module's globals
+    in scope. Only pass code from a trusted source or run it in a sandbox.
+
+    NOTE: globals and locals are separate dicts, so the usual `exec` rule
+    applies: functions defined inside `code` look up free names in the globals,
+    not in variables that `code` assigned at its top level.
+
+    Parameters:
+    code (str): The code to execute.
+    custom_globals (dict): Extra globals, layered over this module's globals.
+    custom_locals (dict): The locals to use; entries in `memory` take precedence.
+    memory (dict): The state/memory to retain between invocations. Updated in
+        place with all local variables after a successful run.
+
+    Returns:
+    tuple: (str: The output of the code, dict: updated memory).
+    """
+
+    if memory is None:
+        memory = {}
+
+    if custom_globals is None:
+        custom_globals = globals().copy()
+    else:
+        custom_globals = {**globals(), **custom_globals}
+
+    # Work on a copy so a failed run leaves `memory` untouched
+    if custom_locals is None:
+        custom_locals = memory.copy()
+    else:
+        custom_locals = {**custom_locals, **memory}
+
+    try:
+        tree = ast.parse(code)
+        leading, final = tree.body[:-1], tree.body[-1:]
+
+        # Execute all statements except the last
+        exec(compile(ast.Module(leading, type_ignores=[]), "<string>", "exec"), custom_globals, custom_locals)
+
+        io_buffer = StringIO()
+
+        # Redirect stdout to our buffer while the last statement runs
+        with redirect_stdout(io_buffer):
+            if final and isinstance(final[0], ast.Expr):
+                # Decide eval vs exec from the syntax tree rather than by
+                # trying eval and retrying with exec: the retry would run a
+                # failing expression a second time and repeat its side effects.
+                expression = ast.Expression(final[0].value)
+                ret = eval(compile(expression, "<string>", "eval"), custom_globals, custom_locals)
+                output = str(ret) if ret is not None else io_buffer.getvalue()
+            else:
+                # Not an expression (assignment, import, loop, ...) or empty input
+                exec(compile(ast.Module(final, type_ignores=[]), "<string>", "exec"), custom_globals, custom_locals)
+                output = io_buffer.getvalue()
+
+        # Update memory with new variable states
+        memory.update(custom_locals)
+
+        # Return any output captured during execution along with the updated memory
+        return output, memory
+
+    except Exception as e:
+        return "{}: {}".format(type(e).__name__, str(e)), memory