langchain-ai · hinthornw · Dec 17, 2023 · Dec 16, 2023
diff --git a/docs/source/notebooks/tool_usage/benchmark_all_tasks.ipynb b/docs/source/notebooks/tool_usage/benchmark_all_tasks.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "13a7483b-d08f-49fa-83da-619863171e5b",
    "metadata": {
     "tags": []
@@ -36,6 +36,7 @@
     "    AnthropicToolUserFactory,\n",
     "    CustomAgentFactory,\n",
     "    OpenAIAgentFactory,\n",
+    "    OpenAIAssistantFactory,\n",
     ")"
    ]
   },
@@ -58,7 +59,8 @@
    "outputs": [],
    "source": [
     "task = registry[\"Tool Usage - Typewriter (1 tool)\"]\n",
-    "agent_factory = OpenAIAgentFactory(task, model=model)\n",
+    "model = \"gpt-3.5-turbo-1106\"\n",
+    "agent_factory = OpenAIAssistantFactory(task, model=model)\n",
     "\n",
     "agent_factory().invoke({\"question\": \"abc\"})"
    ]
@@ -80,7 +82,9 @@
    },
    "outputs": [],
    "source": [
-    "experiment_uuid = \"3f3e\"  # Or generate ranom using uuid.uuid4().hex[:4]"
+    "import uuid\n",
+    "\n",
+    "experiment_uuid = uuid.uuid4().hex[:4]"
    ]
   },
   {
@@ -107,6 +111,8 @@
     "    (\"openai_functions\", \"gpt-3.5-turbo-0613\"),\n",
     "    (\"openai_functions\", \"gpt-4-1106-preview\"),\n",
     "    (\"openai_functions\", \"gpt-4-0613\"),\n",
+    "    (\"openai_assistant\", \"gpt-3.5-turbo-1106\"),\n",
+    "    (\"openai_assistant\", \"gpt-4-1106-preview\"),\n",
     "]"
    ]
   },
@@ -147,6 +153,10 @@
     "            agent_factory = OpenAIAgentFactory(\n",
     "                task, model=model, rate_limiter=rate_limiter\n",
     "            )\n",
+    "        elif arch == \"openai_assistant\":\n",
+    "            agent_factory = OpenAIAssistantFactory(\n",
+    "                task, model=model, rate_limiter=rate_limiter\n",
+    "            )\n",
     "        elif arch == \"custom_agent\":\n",
     "            agent_factory = CustomAgentFactory(\n",
     "                task, model=model, rate_limiter=rate_limiter\n",
@@ -390,7 +400,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.11.2"
   }
  },
  "nbformat": 4,

diff --git a/langchain_benchmarks/tool_usage/agents/__init__.py b/langchain_benchmarks/tool_usage/agents/__init__.py
@@ -5,10 +5,14 @@
 from langchain_benchmarks.tool_usage.agents.experimental.factory import (
     CustomAgentFactory,
 )
+from langchain_benchmarks.tool_usage.agents.openai_assistant import (
+    OpenAIAssistantFactory,
+)
 from langchain_benchmarks.tool_usage.agents.openai_functions import OpenAIAgentFactory
 
 __all__ = [
     "OpenAIAgentFactory",
+    "OpenAIAssistantFactory",
     "apply_agent_executor_adapter",
     "CustomAgentFactory",
     "AnthropicToolUserFactory",

diff --git a/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py b/langchain_benchmarks/tool_usage/agents/anthropic_tool_user.py
@@ -227,9 +227,9 @@ def __init__(
         self.rate_limiter = rate_limiter
         if not find_spec("tool_use_package"):
             raise ImportError(
-                f'Could not import "tool_use_package". Please '
-                f"follow instructions here to install "
-                f"https://github.com/anthropics/anthropic-tools/tree/main"
+                'Could not import "tool_use_package". Please '
+                "follow instructions here to install "
+                "https://github.com/anthropics/anthropic-tools/tree/main"
             )
 
     def __call__(self, **kwargs: Any) -> Runnable:

diff --git a/langchain_benchmarks/tool_usage/agents/openai_assistant.py b/langchain_benchmarks/tool_usage/agents/openai_assistant.py
@@ -0,0 +1,69 @@
+"""Code for creating an assistant factory for evaluating tool usage tasks.
+
+See: https://platform.openai.com/docs/assistants/how-it-works/creating-assistants
+"""
+from typing import Optional
+
+from langchain.agents import AgentExecutor
+from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable
+from langchain.schema.runnable import Runnable
+
+from langchain_benchmarks import rate_limiting
+from langchain_benchmarks.schema import ToolUsageTask
+from langchain_benchmarks.tool_usage.agents.adapters import apply_agent_executor_adapter
+
+
+class OpenAIAssistantFactory:
+    """Build OpenAI Assistant based agents for a tool usage task."""
+
+    def __init__(
+        self,
+        task: ToolUsageTask,
+        *,
+        model: str,
+        rate_limiter: Optional[rate_limiting.RateLimiter] = None,
+    ) -> None:
+        """Create an OpenAI assistant factory for the given task.
+
+        Args:
+            task: The task to create an agent factory for.
+            model: The model to use -- this must be an OpenAI model.
+            rate_limiter: The rate limiter to use
+        """
+        if not isinstance(model, str):
+            raise ValueError(f"Expected str for model, got {type(model)}")
+        self.task = task
+        tools = task.create_environment().tools
+        # Stateless, so we only need to create it once
+        self.agent = OpenAIAssistantRunnable.create_assistant(
+            name=f"{task.name} assistant",
+            instructions=self.task.instructions,
+            tools=tools,
+            model=model,
+            as_agent=True,
+        )
+        self.rate_limiter = rate_limiter
+
+    def __call__(self) -> Runnable:
+        """Return an adapted agent executor using a freshly created environment."""
+        env = self.task.create_environment()
+        agent = self.agent
+        if self.rate_limiter is not None:
+            # Rate limited model; this wrapped runnable is what the executor runs
+            agent = rate_limiting.with_rate_limit(agent, self.rate_limiter)
+
+        def _map_key(x: dict):
+            # Assistant expects the 'content' key explicitly
+            return {
+                "content": x["input"],
+                **{k: v for k, v in x.items() if k != "input"},
+            }
+
+        runnable = AgentExecutor(
+            agent=_map_key | agent,
+            tools=env.tools,
+            handle_parsing_errors=True,
+            return_intermediate_steps=True,
+        )
+        # Adds `state` when the env has a state reader; ensures `output` is present
+        return apply_agent_executor_adapter(runnable, state_reader=env.read_state)
diff --git a/langchain_benchmarks/tool_usage/agents/openai_functions.py b/langchain_benchmarks/tool_usage/agents/openai_functions.py
@@ -6,7 +6,6 @@
     format_to_openai_tool_messages,
 )
 from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
-from langchain.chat_models import ChatOpenAI
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.schema.runnable import Runnable
 from langchain.tools.render import format_tool_to_openai_tool

diff --git a/langchain_benchmarks/utils/__init__.py b/langchain_benchmarks/utils/__init__.py
@@ -1 +1,3 @@
 from langchain_benchmarks.utils._langsmith import run_without_langsmith
+
+__all__ = ["run_without_langsmith"]
Original file line number	Diff line number	Diff line change
		@@ -1 +1,3 @@
		from langchain_benchmarks.utils._langsmith import run_without_langsmith

		__all__ = ["run_without_langsmith"]