fix: Remove in-memory _messages field on Agent #2295

Merged: 12 commits, Dec 20, 2024
Fix summarizer tests
mattzh72 committed Dec 20, 2024
commit 0b5bebcf4db66d2d421a33baf4afff80ba60b56a
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
@@ -33,7 +33,6 @@ jobs:
- "test_memory.py"
- "test_utils.py"
- "test_stream_buffer_readers.py"
- "test_summarize.py"
services:
qdrant:
image: qdrant/qdrant
2 changes: 1 addition & 1 deletion letta/services/agent_manager.py
@@ -374,7 +374,7 @@ def set_in_context_messages(self, agent_id: str, message_ids: List[str], actor:
def trim_older_in_context_messages(self, num: int, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
new_messages = [message_ids[0]] + message_ids[num:] # 0 is system message
return self.set_in_context_messages(agent_id=agent_id, message_ids=[m.id for m in new_messages], actor=actor)
return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)

@enforce_types
def prepend_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
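Note on the agent_manager.py change above: `agent.message_ids` is a list of message ID strings, so the removed line's `[m.id for m in new_messages]` appears to call `.id` on plain strings and would raise an `AttributeError`; the added line passes the trimmed ID list straight through to `set_in_context_messages`. A minimal standalone sketch of the trimming behavior (hypothetical data, persistence call omitted):

```python
from typing import List

def trim_older_in_context_message_ids(message_ids: List[str], num: int) -> List[str]:
    # Index 0 is the system message and is always kept;
    # the IDs at indices 1..num-1 are dropped.
    return [message_ids[0]] + message_ids[num:]

ids = ["msg-system", "msg-1", "msg-2", "msg-3"]
assert trim_older_in_context_message_ids(ids, num=2) == ["msg-system", "msg-2", "msg-3"]
```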
110 changes: 109 additions & 1 deletion tests/integration_test_summarizer.py
@@ -1,13 +1,16 @@
import json
import os
import uuid
from typing import List

import pytest

from letta import create_client
from letta.agent import Agent
from letta.client.client import LocalClient
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message
from letta.streaming_interface import StreamingRefreshCLIInterface
from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH
from tests.helpers.utils import cleanup
@@ -16,6 +19,110 @@
LLM_CONFIG_DIR = "tests/configs/llm_model_configs"
SUMMARY_KEY_PHRASE = "The following is a summary"

test_agent_name = f"test_client_{str(uuid.uuid4())}"

# TODO: these tests should include looping through LLM providers, since behavior may vary across providers
# TODO: these tests should add function calls into the summarized message sequence


@pytest.fixture(scope="module")
def client():
client = create_client()
# client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini"))
client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini"))
client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))

yield client


@pytest.fixture(scope="module")
def agent_state(client):
# Generate uuid for agent name for this example
agent_state = client.create_agent(name=test_agent_name)
yield agent_state

client.delete_agent(agent_state.id)


def test_summarize_messages_inplace(client, agent_state, mock_e2b_api_key_none):
"""Test summarization via sending the summarize CLI command or via a direct call to the agent object"""
# First send a few messages (4)
response = client.user_message(
agent_id=agent_state.id,
message="Hey, how's it going? What do you think about this whole shindig",
).messages
assert response is not None and len(response) > 0
print(f"test_summarize: response={response}")

response = client.user_message(
agent_id=agent_state.id,
message="Any thoughts on the meaning of life?",
).messages
assert response is not None and len(response) > 0
print(f"test_summarize: response={response}")

response = client.user_message(agent_id=agent_state.id, message="Does the number 42 ring a bell?").messages
assert response is not None and len(response) > 0
print(f"test_summarize: response={response}")

response = client.user_message(
agent_id=agent_state.id,
message="Would you be surprised to learn that you're actually conversing with an AI right now?",
).messages
assert response is not None and len(response) > 0
print(f"test_summarize: response={response}")

# reload agent object
agent_obj = client.server.load_agent(agent_id=agent_state.id, actor=client.user)

agent_obj.summarize_messages_inplace()


def test_auto_summarize(client, mock_e2b_api_key_none):
"""Test that the summarizer triggers by itself"""
small_context_llm_config = LLMConfig.default_config("gpt-4o-mini")
small_context_llm_config.context_window = 4000

small_agent_state = client.create_agent(
name="small_context_agent",
llm_config=small_context_llm_config,
)

try:

def summarize_message_exists(messages: List[Message]) -> bool:
for message in messages:
if message.text and "The following is a summary of the previous" in message.text:
print(f"Summarize message found after {message_count} messages: \n {message.text}")
return True
return False

MAX_ATTEMPTS = 10
message_count = 0
while True:

# send a message
response = client.user_message(
agent_id=small_agent_state.id,
message="What is the meaning of life?",
)
message_count += 1

print(f"Message {message_count}: \n\n{response.messages}" + "--------------------------------")

# check if the summarize message is inside the messages
assert isinstance(client, LocalClient), "Test only works with LocalClient"
in_context_messages = client.server.agent_manager.get_in_context_messages(agent_id=small_agent_state.id, actor=client.user)
print("SUMMARY", summarize_message_exists(in_context_messages))
if summarize_message_exists(in_context_messages):
break

if message_count > MAX_ATTEMPTS:
raise Exception(f"Summarize message not found after {message_count} messages")

finally:
client.delete_agent(small_agent_state.id)


@pytest.mark.parametrize(
"config_filename",
@@ -69,4 +176,5 @@ def test_summarizer(config_filename):

# Invoke a summarize
letta_agent.summarize_messages_inplace(preserve_last_N_messages=False)
assert SUMMARY_KEY_PHRASE in letta_agent.messages[1]["content"], f"Test failed for config: {config_filename}"
in_context_messages = client.get_in_context_messages(agent_state.id)
assert SUMMARY_KEY_PHRASE in in_context_messages[1].text, f"Test failed for config: {config_filename}"
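The updated assertion above reflects the removal of the in-memory `_messages` field: instead of indexing `letta_agent.messages[1]["content"]`, the test reads the agent's persisted in-context messages back through the client and checks the second one. A minimal sketch of that check, assuming the `client` and `agent_state` fixtures used elsewhere in this file and that the summarizer inserts its summary right after the system prompt:

```python
SUMMARY_KEY_PHRASE = "The following is a summary"

def summary_in_context(client, agent_id: str) -> bool:
    # Fetch the persisted in-context messages; index 0 is the system prompt,
    # index 1 should hold the summary message written by the summarizer.
    in_context_messages = client.get_in_context_messages(agent_id)
    return SUMMARY_KEY_PHRASE in (in_context_messages[1].text or "")

# Hypothetical usage mirroring the assert above:
# assert summary_in_context(client, agent_state.id), f"Test failed for config: {config_filename}"
```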
133 changes: 0 additions & 133 deletions tests/test_summarize.py

This file was deleted.
