Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions evaluation/.env-example
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,8 @@ SUPERMEMORY_API_KEY="sm_xxx"
MEMOBASE_API_KEY="xxx"
MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"

# pref
PRE_SPLIT_CHUNK=false # pre split chunk in client end, for personamem and prefeval
# 1. text_mem + pref_mem + instruction_completion: set INSTRUCT_COMPLETE=true, ABLATION_PREF=false
# 2. text_mem + pref_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=false
# 3. text_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=true
INSTRUCT_COMPLETE=true # use instruct complete format or not
ABLATION_PREF=false # remove pref mem, only text mem
# eval settings
PRE_SPLIT_CHUNK=false

# Configuration Only For Scheduler
# RabbitMQ Configuration
Expand Down
15 changes: 8 additions & 7 deletions evaluation/scripts/PrefEval/pref_memos.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
"""
Processes a single line of data, searching memory based on the question.
"""
from utils.pref_mem_utils import create_mem_string

i, line = line_data
try:
Expand All @@ -94,7 +93,13 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
start_time_search = time.monotonic()
relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
search_memories_duration = time.monotonic() - start_time_search
memories_str = create_mem_string(relevant_memories)
memories_str = (
"\n".join(
f"- {entry.get('memory', '')}"
for entry in relevant_memories["text_mem"][0]["memories"]
)
+ f"\n{relevant_memories['pref_mem']}"
)

memory_tokens_used = len(tokenizer.encode(memories_str))

Expand All @@ -119,7 +124,6 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
"""
Generates a response for a single line of data using pre-fetched memories.
"""
from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
from utils.prompts import PREFEVAL_ANSWER_PROMPT

i, line = line_data
Expand All @@ -146,10 +150,7 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
)
return original_data

memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)

template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
system_prompt = template.format(context=memories_str)
system_prompt = PREFEVAL_ANSWER_PROMPT.format(context=memories_str)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
Expand Down
8 changes: 1 addition & 7 deletions evaluation/scripts/locomo/locomo_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str
question=question,
)
else:
from utils.pref_mem_utils import add_pref_instruction

template = add_pref_instruction(ANSWER_PROMPT_MEMOS, frame=frame)
prompt = template.format(
prompt = ANSWER_PROMPT_MEMOS.format(
context=context,
question=question,
)
Expand All @@ -55,16 +52,13 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str


async def process_qa(frame, qa, search_result, oai_client):
from utils.pref_mem_utils import remove_pref_mem_from_mem_string

start = time()
query = qa.get("question")
gold_answer = qa.get("answer")
qa_category = qa.get("category")

context = search_result.get("context")

context = remove_pref_mem_from_mem_string(context, frame)
answer = await locomo_response(frame, oai_client, context, query)

response_duration_ms = (time() - start) * 1000
Expand Down
11 changes: 8 additions & 3 deletions evaluation/scripts/locomo/locomo_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,19 @@ def memos_api_search(
client, query, speaker_a_user_id, speaker_b_user_id, top_k, speaker_a, speaker_b
):
from prompts import TEMPLATE_MEMOS
from utils.pref_mem_utils import create_mem_string

start = time()
search_a_results = client.search(query=query, user_id=speaker_a_user_id, top_k=top_k)
search_b_results = client.search(query=query, user_id=speaker_b_user_id, top_k=top_k)

speaker_a_context = create_mem_string(search_a_results)
speaker_b_context = create_mem_string(search_b_results)
speaker_a_context = (
"\n".join([i["memory"] for i in search_a_results["text_mem"][0]["memories"]])
+ f"\n{search_a_results['pref_mem']}"
)
speaker_b_context = (
"\n".join([i["memory"] for i in search_b_results["text_mem"][0]["memories"]])
+ f"\n{search_b_results['pref_mem']}"
)

context = TEMPLATE_MEMOS.format(
speaker_1=speaker_a,
Expand Down
17 changes: 1 addition & 16 deletions evaluation/scripts/locomo/prompts.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,3 @@
import os


PREF_INSTRUCTIONS = """
# Note:
Plaintext memory are summaries of facts, while preference memories are summaries of user preferences.
Your response must not violate any of the user's preferences, whether explicit or implicit, and briefly explain why you answer this way to avoid conflicts.
When encountering preference conflicts, the priority is: explicit preference > implicit preference > plaintext memory.
"""


ANSWER_PROMPT_MEM0 = """
You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

Expand Down Expand Up @@ -114,18 +103,14 @@
5. Formulate a precise, concise answer based on the evidence from the memories (and allowed world knowledge).
6. Double-check that your answer directly addresses the question asked and adheres to all instructions.
7. Ensure your final answer is specific and avoids vague time references.
{pref_instructions}

{context}

Question: {question}

Answer:
"""

if os.getenv("INSTRUCT_COMPLETE") == "true":
ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", PREF_INSTRUCTIONS)
else:
ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", "")

custom_instructions = """
Generate personal memories that follow these guidelines:
Expand Down
13 changes: 5 additions & 8 deletions evaluation/scripts/longmemeval/lme_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,11 @@


sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
from utils.prompts import LME_ANSWER_PROMPT


def lme_response(llm_client, context, question, question_date, frame):
template = add_pref_instruction(LME_ANSWER_PROMPT, frame=frame)
prompt = template.format(
def lme_response(llm_client, context, question, question_date):
prompt = LME_ANSWER_PROMPT.format(
question=question,
question_date=question_date,
context=context,
Expand All @@ -35,14 +33,13 @@ def lme_response(llm_client, context, question, question_date, frame):
return result


def process_qa(user_id, search_result, llm_client, frame):
def process_qa(user_id, search_result, llm_client):
start = time()
search_result = search_result[0]
question = search_result.get("question")
question_date = search_result.get("date")
context = search_result.get("search_context", "")
context = remove_pref_mem_from_mem_string(context, frame=frame)
anwer = lme_response(llm_client, context, question, question_date, frame)
anwer = lme_response(llm_client, context, question, question_date)

response_duration_ms = (time() - start) * 1000

Expand Down Expand Up @@ -97,7 +94,7 @@ def main(frame, version, num_workers=4):
future_to_user_id = {}

for user_id, search_results in lme_search_results.items():
future = executor.submit(process_qa, user_id, search_results, oai_client, frame)
future = executor.submit(process_qa, user_id, search_results, oai_client)
future_to_user_id[future] = user_id

for future in tqdm(
Expand Down
6 changes: 4 additions & 2 deletions evaluation/scripts/longmemeval/lme_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import pandas as pd

from tqdm import tqdm
from utils.pref_mem_utils import create_mem_string
from utils.prompts import (
MEM0_CONTEXT_TEMPLATE,
MEM0_GRAPH_CONTEXT_TEMPLATE,
Expand Down Expand Up @@ -45,7 +44,10 @@ def mem0_search(client, query, user_id, top_k):
def memos_search(client, query, user_id, top_k):
start = time()
results = client.search(query=query, user_id=user_id, top_k=top_k)
context = create_mem_string(results)
context = (
"\n".join([i["memory"] for i in results["text_mem"][0]["memories"]])
+ f"\n{results['pref_mem']}"
)
context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=context)
duration_ms = (time() - start) * 1000
return context, duration_ms
Expand Down
16 changes: 5 additions & 11 deletions evaluation/scripts/personamem/pm_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import re

from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
from utils.prompts import PM_ANSWER_PROMPT


Expand Down Expand Up @@ -49,9 +48,8 @@ def _extract_only_options(text):
return False, predicted_answer


def pm_response(llm_client, context, question, options, frame):
template = add_pref_instruction(PM_ANSWER_PROMPT, frame=frame)
prompt = template.format(
def pm_response(llm_client, context, question, options):
prompt = PM_ANSWER_PROMPT.format(
question=question,
context=context,
options=options,
Expand All @@ -68,19 +66,17 @@ def pm_response(llm_client, context, question, options, frame):
return result


def process_qa(user_id, search_result, num_runs, llm_client, frame):
def process_qa(user_id, search_result, num_runs, llm_client):
search_result = search_result[0]
question = search_result.get("question")
context = search_result.get("search_context", "")
options = search_result.get("all_options", [])

context = remove_pref_mem_from_mem_string(context, frame=frame)

run_results = []

for idx in range(num_runs):
start = time()
answer = pm_response(llm_client, context, question, options, frame)
answer = pm_response(llm_client, context, question, options)
is_correct, answer = extract_choice_answer(answer, search_result.get("golden_answer", ""))
response_duration_ms = (time() - start) * 1000

Expand Down Expand Up @@ -154,9 +150,7 @@ def main(frame, version, num_runs=3, num_workers=4):
future_to_user_id = {}

for user_id, search_results in pm_search_results.items():
future = executor.submit(
process_qa, user_id, search_results, num_runs, oai_client, frame
)
future = executor.submit(process_qa, user_id, search_results, num_runs, oai_client)
future_to_user_id[future] = user_id

for future in tqdm(
Expand Down
6 changes: 4 additions & 2 deletions evaluation/scripts/personamem/pm_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.pref_mem_utils import create_mem_string
from utils.prompts import (
MEM0_CONTEXT_TEMPLATE,
MEM0_GRAPH_CONTEXT_TEMPLATE,
Expand Down Expand Up @@ -83,7 +82,10 @@ def memobase_search(client, query, user_id, top_k):
def memos_search(client, user_id, query, top_k):
start = time()
results = client.search(query=query, user_id=user_id, top_k=top_k)
search_memories = create_mem_string(results)
search_memories = (
"\n".join(item["memory"] for cube in results["text_mem"] for item in cube["memories"])
+ f"\n{results['pref_mem']}"
)
context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories)

duration_ms = (time() - start) * 1000
Expand Down
43 changes: 0 additions & 43 deletions evaluation/scripts/utils/pref_mem_utils.py

This file was deleted.

13 changes: 2 additions & 11 deletions evaluation/scripts/utils/prompts.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
PREF_INSTRUCTIONS = """
# Note:
Plaintext memory are summaries of facts, while preference memories are summaries of user preferences.
Your response must not violate any of the user's preferences, whether explicit or implicit, and briefly explain why you answer this way to avoid conflicts.
When encountering preference conflicts, the priority is: explicit preference > implicit preference > plaintext memory.
"""


LME_ANSWER_PROMPT = """
You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

Expand All @@ -25,7 +17,7 @@
5. Formulate a precise, concise answer based solely on the evidence in the memories.
6. Double-check that your answer directly addresses the question asked.
7. Ensure your final answer is specific and avoids vague time references.
{pref_instructions}

{context}

Current Date: {question_date}
Expand Down Expand Up @@ -55,7 +47,7 @@
- Your final answer **must use parentheses**, like (a) or (b).
- Do NOT list multiple choices. Choose only one.
- Do NOT include extra text after <final_answer>. Just output the answer.
{pref_instructions}

# QUESTION:
{question}

Expand All @@ -71,7 +63,6 @@
You are a helpful AI. Answer the question based on the query and the following memories:
User Memories:
{context}
{pref_instructions}
"""


Expand Down
2 changes: 1 addition & 1 deletion src/memos/vec_dbs/milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def search(

items.append(
MilvusVecDBItem(
id=str(hit["id"]),
id=str(entity.get("id")),
memory=entity.get("memory"),
vector=entity.get("vector"),
payload=entity.get("payload", {}),
Expand Down