Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7d33791
fix: add safe guard when parsing node memory
CaralHsi Sep 10, 2025
758cce5
feat: add filter as a parameter in tree-text searcher
CaralHsi Sep 10, 2025
f2d275b
feat: add filter for user and long-term memory
CaralHsi Sep 11, 2025
16ec612
feat: add filter in working memory
CaralHsi Sep 11, 2025
4765013
Merge branch 'dev' into feat/chat_bot_api
CaralHsi Sep 11, 2025
95c347d
add filter in task-parser
CaralHsi Sep 12, 2025
8db5752
feat: only mix-retrieve for vector-recall; TODO: mix reranker
CaralHsi Sep 12, 2025
b03d73e
feat: add 'session_id' as an optional parameter for product api
CaralHsi Sep 12, 2025
69e7829
feat: api 1.0 finish
CaralHsi Sep 15, 2025
30116c0
maintain: update gitignore
CaralHsi Sep 15, 2025
f0eff7e
maintain: update gitignore
CaralHsi Sep 15, 2025
5a1f26b
Merge branch 'feat/chat_bot_api' of github.com:CaralHsi/MemOSRealPubl…
CaralHsi Sep 15, 2025
5f7dac1
Merge branch 'dev' into feat/chat_bot_api
CaralHsi Sep 15, 2025
bd4f09d
feat: add 'type' in TextualMemory Sources
CaralHsi Sep 15, 2025
9c543bc
Merge branch 'feat/chat_bot_api' of github.com:CaralHsi/MemOSRealPubl…
CaralHsi Sep 15, 2025
ebec44b
feat: add annotation to item
CaralHsi Sep 15, 2025
ddb3524
fix: add session_id to product add
CaralHsi Sep 15, 2025
7dc386a
fix: test
CaralHsi Sep 15, 2025
15f6dc3
Merge branch 'test' into feat/chat_bot_api
CaralHsi Sep 15, 2025
c902ebc
feat: [WIP] add filter in reranker
CaralHsi Sep 16, 2025
58650c3
fix: conflict
CaralHsi Sep 16, 2025
4da163e
fix: bug in recall
CaralHsi Sep 16, 2025
6e84173
Merge branch 'test' into feat/chat_bot_api
CaralHsi Sep 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,9 @@ def search_single(vec, filt=None):
all_hits = []
# Path A: without filter
with ContextThreadPoolExecutor() as executor:
futures = [ex.submit(search_single, vec, None) for vec in query_embedding[:max_num]]
futures = [
executor.submit(search_single, vec, None) for vec in query_embedding[:max_num]
]
for f in concurrent.futures.as_completed(futures):
all_hits.extend(f.result() or [])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def _retrieve_from_working_memory(
graph_results=items,
top_k=top_k,
parsed_goal=parsed_goal,
search_filter=search_filter,
)

# --- Path B
Expand Down Expand Up @@ -292,6 +293,7 @@ def _retrieve_from_long_term_and_user(
graph_results=results,
top_k=top_k,
parsed_goal=parsed_goal,
search_filter=search_filter,
)

@timed
Expand Down
1 change: 1 addition & 0 deletions src/memos/reranker/cosine_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
self,
level_weights: dict[str, float] | None = None,
level_field: str = "background",
**kwargs,
):
self.level_weights = level_weights or {"topic": 1.0, "concept": 1.0, "fact": 1.0}
self.level_field = level_field
Expand Down
79 changes: 77 additions & 2 deletions src/memos/reranker/http_bge.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,35 @@
if TYPE_CHECKING:
from memos.memories.textual.item import TextualMemoryItem

# Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
# before sending text to the reranker. This keeps inputs clean and
# avoids misleading the model with bracketed prefixes.
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")


class HTTPBGEReranker(BaseReranker):
"""
HTTP-based BGE reranker. Mirrors your old MemoryReranker, but configurable.
HTTP-based BGE reranker.

This class sends (query, documents[]) to a remote HTTP endpoint that
performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
relevance scores. It then maps those scores back onto the original
TextualMemoryItem list and returns (item, score) pairs sorted by score.

Notes
-----
- The endpoint is expected to accept JSON:
{
"model": "<model-name>",
"query": "<query text>",
"documents": ["doc1", "doc2", ...]
}
- Two response shapes are supported:
1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
where "index" refers to the *position in the documents array*.
2) {"data": [{"score": <float>}, ...]} (aligned by list order)
- If the service fails or responds unexpectedly, this falls back to
returning the original items with 0.0 scores (best-effort).
"""

def __init__(
Expand All @@ -35,7 +58,22 @@ def __init__(
timeout: int = 10,
headers_extra: dict | None = None,
rerank_source: list[str] | None = None,
**kwargs,
):
"""
Parameters
----------
reranker_url : str
HTTP endpoint for the reranker service.
token : str, optional
Bearer token for auth. If non-empty, added to the Authorization header.
model : str, optional
Model identifier understood by the server.
timeout : int, optional
Request timeout (seconds).
headers_extra : dict | None, optional
Additional headers to merge into the request headers.
"""
if not reranker_url:
raise ValueError("reranker_url must not be empty")
self.reranker_url = reranker_url
Expand All @@ -48,13 +86,37 @@ def __init__(
def rerank(
self,
query: str,
graph_results: list,
graph_results: list[TextualMemoryItem],
top_k: int,
search_filter: dict | None = None,
**kwargs,
) -> list[tuple[TextualMemoryItem, float]]:
"""
Rank candidate memories by relevance to the query.

Parameters
----------
query : str
The search query.
graph_results : list[TextualMemoryItem]
Candidate items to re-rank. Each item is expected to have a
`.memory` str field; non-strings are ignored.
top_k : int
Return at most this many items.
search_filter : dict | None
Currently unused. Present to keep signature compatible.

Returns
-------
list[tuple[TextualMemoryItem, float]]
Re-ranked items with scores, sorted descending by score.
"""
if not graph_results:
return []

# Build a mapping from "payload docs index" -> "original graph_results index"
# Only include items that have a non-empty string memory. This ensures that
# any index returned by the server can be mapped back correctly.
documents = []
if self.concat_source:
documents = concat_original_source(graph_results, self.concat_source)
Expand All @@ -74,6 +136,7 @@ def rerank(
payload = {"model": self.model, "query": query, "documents": documents}

try:
# Make the HTTP request to the reranker service
resp = requests.post(
self.reranker_url, headers=headers, json=payload, timeout=self.timeout
)
Expand All @@ -83,9 +146,14 @@ def rerank(
scored_items: list[tuple[TextualMemoryItem, float]] = []

if "results" in data:
# Format:
# dict("results": [{"index": int, "relevance_score": float},
# ...])
rows = data.get("results", [])
for r in rows:
idx = r.get("index")
# The returned index refers to a position in the 'documents' payload,
# which is assumed to align one-to-one with graph_results.
if isinstance(idx, int) and 0 <= idx < len(graph_results):
score = float(r.get("relevance_score", r.get("score", 0.0)))
scored_items.append((graph_results[idx], score))
Expand All @@ -94,21 +162,28 @@ def rerank(
return scored_items[: min(top_k, len(scored_items))]

elif "data" in data:
# Format: {"data": [{"score": float}, ...]} aligned by list order
rows = data.get("data", [])
# Build a list of scores aligned with our 'documents' (pairs)
score_list = [float(r.get("score", 0.0)) for r in rows]

if len(score_list) < len(graph_results):
score_list += [0.0] * (len(graph_results) - len(score_list))
elif len(score_list) > len(graph_results):
score_list = score_list[: len(graph_results)]

# Pair each original item with its score; 'documents' order is
# assumed to match graph_results order.
scored_items = list(zip(graph_results, score_list, strict=False))
scored_items.sort(key=lambda x: x[1], reverse=True)
return scored_items[: min(top_k, len(scored_items))]

else:
# Unexpected response schema: degrade gracefully by returning the
# first top_k items with 0.0 scores.
return [(item, 0.0) for item in graph_results[:top_k]]

except Exception as e:
# Network error, timeout, JSON decode error, etc.
# Degrade gracefully by returning first top_k valid docs with 0.0 score.
logger.error(f"[HTTPBGEReranker] request failed: {e}")
return [(item, 0.0) for item in graph_results[:top_k]]
Loading