Skip to content

Commit 0360c6c

Browse files
authored
Merge pull request #11 from memfuse/dev
Dev
2 parents 20b3ea6 + 3bf6120 commit 0360c6c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+10675
-1143
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
OPENAI_API_KEY=sk-your-openai-api-key-here
33
OPENAI_BASE_URL=https://api.openai.com/v1
44

5+
# OpenAI Compatible Model Configuration
6+
# Model name to use for benchmarks (defaults to gpt-4o-mini if not set)
7+
OPENAI_COMPATIBLE_MODEL=gpt-4o-mini
8+
59
# Anthropic API Key
610
ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key-here
711

.gitignore

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,5 +174,10 @@ cython_debug/
174174
.pypirc
175175

176176
.DS_Store
177-
scripts/
178-
benchmarks/
177+
CLAUDE.md
178+
179+
.cursor/
180+
results/
181+
182+
benchmarks/results/
183+
.claude/

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ When the context window reaches its limit, previous conversations, user preferen
5555

5656
This repository contains the official Python SDK for seamless integration with MemFuse servers. For comprehensive information about the MemFuse server architecture and advanced features, please visit the [MemFuse Server repository](https://github.com/memfuse/memfuse).
5757

58+
## Recent Updates
59+
60+
- **Enhanced Testing:** Comprehensive E2E testing with semantic memory validation
61+
- **Better Error Handling:** Improved error messages and logging for easier debugging
62+
- **Prompt Templates:** Structured prompt management system for consistent LLM interactions
63+
- **Performance Benchmarks:** MSC dataset accuracy testing with 95% validation threshold
64+
5865
## Installation
5966

6067
> **Note:** This is the standalone Client SDK repository. If you need to install and run the MemFuse server, which is essential to use the SDK, please visit the [MemFuse Server repository](https://github.com/memfuse/memfuse).
@@ -129,6 +136,22 @@ print(f"Follow-up: {followup_response.choices[0].message.content}")
129136

130137
MemFuse automatically manages the retrieval of relevant information and storage of new memories from conversations within the specified `memory` scope.
131138

139+
## Advanced Features
140+
141+
### Memory Validation & Testing
142+
The SDK includes comprehensive testing capabilities to validate memory accuracy:
143+
144+
- **E2E Memory Tests:** Automated tests that verify conversational context retention
145+
- **Semantic Similarity Validation:** Uses RAGAS framework for intelligent response verification
146+
- **Performance Benchmarks:** MSC (Multi-Session Chat) dataset testing with accuracy metrics
147+
148+
### Error Handling & Debugging
149+
Enhanced error messages provide clear guidance:
150+
151+
- **Connection Issues:** Helpful instructions for starting the MemFuse server
152+
- **API Errors:** Detailed error responses with actionable information
153+
- **Logging:** Comprehensive logging for troubleshooting and monitoring
154+
132155
## Examples
133156

134157
Explore comprehensive examples in the [examples/](examples/) directory of this repository, featuring:

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/check_api_ready.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/usr/bin/env python
"""Preflight check that the Gemini and OpenAI APIs are reachable before running benchmarks."""
import logging
import os
import sys

from dotenv import load_dotenv
import google.genai as genai
from google.genai.errors import ClientError
from openai import OpenAI, OpenAIError

# Values from .env take precedence over anything already in the process environment.
load_dotenv(override=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
def check_gemini_api():
    """Return True when the Gemini API is reachable.

    A quota / rate-limit rejection still counts as success, since it proves
    the credentials and endpoint are valid.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    base_url = os.getenv("GEMINI_BASE_URL")

    if not api_key:
        logger.error("GEMINI_API_KEY not found in environment variables")
        return False

    try:
        # Imported lazily so the module loads even if only OpenAI is checked.
        from google.genai import types

        if base_url:
            logger.info(f"Using custom base URL: {base_url}")
            client = genai.Client(
                api_key=api_key,
                http_options=types.HttpOptions(base_url=base_url),
            )
        else:
            client = genai.Client(api_key=api_key)

        logger.info("Testing Gemini API connection...")

        # A minimal generation request is the cheapest end-to-end probe.
        response = client.models.generate_content(
            model="gemini-2.5-flash-lite",
            contents="Hello, can you respond with just 'OK'?",
        )

        if response and response.text:
            logger.info("SUCCESS: Gemini API is ready and responding correctly")
            logger.info(f"Response: {response.text.strip()}")
            return True

        logger.error("ERROR: Gemini API responded but with empty content")
        return False

    except ClientError as e:
        # Quota / rate-limit errors mean the API itself is accessible.
        detail = str(e)
        if any(marker in detail for marker in ("RESOURCE_EXHAUSTED", "RATE_LIMIT_EXCEEDED", "429")):
            logger.info("SUCCESS: Gemini API is accessible (quota/rate limit reached)")
            logger.info(f"Rate limit details: {e}")
            return True
        logger.error(f"ERROR: Gemini API client error: {e}")
        return False
    except Exception as e:
        logger.error(f"ERROR: Unexpected error testing Gemini API: {e}")
        return False
70+
def check_openai_api():
    """Check if the OpenAI API is ready and accessible.

    Returns:
        bool: True when the API answers a trivial completion request (or is
        merely quota/rate limited, which still proves accessibility);
        False on missing credentials or any other failure.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    base_url = os.getenv("OPENAI_BASE_URL")
    # Fall back to gpt-4o-mini, matching the documented default in
    # .env.example ("defaults to gpt-4o-mini if not set"). Without this
    # default, an unset env var would send model=None and the check would
    # fail even with a working API.
    openai_model = os.getenv("OPENAI_COMPATIBLE_MODEL", "gpt-4o-mini")

    if not api_key:
        logger.error("OPENAI_API_KEY not found in environment variables")
        return False

    try:
        # Configure the client, honoring an optional custom endpoint.
        if base_url:
            logger.info(f"Using custom base URL: {base_url}")
            client = OpenAI(api_key=api_key, base_url=base_url)
        else:
            client = OpenAI(api_key=api_key)

        # Test with a simple completion request.
        logger.info("Testing OpenAI API connection...")

        response = client.chat.completions.create(
            model=openai_model,
            messages=[{"role": "user", "content": "Hello, can you respond with just 'OK'?"}],
            max_tokens=10
        )

        if response and response.choices and response.choices[0].message.content:
            logger.info("SUCCESS: OpenAI API is ready and responding correctly")
            logger.info(f"Response: {response.choices[0].message.content.strip()}")
            return True
        else:
            logger.error("ERROR: OpenAI API responded but with empty content")
            return False

    except OpenAIError as e:
        # A quota/rate-limit error still means the API is accessible.
        if "rate_limit_exceeded" in str(e).lower() or "quota" in str(e).lower() or "429" in str(e):
            logger.info("SUCCESS: OpenAI API is accessible (quota/rate limit reached)")
            logger.info(f"Rate limit details: {e}")
            return True
        else:
            logger.error(f"ERROR: OpenAI API client error: {e}")
            return False
    except Exception as e:
        logger.error(f"ERROR: Unexpected error testing OpenAI API: {e}")
        return False
119+
def main():
    """Probe both providers; exit 0 only when every API check passes."""
    logger.info("Checking API readiness...")

    # Run both probes unconditionally so all failures are reported at once.
    results = {
        "Gemini": check_gemini_api(),
        "OpenAI": check_openai_api(),
    }

    if all(results.values()):
        logger.info("All checks passed - Both APIs are ready!")
        sys.exit(0)

    for provider, ok in results.items():
        if not ok:
            logger.error(f"{provider} API check failed")
    sys.exit(1)

if __name__ == "__main__":
    main()

benchmarks/config.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Dataset configurations.
# Maps a short dataset key to the metadata the benchmark runner needs:
#   name          - human-readable title
#   dataset_id    - Hugging Face dataset identifier
#   data_file     - local path of the dataset JSON file
#   description   - one-line summary
#   question_type - "conversation" or "factual"
DATASET_CONFIGS = {
    "msc": {
        # MSC stands for "Multi-Session Chat" (consistent with the README),
        # not "Multiple-choice Situation Comprehension".
        "name": "MSC (Multi-Session Chat)",
        "dataset_id": "Percena/msc-memfuse-mc10",
        "data_file": "data/msc_memfuse_mc10.json",
        "description": "Multi-Session Chat dataset",
        "question_type": "conversation"
    },
    "lme": {
        "name": "LME (LongMemEval)",
        "dataset_id": "Percena/lme-mc10",
        "data_file": "data/lme_s_mc10.json",
        "description": "LongMemEval dataset",
        "question_type": "factual"
    },
    "locomo": {
        "name": "LoCoMo (Long Conversation Memory)",
        "dataset_id": "Percena/locomo-mc10",
        "data_file": "data/locomo_mc10.json",
        "description": "Long Conversation Memory dataset",
        "question_type": "conversation"
    }
}

0 commit comments

Comments
 (0)