Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions hindsight-api/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ..tracing import create_operation_span
from ..utils import mask_network_location
from .db_budget import budgeted_operation
from .providers.openai_compatible_llm import llm_user
from .operation_metadata import (
BatchRetainChildMetadata,
BatchRetainParentMetadata,
Expand Down Expand Up @@ -933,6 +934,11 @@ async def execute_task(self, task_dict: dict[str, Any]):
if schema:
_current_schema.set(schema)

# Set llm_user so LLM calls include it for upstream proxy attribution
bank_id = task_dict.get("bank_id")
if bank_id:
llm_user.set(bank_id)

# Check if operation was cancelled (only for tasks with operation_id)
if operation_id:
try:
Expand Down Expand Up @@ -1739,6 +1745,9 @@ async def retain_batch_async(
)
# Returns: [["unit-id-1"], ["unit-id-2"]]
"""
# Set llm_user so LLM calls include it for upstream proxy attribution
llm_user.set(bank_id)

start_time = time.time()

if not contents:
Expand Down Expand Up @@ -4234,6 +4243,9 @@ async def reflect_async(
- based_on: Empty dict (agent retrieves facts dynamically)
- structured_output: None (not yet supported for agentic reflect)
"""
# Set llm_user so LLM calls include it for upstream proxy attribution
llm_user.set(bank_id)

# Use cached LLM config
if self._reflect_llm_config is None:
raise ValueError("Memory LLM API key not set. Set HINDSIGHT_API_LLM_API_KEY environment variable.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"""

import asyncio
import contextvars
import io
import json
import logging
Expand All @@ -34,6 +35,11 @@

logger = logging.getLogger(__name__)

# Context variable for upstream proxy attribution. When it holds a non-empty
# value, the LLM call paths copy it into the request parameters as the "user"
# field of OpenAI API requests; with the default of None the field is omitted.
# Set this before making LLM calls to enable per-caller cost tracking when
# running behind a proxy.
llm_user: contextvars.ContextVar[str | None] = contextvars.ContextVar("llm_user", default=None)

# Seed applied to every Groq request for deterministic behavior
DEFAULT_LLM_SEED = 4242

Expand Down Expand Up @@ -216,6 +222,11 @@ async def call(
"messages": messages,
}

# Inject llm_user as "user" for upstream proxy attribution
_llm_user = llm_user.get()
if _llm_user:
call_params["user"] = _llm_user

# Check if model supports reasoning parameter
is_reasoning_model = self._supports_reasoning_model()

Expand Down Expand Up @@ -530,6 +541,11 @@ async def call_with_tools(
"tool_choice": tool_choice,
}

# Inject llm_user as "user" for upstream proxy attribution
_llm_user = llm_user.get()
if _llm_user:
call_params["user"] = _llm_user

if max_completion_tokens is not None:
call_params["max_completion_tokens"] = max_completion_tokens
if temperature is not None:
Expand Down