vectorize-io · AlexanderZaytsev · Feb 23, 2026
diff --git a/hindsight-api/hindsight_api/engine/memory_engine.py b/hindsight-api/hindsight_api/engine/memory_engine.py
@@ -25,6 +25,7 @@
 from ..tracing import create_operation_span
 from ..utils import mask_network_location
 from .db_budget import budgeted_operation
+from .providers.openai_compatible_llm import llm_user
 from .operation_metadata import (
     BatchRetainChildMetadata,
     BatchRetainParentMetadata,
@@ -933,6 +934,11 @@ async def execute_task(self, task_dict: dict[str, Any]):
         if schema:
             _current_schema.set(schema)
 
+        # Set llm_user so LLM calls include it for upstream proxy attribution
+        bank_id = task_dict.get("bank_id")
+        if bank_id:
+            llm_user.set(bank_id)
+
         # Check if operation was cancelled (only for tasks with operation_id)
         if operation_id:
             try:
@@ -1739,6 +1745,9 @@ async def retain_batch_async(
             )
             # Returns: [["unit-id-1"], ["unit-id-2"]]
         """
+        # Set llm_user so LLM calls include it for upstream proxy attribution
+        llm_user.set(bank_id)
+
         start_time = time.time()
 
         if not contents:
@@ -4234,6 +4243,9 @@ async def reflect_async(
                 - based_on: Empty dict (agent retrieves facts dynamically)
                 - structured_output: None (not yet supported for agentic reflect)
         """
+        # Set llm_user so LLM calls include it for upstream proxy attribution
+        llm_user.set(bank_id)
+
         # Use cached LLM config
         if self._reflect_llm_config is None:
             raise ValueError("Memory LLM API key not set. Set HINDSIGHT_API_LLM_API_KEY environment variable.")

diff --git a/hindsight-api/hindsight_api/engine/providers/openai_compatible_llm.py b/hindsight-api/hindsight_api/engine/providers/openai_compatible_llm.py
@@ -16,6 +16,7 @@
 """
 
 import asyncio
+import contextvars
 import io
 import json
 import logging
@@ -34,6 +35,11 @@
 
 logger = logging.getLogger(__name__)
 
+# Context variable for upstream proxy attribution. Set it before making LLM calls
+# to enable per-caller cost tracking behind a proxy: call() and call_with_tools()
+# add a non-empty value to the OpenAI API request as "user"; None (the default) adds nothing.
+llm_user: contextvars.ContextVar[str | None] = contextvars.ContextVar("llm_user", default=None)
+
 # Seed applied to every Groq request for deterministic behavior
 DEFAULT_LLM_SEED = 4242
 
@@ -216,6 +222,11 @@ async def call(
             "messages": messages,
         }
 
+        # Inject llm_user as "user" for upstream proxy attribution
+        _llm_user = llm_user.get()
+        if _llm_user:
+            call_params["user"] = _llm_user
+
         # Check if model supports reasoning parameter
         is_reasoning_model = self._supports_reasoning_model()
 
@@ -530,6 +541,11 @@ async def call_with_tools(
             "tool_choice": tool_choice,
         }
 
+        # Inject llm_user as "user" for upstream proxy attribution
+        _llm_user = llm_user.get()
+        if _llm_user:
+            call_params["user"] = _llm_user
+
         if max_completion_tokens is not None:
             call_params["max_completion_tokens"] = max_completion_tokens
         if temperature is not None: