
Commit a7e3d13

Fix: Populate spend_logs_metadata in batch and files endpoints (#16921)
* Add spend-logs-metadata to the metadata
* Add tests for spend logs metadata in batches
1 parent: 05d67c1

File tree

3 files changed: +340 -0


litellm/proxy/litellm_pre_call_utils.py

Lines changed: 16 additions & 0 deletions

@@ -894,6 +894,22 @@ async def add_litellm_data_to_request( # noqa: PLR0915
         data["metadata"]
     )

+    # Parse litellm_metadata if it's a string (e.g., from multipart/form-data or extra_body)
+    if "litellm_metadata" in data and data["litellm_metadata"] is not None:
+        if isinstance(data["litellm_metadata"], str):
+            parsed_litellm_metadata = safe_json_loads(data["litellm_metadata"])
+            if not isinstance(parsed_litellm_metadata, dict):
+                verbose_proxy_logger.warning(
+                    f"Failed to parse 'litellm_metadata' as JSON dict. Received value: {data['litellm_metadata']}"
+                )
+            else:
+                data["litellm_metadata"] = parsed_litellm_metadata
+        # Merge litellm_metadata into the metadata variable (preserving existing values)
+        if isinstance(data["litellm_metadata"], dict):
+            for key, value in data["litellm_metadata"].items():
+                if key not in data[_metadata_variable_name]:
+                    data[_metadata_variable_name][key] = value
+
     data = LiteLLMProxyRequestSetup.add_user_api_key_auth_to_request_metadata(
         data=data,
         user_api_key_dict=user_api_key_dict,
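The effect of the new block: litellm_metadata may now arrive either as a dict (JSON bodies) or as a JSON-encoded string (multipart form fields), and its keys are merged into the request's metadata dict only where no value is already set. A minimal standalone sketch of that behavior, using plain json.loads as a stand-in for LiteLLM's safe_json_loads helper and a metadata_var parameter as a stand-in for _metadata_variable_name; this mirrors the committed logic but is not the actual LiteLLM function:

import json
from typing import Any, Dict

def merge_litellm_metadata(data: Dict[str, Any], metadata_var: str = "metadata") -> Dict[str, Any]:
    """Sketch of the committed parse-and-merge logic; not the actual LiteLLM function."""
    raw = data.get("litellm_metadata")
    if isinstance(raw, str):
        try:
            parsed = json.loads(raw)  # stand-in for LiteLLM's safe_json_loads
        except json.JSONDecodeError:
            parsed = None  # the real code logs a warning and leaves the string in place
        if isinstance(parsed, dict):
            data["litellm_metadata"] = parsed
    if isinstance(data.get("litellm_metadata"), dict):
        target = data.setdefault(metadata_var, {})
        for key, value in data["litellm_metadata"].items():
            target.setdefault(key, value)  # existing values win, mirroring "if key not in"
    return data

# A multipart request delivers the field as a JSON string:
req = {"litellm_metadata": '{"spend_logs_metadata": {"owner": "team-a"}}', "metadata": {}}
print(merge_litellm_metadata(req)["metadata"])
# -> {'spend_logs_metadata': {'owner': 'team-a'}}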

litellm/proxy/openai_files_endpoints/files_endpoints.py

Lines changed: 5 additions & 0 deletions

@@ -206,6 +206,7 @@ async def create_file(
     provider: Optional[str] = None,
     custom_llm_provider: str = Form(default="openai"),
     file: UploadFile = File(...),
+    litellm_metadata: Optional[str] = Form(default=None),
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """
@@ -264,6 +265,10 @@ async def create_file(
         purpose = cast(OpenAIFilesPurpose, purpose)

         data = {}
+
+        # Add litellm_metadata to data if provided (from form field)
+        if litellm_metadata is not None:
+            data["litellm_metadata"] = litellm_metadata

         # Include original request and headers in the data
         data = await add_litellm_data_to_request(
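From the client side, the new form field means spend-tracking metadata can ride along with a file upload as ordinary multipart data. A sketch using requests, assuming a proxy at http://localhost:4000 with key sk-1234 (the same constants the tests below use); the aiohttp-based test in this commit exercises the identical path:

import json
import requests

# Assumed local proxy; matches the BASE_URL/API_KEY constants in the tests below.
resp = requests.post(
    "http://localhost:4000/v1/files",
    headers={"Authorization": "Bearer sk-1234"},
    data={
        "purpose": "batch",
        # Multipart fields are strings, so the metadata travels JSON-encoded
        # and is decoded server-side by add_litellm_data_to_request.
        "litellm_metadata": json.dumps(
            {"spend_logs_metadata": {"owner": "team-data-ai-ml", "product": "litellm"}}
        ),
    },
    files={
        "file": (
            "batch.jsonl",
            b'{"custom_id": "t-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Hi"}]}}',
        )
    },
)
print(resp.json()["id"])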
Lines changed: 319 additions & 0 deletions

@@ -0,0 +1,319 @@
"""
Test cases for spend_logs_metadata propagation in batch and files endpoints.

Tests that litellm_metadata with spend_logs_metadata is properly:
1. Accepted in files.create() via extra_body (multipart form data)
2. Accepted in batches.create() via extra_body (JSON body)
3. Parsed from JSON string format
4. Merged into request metadata
5. Propagated to logging output
"""

import pytest
import asyncio
import aiohttp
import json
import os
from openai import OpenAI, AsyncOpenAI
from unittest.mock import patch, MagicMock, AsyncMock

BASE_URL = "http://localhost:4000"
API_KEY = "sk-1234"


@pytest.fixture
def spend_logs_metadata():
    """Sample spend logs metadata for testing."""
    return {
        "owner": "team-data-ai-ml",
        "product": "litellm",
        "feature": "test_batching",
        "environment": "development",
    }


@pytest.mark.asyncio
async def test_files_create_with_litellm_metadata(spend_logs_metadata):
    """
    Test that files.create() properly handles litellm_metadata in extra_body.

    This tests the fix for multipart form data handling, where litellm_metadata
    is sent as a form field and needs to be parsed from a JSON string.
    """
    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)

    # Create a simple JSONL file content for batch
    file_content = b'{"custom_id": "test-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Hi"}]}}'

    # Mock the logging to capture metadata
    with patch('litellm.proxy.proxy_server.proxy_logging_obj') as mock_logging:
        mock_logging.post_call_success_hook = AsyncMock(return_value=None)

        # Upload file with litellm_metadata
        uploaded_file = await client.files.create(
            purpose="batch",
            file=file_content,
            extra_body={
                "litellm_metadata": {
                    "spend_logs_metadata": spend_logs_metadata,
                }
            },
        )

        assert uploaded_file.id is not None
        print(f"✓ File created with ID: {uploaded_file.id}")

        # Clean up
        await client.files.delete(file_id=uploaded_file.id)

        # Verify the logging hook was called (metadata should be in the call)
        assert mock_logging.post_call_success_hook.called


@pytest.mark.asyncio
async def test_batches_create_with_litellm_metadata(spend_logs_metadata):
    """
    Test that batches.create() properly handles litellm_metadata in extra_body.

    This tests JSON body handling, where litellm_metadata is part of the request data.
    """
    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)

    # First create a file for the batch
    _current_dir = os.path.dirname(os.path.abspath(__file__))
    input_file_path = os.path.join(_current_dir, "input.jsonl")

    # Create file without metadata first
    file_obj = await client.files.create(
        file=open(input_file_path, "rb"),
        purpose="batch",
    )

    # Create batch with litellm_metadata
    batch = await client.batches.create(
        input_file_id=file_obj.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        extra_body={
            "litellm_metadata": {
                "spend_logs_metadata": spend_logs_metadata,
            }
        },
    )

    assert batch.id is not None
    print(f"✓ Batch created with ID: {batch.id}")

    # Clean up
    await client.batches.cancel(batch_id=batch.id)
    await client.files.delete(file_id=file_obj.id)


@pytest.mark.asyncio
async def test_files_create_with_raw_http_request(spend_logs_metadata):
    """
    Test files.create() with litellm_metadata using raw HTTP to verify form data handling.

    This directly tests that the form field 'litellm_metadata' is properly
    extracted and parsed from the multipart form data.
    """
    async with aiohttp.ClientSession() as session:
        url = f"{BASE_URL}/v1/files"
        headers = {"Authorization": f"Bearer {API_KEY}"}

        data = aiohttp.FormData()
        data.add_field("purpose", "batch")
        data.add_field(
            "file",
            b'{"custom_id": "test-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Test"}]}}',
            filename="test.jsonl",
        )
        # Add litellm_metadata as a JSON string (as the OpenAI SDK does)
        data.add_field(
            "litellm_metadata",
            json.dumps({"spend_logs_metadata": spend_logs_metadata}),
        )

        async with session.post(url, headers=headers, data=data) as response:
            assert response.status == 200
            result = await response.json()
            assert "id" in result
            file_id = result["id"]
            print(f"✓ File created via raw HTTP with ID: {file_id}")

        # Clean up
        delete_url = f"{BASE_URL}/v1/files/{file_id}"
        async with session.delete(delete_url, headers=headers) as delete_response:
            assert delete_response.status == 200


@pytest.mark.asyncio
async def test_batches_retrieve_with_header_metadata(spend_logs_metadata):
    """
    Test that batches.retrieve() properly handles spend_logs_metadata via headers.

    Since retrieve is a GET request, metadata must be passed via headers
    using x-litellm-spend-logs-metadata.
    """
    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)

    # First create a batch
    _current_dir = os.path.dirname(os.path.abspath(__file__))
    input_file_path = os.path.join(_current_dir, "input.jsonl")

    file_obj = await client.files.create(
        file=open(input_file_path, "rb"),
        purpose="batch",
    )

    batch = await client.batches.create(
        input_file_id=file_obj.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )

    # Retrieve with metadata in headers
    retrieved_batch = await client.batches.retrieve(
        batch_id=batch.id,
        extra_headers={
            "x-litellm-spend-logs-metadata": json.dumps(spend_logs_metadata)
        },
    )

    assert retrieved_batch.id == batch.id
    print(f"✓ Batch retrieved with metadata headers: {batch.id}")

    # Clean up
    await client.batches.cancel(batch_id=batch.id)
    await client.files.delete(file_id=file_obj.id)


@pytest.mark.asyncio
async def test_metadata_parsing_from_string():
    """
    Test that litellm_metadata is properly parsed when received as a JSON string.

    This tests the core parsing logic in add_litellm_data_to_request.
    """
    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
    from litellm.proxy._types import UserAPIKeyAuth
    from unittest.mock import Mock

    # Mock request and user_api_key_dict
    mock_request = Mock()
    mock_request.headers = {}
    mock_request.url.path = "/v1/files"
    mock_request.method = "POST"

    mock_user_api_key_dict = UserAPIKeyAuth(
        api_key="test-key",
        user_id="test-user",
    )

    mock_proxy_config = Mock()

    # Test data with litellm_metadata as a string (as it comes from form data)
    test_metadata = {
        "spend_logs_metadata": {
            "owner": "test-team",
            "product": "test-product",
        }
    }

    data = {
        "litellm_metadata": json.dumps(test_metadata)  # String format
    }

    # Process the data
    result = await add_litellm_data_to_request(
        data=data,
        request=mock_request,
        user_api_key_dict=mock_user_api_key_dict,
        proxy_config=mock_proxy_config,
        general_settings={},
        version="test",
    )

    # Verify litellm_metadata was parsed from string to dict
    assert isinstance(result["litellm_metadata"], dict)
    assert result["litellm_metadata"]["spend_logs_metadata"]["owner"] == "test-team"

    # Verify it was merged into the metadata variable
    # (for /v1/files routes the metadata variable is "litellm_metadata" itself)
    assert "spend_logs_metadata" in result["litellm_metadata"]
    assert result["litellm_metadata"]["spend_logs_metadata"]["product"] == "test-product"

    print("✓ Metadata parsing from string works correctly")


@pytest.mark.asyncio
async def test_metadata_merging_preserves_user_values():
    """
    Test that user-provided metadata takes precedence over defaults.

    When both user and team provide spend_logs_metadata, user values should win.
    """
    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
    from litellm.proxy._types import UserAPIKeyAuth
    from unittest.mock import Mock

    # Mock request
    mock_request = Mock()
    mock_request.headers = {}
    mock_request.url.path = "/v1/batches"
    mock_request.method = "POST"

    # Mock user with team metadata
    mock_user_api_key_dict = UserAPIKeyAuth(
        api_key="test-key",
        user_id="test-user",
        team_metadata={
            "spend_logs_metadata": {
                "owner": "team-default",
                "product": "team-product",
            }
        },
    )

    mock_proxy_config = Mock()

    # User provides their own spend_logs_metadata
    data = {
        "litellm_metadata": {
            "spend_logs_metadata": {
                "owner": "user-override",  # User value should win
                "feature": "user-feature",  # New key from user
            }
        }
    }

    # Process the data
    result = await add_litellm_data_to_request(
        data=data,
        request=mock_request,
        user_api_key_dict=mock_user_api_key_dict,
        proxy_config=mock_proxy_config,
        general_settings={},
        version="test",
    )

    # Verify user values take precedence
    spend_logs = result["litellm_metadata"]["spend_logs_metadata"]
    assert spend_logs["owner"] == "user-override"  # User value preserved
    assert spend_logs["feature"] == "user-feature"  # User value added
    # Team values should NOT override user values

    print("✓ User metadata values are preserved over defaults")


if __name__ == "__main__":
    """Run tests directly for debugging."""
    import sys
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

    asyncio.run(test_files_create_with_litellm_metadata({
        "owner": "team-data-ai-ml",
        "product": "litellm",
        "feature": "test_batching",
        "environment": "development",
    }))
