 Tests for usage tracking functionality.
 """
 
-import io
-from contextlib import redirect_stdout
-from decimal import Decimal
 from datetime import datetime
+from decimal import Decimal
 from unittest.mock import patch
 
+from agents_api.clients.pg import create_db_pool
+from agents_api.common.utils.usage import track_embedding_usage, track_usage
+from agents_api.queries.usage.create_usage_record import create_usage_record
+from litellm import cost_per_token
+from litellm.utils import Message, ModelResponse, Usage, token_counter
 
 from ward import test
 
-from agents_api.common.utils.usage import track_usage, track_embedding_usage
-from agents_api.queries.usage.create_usage_record import create_usage_record
-from litellm.utils import ModelResponse, Usage, Choices, Message
-from agents_api.clients.pg import create_db_pool
 from .fixtures import pg_dsn, test_developer_id
-from litellm import cost_per_token
-from litellm.utils import token_counter
+
 
 @test("query: create_usage_record creates a record with correct parameters")
 async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
@@ -31,10 +29,10 @@ async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
     assert len(response) == 1
     record = response[0]
     assert record["developer_id"] == developer_id
-    assert record["model"] == 'gpt-4o-mini'
+    assert record["model"] == "gpt-4o-mini"
     assert record["prompt_tokens"] == 100
     assert record["completion_tokens"] == 100
-    assert record["cost"] == Decimal('0.000075')
+    assert record["cost"] == Decimal("0.000075")
     assert record["estimated"] is False
     assert record["custom_api_used"] is False
     assert record["metadata"] == {}
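Note (not part of the diff): the asserted figure follows from gpt-4o-mini's list pricing, assuming the commonly cited rates of $0.15 per 1M prompt tokens and $0.60 per 1M completion tokens — litellm's pricing table may differ by version. A quick sketch of the arithmetic:

```python
from decimal import Decimal

# Assumed rates -- verify against litellm's model_prices table for your version.
prompt_cost = 100 * 0.15 / 1_000_000      # 0.000015
completion_cost = 100 * 0.60 / 1_000_000  # 0.00006
total = Decimal(str(prompt_cost + completion_cost)).quantize(Decimal("0.000001"))
assert total == Decimal("0.000075")
```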
@@ -60,7 +58,7 @@ async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
         "meta-llama/llama-4-maverick:free",
         "qwen/qwen-2.5-72b-instruct",
         "sao10k/l3.3-euryale-70b",
-        "sao10k/l3.1-euryale-70b"
+        "sao10k/l3.1-euryale-70b",
     ]
     for model in models:
         response = await create_usage_record(
@@ -86,9 +84,11 @@ async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
         connection_pool=pool,
     )
 
-    input_cost, completion_cost = cost_per_token("gpt-4o-mini", prompt_tokens=2041, completion_tokens=34198)
+    input_cost, completion_cost = cost_per_token(
+        "gpt-4o-mini", prompt_tokens=2041, completion_tokens=34198
+    )
     cost = input_cost + completion_cost
-    cost = Decimal(str(cost)).quantize(Decimal('0.000001'))
+    cost = Decimal(str(cost)).quantize(Decimal("0.000001"))
 
     assert len(response) == 1
     record = response[0]
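Aside on the `Decimal(str(...))` round-trip above (an illustration, not part of the change): constructing `Decimal` directly from a float captures the full binary expansion, while going through `str()` first keeps the value human-shaped before quantizing to six places:

```python
from decimal import Decimal

cost = 0.1 + 0.2   # floats accumulate binary rounding error
Decimal(cost)       # Decimal('0.3000000000000000444089209850062616169452667236328125')
Decimal(str(cost))  # Decimal('0.30000000000000004')
Decimal(str(cost)).quantize(Decimal("0.000001"))  # Decimal('0.300000')
```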
@@ -125,13 +125,14 @@ async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
 
     assert len(response) == 1
     record = response[0]
-    assert record["cost"] == Decimal('0.000000')
+    assert record["cost"] == Decimal("0.000000")
     assert record["estimated"] is True
 
+
 @test("query: create_usage_record with fallback pricing with model not in fallback pricing")
 async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
     pool = await create_db_pool(dsn=dsn)
-
+
     with patch("builtins.print") as mock_print:
         unknown_model = "unknown-model-name"
         response = await create_usage_record(
@@ -146,7 +147,7 @@ async def _(dsn=pg_dsn, developer_id=test_developer_id) -> None:
 
         assert len(response) == 1
         record = response[0]
-        assert record["cost"] == Decimal('0.000000')
+        assert record["cost"] == Decimal("0.000000")
         assert record["estimated"] is True
         assert expected_call == actual_call
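For context, `expected_call` and `actual_call` are defined outside this hunk; they presumably capture the warning printed for a model with no pricing entry. Patching a builtin and asserting on its call record looks like this in general (the message string below is made up for illustration):

```python
from unittest.mock import call, patch

with patch("builtins.print") as mock_print:
    print("custom cost not found for model: unknown-model-name")  # hypothetical message
    # each invocation is recorded as a call(...) object
    assert mock_print.call_args == call("custom cost not found for model: unknown-model-name")
```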
@@ -171,23 +172,28 @@ async def _(developer_id=test_developer_id) -> None:
     assert call_args["prompt_tokens"] == 100
     assert call_args["completion_tokens"] == 100
 
-
+
 @test("utils: track_usage without response.usage")
 async def _(developer_id=test_developer_id) -> None:
     with patch("agents_api.common.utils.usage.create_usage_record") as mock_create_usage_record:
         response = ModelResponse(
             usage=None,
-            choices=[{
-                "finish_reason": "stop",
-                "index": 0,
-                "message": Message(content="Hello, world!", role="assistant")
-            }]
+            choices=[
+                {
+                    "finish_reason": "stop",
+                    "index": 0,
+                    "message": Message(content="Hello, world!", role="assistant"),
+                }
+            ],
         )
         response.usage = None
         messages = [{"role": "user", "content": "Hello, world!"}]
 
         prompt_tokens = token_counter(model="gpt-4o-mini", messages=messages)
-        completion_tokens = token_counter(model="gpt-4o-mini", messages=[{"content": choice.message.content} for choice in response.choices])
+        completion_tokens = token_counter(
+            model="gpt-4o-mini",
+            messages=[{"content": choice.message.content} for choice in response.choices],
+        )
 
         await track_usage(
             developer_id=developer_id,
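Background on the recomputation above (a sketch, not asserted by the test): when a response arrives without a usage block, litellm's `token_counter` can rebuild counts from plain message dicts — it accepts either `messages=[...]` or `text="..."` and returns an int:

```python
from litellm.utils import token_counter

prompt_tokens = token_counter(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello, world!"}],
)
completion_tokens = token_counter(
    model="gpt-4o-mini",
    messages=[{"content": "Hello, world!"}],  # role is optional for counting
)
# Both are small positive ints; exact values depend on the model's tokenizer.
```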
@@ -210,16 +216,16 @@ async def _(developer_id=test_developer_id) -> None:
                 completion_tokens=0,
             ),
         )
-
+
         inputs = ["This is a test input for embedding"]
-
+
         await track_embedding_usage(
             developer_id=developer_id,
             model="text-embedding-3-large",
             inputs=inputs,
             response=response,
         )
-
+
         call_args = mock_create_usage_record.call_args[1]
         assert call_args["prompt_tokens"] == 150
         assert call_args["completion_tokens"] == 0
@@ -231,20 +237,23 @@ async def _(developer_id=test_developer_id) -> None:
     with patch("agents_api.common.utils.usage.create_usage_record") as mock_create_usage_record:
         response = ModelResponse()
         response.usage = None
-
+
         inputs = ["First test input", "Second test input"]
-
+
         # Calculate expected tokens manually
-        expected_tokens = sum(token_counter(model="text-embedding-3-large", text=input_text) for input_text in inputs)
-
+        expected_tokens = sum(
+            token_counter(model="text-embedding-3-large", text=input_text)
+            for input_text in inputs
+        )
+
         await track_embedding_usage(
             developer_id=developer_id,
             model="text-embedding-3-large",
             inputs=inputs,
             response=response,
         )
-
+
         call_args = mock_create_usage_record.call_args[1]
         assert call_args["prompt_tokens"] == expected_tokens
         assert call_args["completion_tokens"] == 0
-        assert call_args["model"] == "text-embedding-3-large"
+        assert call_args["model"] == "text-embedding-3-large"
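A closing note on the mock assertions used throughout: `unittest.mock` stores the most recent call as an `(args, kwargs)` pair, so `call_args[1]` is the keyword-argument dict — which is why the tests index it like a mapping. A tiny self-contained illustration:

```python
from unittest.mock import MagicMock

mock = MagicMock()
mock(model="text-embedding-3-large", prompt_tokens=7, completion_tokens=0)

assert mock.call_args[1]["prompt_tokens"] == 7     # kwargs dict at index 1
assert mock.call_args.kwargs == mock.call_args[1]  # named accessor (Py3.8+)
```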