@@ -166,9 +166,10 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
     assert completion.id is not None
     assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
+
+    choice = completion.choices[0]
+    assert len(choice.text) >= 5
+    assert choice.finish_reason == "length"
 
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
 
@@ -179,8 +180,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -205,9 +205,9 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # first test base model, then test loras
+    # just test 1 lora hereafter
    "model_name",
-    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+    [MODEL_NAME, "zephyr-lora"],
 )
 async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
                              model_name: str):
@@ -290,55 +290,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    # just test 1 lora hereafter
-    "model_name",
-    [MODEL_NAME, "zephyr-lora"],
-)
-async def test_single_chat_session(server, client: openai.AsyncOpenAI,
-                                   model_name: str):
-    messages = [{
-        "role": "system",
-        "content": "you are a helpful assistant"
-    }, {
-        "role": "user",
-        "content": "what is 1+1?"
-    }]
-
-    # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                           messages=messages,
-                                                           max_tokens=10,
-                                                           logprobs=True,
-                                                           top_logprobs=5)
-    assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 10
-    assert message.role == "assistant"
-    messages.append({"role": "assistant", "content": message.content})
-
-    # test multi-turn dialogue
-    messages.append({"role": "user", "content": "express your result in json"})
-    chat_completion = await client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=10,
-    )
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 0
+    assert len(completion.choices[0].text) >= 0
 
 
 @pytest.mark.asyncio
@@ -393,7 +345,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 1
+    assert len(choice.logprobs.content[0].top_logprobs) == 0
 
 
 @pytest.mark.asyncio
@@ -421,11 +373,14 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 6
+    assert len(choice.logprobs.content[0].top_logprobs) == 5
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
 async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
                                       model_name: str):
     messages = [{
@@ -466,7 +421,51 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # just test 1 lora hereafter
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_single_chat_session(server, client: openai.AsyncOpenAI,
+                                   model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    # test single completion
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                           messages=messages,
+                                                           max_tokens=10,
+                                                           logprobs=True,
+                                                           top_logprobs=5)
+    assert chat_completion.id is not None
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=37, total_tokens=47)
+
+    message = choice.message
+    assert message.content is not None and len(message.content) >= 10
+    assert message.role == "assistant"
+    messages.append({"role": "assistant", "content": message.content})
+
+    # test multi-turn dialogue
+    messages.append({"role": "user", "content": "express your result in json"})
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    message = chat_completion.choices[0].message
+    assert message.content is not None and len(message.content) >= 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
@@ -752,8 +751,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
         logit_bias={str(token_id): 100},
         seed=42,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
     response_tokens = tokenizer(completion.choices[0].text,
                                 add_special_tokens=False)["input_ids"]
     expected_tokens = tokenizer(tokenizer.decode([token_id] * 5),
@@ -800,9 +798,8 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         output_json = json.loads(completion.choices[i].text)
         jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)
 
@@ -869,9 +866,8 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None
 
 
@@ -928,7 +924,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
                         guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 2
+    assert len(completion.choices) == 2
     for i in range(2):
         assert completion.choices[i].text in TEST_CHOICE
 
@@ -1030,12 +1026,14 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
         top_logprobs=5,
         extra_body=dict(guided_choice=TEST_CHOICE,
                         guided_decoding_backend=guided_decoding_backend))
+
+    assert chat_completion.choices[0].logprobs is not None
+    assert chat_completion.choices[0].logprobs.content is not None
     top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
 
     # -9999.0 is the minimum logprob returned by OpenAI
-    assert all(
-        isinstance(token.logprob, float) and token.logprob >= -9999.0
-        for token in top_logprobs)
+    for item in top_logprobs:
+        assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"
 
 
 @pytest.mark.asyncio
@@ -1237,6 +1235,8 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
             response_format={"type": "json_object"})
 
         content = resp.choices[0].message.content
+        assert content is not None
+
         loaded = json.loads(content)
         assert loaded == {"result": 2}, loaded
 
@@ -1364,8 +1364,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,
 
     prompt_text = tokenizer.decode(prompt) if isinstance(prompt,
                                                          list) else prompt
-    assert (completion.choices[0].text is not None
-            and re.search(r"^" + prompt_text, completion.choices[0].text))
+    assert re.search(r"^" + prompt_text, completion.choices[0].text)
     logprobs = completion.choices[0].logprobs
     assert logprobs is not None
     assert len(logprobs.text_offset) > 5
@@ -1406,32 +1405,32 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
 )
 async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
                                 model_name: str):
-    input = [
+    input_texts = [
         "The chef prepared a delicious meal.",
     ]
 
     # test single embedding
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 9
     assert embeddings.usage.total_tokens == 9
 
     # test using token IDs
-    input = [1, 1, 1, 1, 1]
+    input_tokens = [1, 1, 1, 1, 1]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 5
@@ -1446,29 +1445,29 @@ async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
 async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
                                model_name: str):
     # test List[str]
-    inputs = [
+    input_texts = [
         "The cat sat on the mat.", "A feline was resting on a rug.",
         "Stars twinkle brightly in the night sky."
     ]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 3
+    assert len(embeddings.data) == 3
     assert len(embeddings.data[0].embedding) == 4096
 
     # test List[List[int]]
-    inputs = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-              [25, 32, 64, 77]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 4
+    assert len(embeddings.data) == 4
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 17