This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 4be9ac5

DarkLight1337 authored and Robert Shaw committed
[Bugfix][Frontend] Cleanup "fix chat logprobs" (vllm-project#5026)
1 parent 74eb6ab commit 4be9ac5

File tree: 6 files changed, +122 -123 lines changed

tests/async_engine/test_openapi_server_ray.py

Lines changed: 11 additions & 14 deletions
```diff
@@ -55,9 +55,8 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
                                              temperature=0.0)
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices) == 1
+    assert len(completion.choices[0].text) >= 5
     assert completion.choices[0].finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
@@ -69,8 +68,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -90,15 +88,14 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI):
                                                             logprobs=True,
                                                             top_logprobs=5)
     assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=13, total_tokens=23)
+
+    message = choice.message
     assert message.content is not None and len(message.content) >= 10
     assert message.role == "assistant"
     messages.append({"role": "assistant", "content": message.content})
```
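
The cleanup above replaces chained `is not None` checks with direct length assertions and pins the chat response's `finish_reason` and token usage. Below is a minimal, self-contained sketch of that assertion style; the base URL, API key, and model name are placeholders (not values from this commit), and it assumes a vLLM OpenAI-compatible server is already running.

```python
# A minimal sketch of the tightened assertion style, assuming a vLLM
# OpenAI-compatible server is already running; the base URL, API key,
# and model name below are placeholders, not values from this commit.
import asyncio

import openai


async def check_chat_logprobs() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    chat_completion = await client.chat.completions.create(
        model="my-model",
        messages=[{"role": "user", "content": "what is 1+1?"}],
        max_tokens=10,
        logprobs=True,
        top_logprobs=5,
    )

    # Direct structural checks instead of chained "is not None" guards.
    assert len(chat_completion.choices) == 1
    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    assert choice.logprobs is not None
    assert choice.logprobs.content is not None
    assert len(choice.logprobs.content[0].top_logprobs) == 5


if __name__ == "__main__":
    asyncio.run(check_chat_logprobs())
```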

tests/entrypoints/test_openai_server.py

Lines changed: 84 additions & 85 deletions
```diff
@@ -166,9 +166,10 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
 
     assert completion.id is not None
     assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
+
+    choice = completion.choices[0]
+    assert len(choice.text) >= 5
+    assert choice.finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
 
@@ -179,8 +180,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
 
 
 @pytest.mark.asyncio
@@ -205,9 +205,9 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # first test base model, then test loras
+    # just test 1 lora hereafter
     "model_name",
-    [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
+    [MODEL_NAME, "zephyr-lora"],
 )
 async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
                              model_name: str):
@@ -290,55 +290,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
         max_tokens=5,
         temperature=0.0,
     )
-    completion = completion.choices[0].text
-    assert completion is not None and len(completion) >= 0
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    # just test 1 lora hereafter
-    "model_name",
-    [MODEL_NAME, "zephyr-lora"],
-)
-async def test_single_chat_session(server, client: openai.AsyncOpenAI,
-                                   model_name: str):
-    messages = [{
-        "role": "system",
-        "content": "you are a helpful assistant"
-    }, {
-        "role": "user",
-        "content": "what is 1+1?"
-    }]
-
-    # test single completion
-    chat_completion = await client.chat.completions.create(model=model_name,
-                                                            messages=messages,
-                                                            max_tokens=10,
-                                                            logprobs=True,
-                                                            top_logprobs=5)
-    assert chat_completion.id is not None
-    assert chat_completion.choices is not None and len(
-        chat_completion.choices) == 1
-    assert chat_completion.choices[0].message is not None
-    assert chat_completion.choices[0].logprobs is not None
-    assert chat_completion.choices[0].logprobs.content[
-        0].top_logprobs is not None
-    assert len(
-        chat_completion.choices[0].logprobs.content[0].top_logprobs) == 5
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 10
-    assert message.role == "assistant"
-    messages.append({"role": "assistant", "content": message.content})
-
-    # test multi-turn dialogue
-    messages.append({"role": "user", "content": "express your result in json"})
-    chat_completion = await client.chat.completions.create(
-        model=model_name,
-        messages=messages,
-        max_tokens=10,
-    )
-    message = chat_completion.choices[0].message
-    assert message.content is not None and len(message.content) >= 0
+    assert len(completion.choices[0].text) >= 0
 
 
 @pytest.mark.asyncio
@@ -393,7 +345,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 1
+    assert len(choice.logprobs.content[0].top_logprobs) == 0
 
 
 @pytest.mark.asyncio
@@ -421,11 +373,14 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.logprobs is not None
     assert choice.logprobs.content is not None
-    assert len(choice.logprobs.content[0].top_logprobs) <= 6
+    assert len(choice.logprobs.content[0].top_logprobs) == 5
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
 async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
                                       model_name: str):
     messages = [{
@@ -466,7 +421,51 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
-    # just test 1 lora hereafter
+    "model_name",
+    [MODEL_NAME, "zephyr-lora"],
+)
+async def test_single_chat_session(server, client: openai.AsyncOpenAI,
+                                   model_name: str):
+    messages = [{
+        "role": "system",
+        "content": "you are a helpful assistant"
+    }, {
+        "role": "user",
+        "content": "what is 1+1?"
+    }]
+
+    # test single completion
+    chat_completion = await client.chat.completions.create(model=model_name,
+                                                            messages=messages,
+                                                            max_tokens=10,
+                                                            logprobs=True,
+                                                            top_logprobs=5)
+    assert chat_completion.id is not None
+    assert len(chat_completion.choices) == 1
+
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert chat_completion.usage == openai.types.CompletionUsage(
+        completion_tokens=10, prompt_tokens=37, total_tokens=47)
+
+    message = choice.message
+    assert message.content is not None and len(message.content) >= 10
+    assert message.role == "assistant"
+    messages.append({"role": "assistant", "content": message.content})
+
+    # test multi-turn dialogue
+    messages.append({"role": "user", "content": "express your result in json"})
+    chat_completion = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_tokens=10,
+    )
+    message = chat_completion.choices[0].message
+    assert message.content is not None and len(message.content) >= 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
@@ -752,8 +751,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
         logit_bias={str(token_id): 100},
         seed=42,
     )
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices[0].text) >= 5
     response_tokens = tokenizer(completion.choices[0].text,
                                 add_special_tokens=False)["input_ids"]
     expected_tokens = tokenizer(tokenizer.decode([token_id] * 5),
@@ -800,9 +798,8 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
             guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         output_json = json.loads(completion.choices[i].text)
         jsonschema.validate(instance=output_json, schema=TEST_SCHEMA)
 
@@ -869,9 +866,8 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
             guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 3
+    assert len(completion.choices) == 3
     for i in range(3):
-        assert completion.choices[i].text is not None
         assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None
 
 
@@ -928,7 +924,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
             guided_decoding_backend=guided_decoding_backend))
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 2
+    assert len(completion.choices) == 2
     for i in range(2):
         assert completion.choices[i].text in TEST_CHOICE
 
@@ -1030,12 +1026,14 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
         top_logprobs=5,
         extra_body=dict(guided_choice=TEST_CHOICE,
                         guided_decoding_backend=guided_decoding_backend))
+
+    assert chat_completion.choices[0].logprobs is not None
+    assert chat_completion.choices[0].logprobs.content is not None
     top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
 
     # -9999.0 is the minimum logprob returned by OpenAI
-    assert all(
-        isinstance(token.logprob, float) and token.logprob >= -9999.0
-        for token in top_logprobs)
+    for item in top_logprobs:
+        assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"
 
 
 @pytest.mark.asyncio
@@ -1237,6 +1235,8 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
         response_format={"type": "json_object"})
 
     content = resp.choices[0].message.content
+    assert content is not None
+
     loaded = json.loads(content)
     assert loaded == {"result": 2}, loaded
 
@@ -1364,8 +1364,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,
 
     prompt_text = tokenizer.decode(prompt) if isinstance(prompt,
                                                          list) else prompt
-    assert (completion.choices[0].text is not None
-            and re.search(r"^" + prompt_text, completion.choices[0].text))
+    assert re.search(r"^" + prompt_text, completion.choices[0].text)
     logprobs = completion.choices[0].logprobs
     assert logprobs is not None
     assert len(logprobs.text_offset) > 5
@@ -1406,32 +1405,32 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
 )
 async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
                                 model_name: str):
-    input = [
+    input_texts = [
         "The chef prepared a delicious meal.",
     ]
 
     # test single embedding
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 9
     assert embeddings.usage.total_tokens == 9
 
     # test using token IDs
-    input = [1, 1, 1, 1, 1]
+    input_tokens = [1, 1, 1, 1, 1]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=input,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 1
+    assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 5
@@ -1446,29 +1445,29 @@ async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
 async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
                                model_name: str):
     # test List[str]
-    inputs = [
+    input_texts = [
         "The cat sat on the mat.", "A feline was resting on a rug.",
         "Stars twinkle brightly in the night sky."
     ]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_texts,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 3
+    assert len(embeddings.data) == 3
     assert len(embeddings.data[0].embedding) == 4096
 
     # test List[List[int]]
-    inputs = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-              [25, 32, 64, 77]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
     embeddings = await client.embeddings.create(
         model=model_name,
-        input=inputs,
+        input=input_tokens,
         encoding_format="float",
     )
     assert embeddings.id is not None
-    assert embeddings.data is not None and len(embeddings.data) == 4
+    assert len(embeddings.data) == 4
     assert len(embeddings.data[0].embedding) == 4096
     assert embeddings.usage.completion_tokens == 0
     assert embeddings.usage.prompt_tokens == 17
```
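
Among the changes above, the embedding tests rename their request variables (`input` to `input_texts`, `inputs` to `input_tokens`) so they no longer shadow Python's built-in `input`. Below is a rough sketch of the resulting call pattern; the base URL and embedding model name are placeholders, and a vLLM OpenAI-compatible embedding server is assumed to be running.

```python
# Rough sketch of the renamed embedding inputs; the base URL and model
# name are placeholders, and a vLLM OpenAI-compatible embedding server
# is assumed to be running.
import asyncio

import openai


async def check_embeddings() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")

    # List[str] input: one embedding is returned per prompt string.
    input_texts = ["The chef prepared a delicious meal."]
    embeddings = await client.embeddings.create(model="my-embedding-model",
                                                input=input_texts,
                                                encoding_format="float")
    assert len(embeddings.data) == len(input_texts)

    # List[int] input: the endpoint also accepts pre-tokenized prompts.
    input_tokens = [1, 1, 1, 1, 1]
    embeddings = await client.embeddings.create(model="my-embedding-model",
                                                input=input_tokens,
                                                encoding_format="float")
    assert len(embeddings.data) == 1


if __name__ == "__main__":
    asyncio.run(check_embeddings())
```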

tests/tensorizer_loader/test_tensorizer.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -214,9 +214,8 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
                                              temperature=0.0)
 
     assert completion.id is not None
-    assert completion.choices is not None and len(completion.choices) == 1
-    assert completion.choices[0].text is not None and len(
-        completion.choices[0].text) >= 5
+    assert len(completion.choices) == 1
+    assert len(completion.choices[0].text) >= 5
     assert completion.choices[0].finish_reason == "length"
     assert completion.usage == openai.types.CompletionUsage(
         completion_tokens=5, prompt_tokens=6, total_tokens=11)
```
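
The tensorizer test gets the same treatment as the completions tests above: once the redundant `is not None` guards are dropped, the length, `finish_reason`, and usage checks stand on their own. Below is a minimal sketch of that pattern with a placeholder base URL and model name, assuming a running vLLM OpenAI-compatible server.

```python
# Minimal sketch of the simplified completion assertions; placeholder
# base URL and model name, assuming a vLLM OpenAI-compatible server
# is already running.
import asyncio

import openai


async def check_completion() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    completion = await client.completions.create(model="my-model",
                                                 prompt="Hello, my name is",
                                                 max_tokens=5,
                                                 temperature=0.0)
    assert len(completion.choices) == 1
    assert len(completion.choices[0].text) >= 5
    assert completion.choices[0].finish_reason == "length"


if __name__ == "__main__":
    asyncio.run(check_completion())
```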
