Commit 6c4908d: "fixes"

Parent: f2d8307

2 files changed: +33, −11 lines

tests/entrypoints/test_openai_server.py (19 additions, 5 deletions)

@@ -1367,37 +1367,51 @@ async def test_long_seed(client: openai.AsyncOpenAI):
             or "less_than_equal" in exc_info.value.message)


+@pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME],
 )
 async def test_tokenize(server, client: openai.AsyncOpenAI, model_name: str):
+    base_url = str(client.base_url)[:-3]
     tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME, tokenizer_mode="fast")

     for add_special in [False, True]:
         prompt = "This is a test prompt."
         tokens = tokenizer.encode(prompt, add_special_tokens=add_special)

-        response = requests.post("http://localhost:8000/tokenize",
+        response = requests.post(base_url + "/tokenize",
                                  json={
                                      "add_special_tokens": add_special,
+                                     "model": model_name,
                                      "prompt": prompt
                                  })
-        assert response.json() == {"tokens": tokens}
+        response.raise_for_status()
+        assert response.json() == {
+            "tokens": tokens,
+            "count": len(tokens),
+            "max_model_len": 8192
+        }


+@pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
     [MODEL_NAME],
 )
 async def test_detokenize(server, client: openai.AsyncOpenAI, model_name: str):
-    tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME)
+    base_url = str(client.base_url)[:-3]
+    tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME, tokenizer_mode="fast")

     prompt = "This is a test prompt."
     tokens = tokenizer.encode(prompt, add_special_tokens=False)

-    response = requests.post("http://localhost:8000/detokenize",
-                             json={"tokens": tokens})
+    response = requests.post(base_url + "detokenize",
+                             json={
+                                 "model": model_name,
+                                 "tokens": tokens
+                             })
+    response.raise_for_status()
     assert response.json() == {"prompt": prompt}

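For reference, the round trip these tests exercise can be reproduced directly against a running server. A minimal sketch, assuming a vLLM OpenAI-compatible server is already listening on http://localhost:8000 and serving a hypothetical model named "facebook/opt-125m" (neither assumption is part of this commit):

    import requests

    BASE_URL = "http://localhost:8000"  # assumed server address
    MODEL = "facebook/opt-125m"         # hypothetical model name
    PROMPT = "This is a test prompt."

    # /tokenize returns the token ids plus their count and the model's
    # maximum context length.
    resp = requests.post(BASE_URL + "/tokenize",
                         json={
                             "model": MODEL,
                             "prompt": PROMPT,
                             "add_special_tokens": False
                         })
    resp.raise_for_status()
    tokens = resp.json()["tokens"]

    # /detokenize maps the ids back to the original prompt string.
    resp = requests.post(BASE_URL + "/detokenize",
                         json={"model": MODEL, "tokens": tokens})
    resp.raise_for_status()
    assert resp.json()["prompt"] == PROMPT

Note that /tokenize now reports "count" and "max_model_len" alongside the token ids, which is exactly what the updated assertion in test_tokenize checks.
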
vllm/entrypoints/openai/api_server.py (14 additions, 6 deletions)

@@ -94,16 +94,24 @@ async def health() -> Response:

 @app.post("/tokenize")
 async def tokenize(request: TokenizeRequest):
-    response = openai_serving_completion.create_tokenize(request)
-    assert isinstance(response, TokenizeResponse)
-    return JSONResponse(content=response.model_dump())
+    generator = openai_serving_completion.create_tokenize(request)
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    else:
+        assert isinstance(generator, TokenizeResponse)
+        return JSONResponse(content=generator.model_dump())


 @app.post("/detokenize")
 async def detokenize(request: DetokenizeRequest):
-    response = openai_serving_completion.create_detokenize(request)
-    assert isinstance(response, DetokenizeResponse)
-    return JSONResponse(content=response.model_dump())
+    generator = openai_serving_completion.create_detokenize(request)
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    else:
+        assert isinstance(generator, DetokenizeResponse)
+        return JSONResponse(content=generator.model_dump())


 @app.get("/v1/models")
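
Both handlers now repeat the same ErrorResponse branch. Purely as an illustration, not part of this commit, that branch could be factored into a shared helper; the sketch below assumes the module's existing names (app, openai_serving_completion, the TokenizeRequest/TokenizeResponse and DetokenizeRequest/DetokenizeResponse models, and ErrorResponse with its code field) are in scope, and build_response is a hypothetical helper name:

    from fastapi.responses import JSONResponse

    def build_response(result, success_type):
        # Errors carry their own HTTP status code; anything else must be
        # the expected success model and is returned as plain JSON.
        if isinstance(result, ErrorResponse):
            return JSONResponse(content=result.model_dump(),
                                status_code=result.code)
        assert isinstance(result, success_type)
        return JSONResponse(content=result.model_dump())

    @app.post("/tokenize")
    async def tokenize(request: TokenizeRequest):
        return build_response(
            openai_serving_completion.create_tokenize(request),
            TokenizeResponse)

    @app.post("/detokenize")
    async def detokenize(request: DetokenizeRequest):
        return build_response(
            openai_serving_completion.create_detokenize(request),
            DetokenizeResponse)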
