Your current environment
```bash
docker pull vllm/vllm-openai:latest
docker stop pixtral ; docker rm pixtral
docker run -d --restart=always \
--runtime=nvidia \
--gpus '"device=MIG-2ea01c20-8e9b-54a7-a91b-f308cd216a95"' \
--shm-size=10.24gb \
-p 5001:5001 \
-e NCCL_IGNORE_DISABLED_P2P=1 \
-e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \
-e VLLM_NCCL_SO_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/nccl/lib/libnccl.so.2 \
-v /etc/passwd:/etc/passwd:ro \
-v /etc/group:/etc/group:ro \
-u `id -u`:`id -g` \
-v "${HOME}"/.cache:$HOME/.cache/ \
-v "${HOME}"/.cache/huggingface:$HOME/.cache/huggingface \
-v "${HOME}"/.cache/huggingface/hub:$HOME/.cache/huggingface/hub \
-v "${HOME}"/.config:$HOME/.config/ -v "${HOME}"/.triton:$HOME/.triton/ \
--network host \
--name pixtral \
vllm/vllm-openai:latest \
--port=5001 \
--host=0.0.0.0 \
--model=mistralai/Pixtral-12B-2409 \
--seed 1234 \
--tensor-parallel-size=1 \
--gpu-memory-utilization 0.98 \
--enforce-eager \
--tokenizer_mode mistral \
--limit_mm_per_prompt 'image=8' \
--max-model-len=32768 \
--max-num-batched-tokens=32768 \
--max-log-len=100 \
--download-dir=$HOME/.cache/huggingface/hub &>> logs.vllm_server.pixtral.txt
```
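The server starts cleanly and the OpenAI-compatible endpoint responds; e.g., listing the served models works (a minimal check, assuming the container is reached directly on port 5001):

```python
# Minimal liveness check against the OpenAI-compatible API.
# Host/port are assumptions; adjust to however the server is exposed.
from openai import OpenAI

client = OpenAI(base_url='http://localhost:5001/v1', api_key='EMPTY')
for model in client.models.list():
    print(model.id)  # expect: mistralai/Pixtral-12B-2409
```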
Sending an image query also works as expected, but any simple guided_json or guided_choice request fails with a 500 Internal Server Error. Minimal reproduction:
````python
from openai import OpenAI
base_url = 'http://IP:80/v1' # replace IP
api_key = "EMPTY"
client_args = dict(base_url=base_url, api_key=api_key)
openai_client = OpenAI(**client_args)
prompt = """<all_documents>
<doc>
<name>roses.pdf</name>
<page>1</page>
<text>
I like red roses, and red elephants.
</text>
</doc>
</all_documents>
<response_format_instructions>
Ensure you follow this JSON schema, and ensure to use the same key names as the schema:
```json
{"color": {"type": "string"}}
```
</response_format_instructions>
What do I like?"""
guided_json = {
    "type": "object",
    "properties": {
        "color": {
            "type": "string"
        }
    }
}
messages = [{'role': 'user', 'content': prompt}]
stream = False
client_kwargs = dict(model='mistralai/Pixtral-12B-2409',
                     max_tokens=2048, stream=stream, messages=messages,
                     response_format=dict(type='json_object'),
                     extra_body=dict(guided_json=guided_json))
client = openai_client.chat.completions
responses = client.create(**client_kwargs)
text = responses.choices[0].message.content
print(text)
````
gives:
```
Traceback (most recent call last):
  File "/home/jon/h2ogpt/check_openai_json1.py", line 51, in <module>
    responses = client.create(**client_kwargs)
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_utils/_utils.py", line 274, in wrapper
    return func(*args, **kwargs)
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 668, in create
    return self._post(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1259, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 936, in request
    return self._request(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1025, in _request
    return self._retry_request(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1074, in _retry_request
    return self._request(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1025, in _request
    return self._retry_request(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1074, in _retry_request
    return self._request(
  File "/home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_base_client.py", line 1040, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.InternalServerError: Error code: 500
```
and the vLLM server log shows:
```
INFO: 172.16.0.101:3146 - "GET /v1/models HTTP/1.1" 200 OK
INFO: 172.16.0.101:3146 - "GET /v1/models HTTP/1.1" 200 OK
INFO: 172.16.0.101:3146 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
```
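For completeness, guided_choice fails the same way. A minimal variant of the call above (the choice list here is only illustrative) also returns a 500:

```python
# Same client as above, but with guided_choice instead of guided_json;
# this fails with the same 500 Internal Server Error.
# The choice list is an illustrative example, not from a real workload.
choice_kwargs = dict(model='mistralai/Pixtral-12B-2409',
                     max_tokens=16, stream=False,
                     messages=[{'role': 'user',
                                'content': 'I like red roses. What color do I like?'}],
                     extra_body=dict(guided_choice=['red', 'blue', 'green']))
responses = openai_client.chat.completions.create(**choice_kwargs)
print(responses.choices[0].message.content)
```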
Model Input Dumps
No response
🐛 Describe the bug
See above.