[Fix] Add chat completion Example and simplify dependencies (vllm-pro…
zhuohan123 authored Jul 26, 2023
1 parent df5dd3c commit 82ad323
Showing 4 changed files with 52 additions and 11 deletions.
33 changes: 33 additions & 0 deletions examples/openai_chatcompletion_client.py
@@ -0,0 +1,33 @@
import openai

# Modify OpenAI's API key and API base to use vLLM's API server.
openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"

# List models API
models = openai.Model.list()
print("Models:", models)

model = models["data"][0]["id"]

# Chat completion API
chat_completion = openai.ChatCompletion.create(
    model=model,
    messages=[{
        "role": "system",
        "content": "You are a helpful assistant."
    }, {
        "role": "user",
        "content": "Who won the world series in 2020?"
    }, {
        "role": "assistant",
        "content": "The Los Angeles Dodgers won the World Series in 2020."
    }, {
        "role": "user",
        "content": "Where was it played?"
    }])

print("Chat completion results:")
print(chat_completion)
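
For reference, the same server also supports streamed chat completions with this pre-1.0 openai client; a minimal sketch, not part of this commit, assuming the same locally running vLLM server as above:

import openai

openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"

# Stream a chat completion; each chunk carries an incremental "delta"
# rather than a full message (openai 0.x chunk layout assumed).
chat_stream = openai.ChatCompletion.create(
    model=openai.Model.list()["data"][0]["id"],
    messages=[{"role": "user", "content": "Where was it played?"}],
    stream=True)

for chunk in chat_stream:
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content", ""), end="", flush=True)
print()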
14 changes: 7 additions & 7 deletions examples/openai_completion_client.py
@@ -3,26 +3,26 @@
 # Modify OpenAI's API key and API base to use vLLM's API server.
 openai.api_key = "EMPTY"
 openai.api_base = "http://localhost:8000/v1"
-model = "facebook/opt-125m"

-# Test list models API
+# List models API
 models = openai.Model.list()
 print("Models:", models)

-# Test completion API
-stream = True
+model = models["data"][0]["id"]
+
+# Completion API
+stream = False
 completion = openai.Completion.create(
     model=model,
     prompt="A robot may not injure a human being",
     echo=False,
     n=2,
     best_of=3,
     stream=stream,
     logprobs=3)
-
-# print the completion
+print("Completion results:")
 if stream:
     for c in completion:
         print(c)
 else:
-    print("Completion result:", completion)
+    print(completion)
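
When stream is flipped back to True, the loop above prints whole chunk objects; a sketch that extracts just the generated text, again assuming the pre-1.0 openai client (not from this commit):

import openai

openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"

# Streamed Completion: each chunk holds a text fragment for one choice
# (openai 0.x chunk layout assumed).
stream = openai.Completion.create(
    model=openai.Model.list()["data"][0]["id"],
    prompt="A robot may not injure a human being",
    stream=True)

for chunk in stream:
    print(chunk["choices"][0]["text"], end="", flush=True)
print()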
1 change: 0 additions & 1 deletion requirements.txt
@@ -9,4 +9,3 @@ xformers >= 0.0.19
 fastapi
 uvicorn
 pydantic < 2 # Required for OpenAI server.
-fschat # Required for OpenAI ChatCompletion Endpoint.
15 changes: 12 additions & 3 deletions vllm/entrypoints/openai/api_server.py
@@ -13,9 +13,6 @@
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from fastchat.conversation import Conversation, SeparatorStyle
-from fastchat.model.model_adapter import get_conversation_template
-
 import uvicorn

 from vllm.engine.arg_utils import AsyncEngineArgs
@@ -33,6 +30,13 @@
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from vllm.utils import random_uuid

+try:
+    from fastchat.conversation import Conversation, SeparatorStyle
+    from fastchat.model.model_adapter import get_conversation_template
+    _fastchat_available = True
+except ImportError:
+    _fastchat_available = False
+
 TIMEOUT_KEEP_ALIVE = 5  # seconds

 logger = init_logger(__name__)
@@ -63,6 +67,11 @@ async def check_model(request) -> Optional[JSONResponse]:


 async def get_gen_prompt(request) -> str:
+    if not _fastchat_available:
+        raise ModuleNotFoundError(
+            "fastchat is not installed. Please install fastchat to use "
+            "the chat completion and conversation APIs: `$ pip install fschat`"
+        )
     conv = get_conversation_template(request.model)
     conv = Conversation(
         name=conv.name,
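
The pattern in this last diff — try the import once at module load, record a flag, and raise only on the code path that needs the optional package — is what lets fschat drop out of requirements.txt without breaking the rest of the server. A self-contained sketch of the same idiom (build_chat_prompt is illustrative, not a function from this commit):

try:
    from fastchat.model.model_adapter import get_conversation_template
    _fastchat_available = True
except ImportError:
    _fastchat_available = False


def build_chat_prompt(model_name: str) -> str:
    # Fail lazily, with an actionable message, only when the optional
    # dependency is actually needed.
    if not _fastchat_available:
        raise ModuleNotFoundError(
            "fastchat is not installed. Run `pip install fschat` to "
            "enable chat completions.")
    conv = get_conversation_template(model_name)
    return conv.get_prompt()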
