feat: support stop_token_ids parameter. (vllm-project#1097)
gesanqiu authored Sep 21, 2023
1 parent 2d1e86f commit f98b745
Showing 4 changed files with 15 additions and 0 deletions.
3 changes: 3 additions & 0 deletions vllm/engine/llm_engine.py
@@ -650,6 +650,9 @@ def _check_stop(self, seq: Sequence,
                 seq.output_text = seq.output_text[:-len(stop_str)]
                 seq.status = SequenceStatus.FINISHED_STOPPED
                 return
+        if seq.get_last_token_id() in sampling_params.stop_token_ids:
+            seq.status = SequenceStatus.FINISHED_STOPPED
+            return
 
         # Check if the sequence has reached max_model_len.
         if seq.get_len() > self.scheduler_config.max_model_len:
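Below is a minimal standalone sketch (not part of the commit) of what the new check does: once the last sampled token id appears in stop_token_ids, the sequence is finished with FINISHED_STOPPED, mirroring the string-based stop check just above it. The SequenceStatus stand-in and the helper function are simplified assumptions, not the real vLLM classes.

from enum import Enum
from typing import List

class SequenceStatus(Enum):
    # Simplified stand-in for vllm.sequence.SequenceStatus.
    RUNNING = "running"
    FINISHED_STOPPED = "finished_stopped"

def check_stop_token(last_token_id: int,
                     stop_token_ids: List[int]) -> SequenceStatus:
    # Finish the sequence as soon as the last sampled token is a stop token.
    if last_token_id in stop_token_ids:
        return SequenceStatus.FINISHED_STOPPED
    return SequenceStatus.RUNNING

# Example: token id 2 (a common EOS id) used as an extra stop token.
assert check_stop_token(2, [2, 32000]) == SequenceStatus.FINISHED_STOPPED
assert check_stop_token(17, [2, 32000]) == SequenceStatus.RUNNING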
2 changes: 2 additions & 0 deletions vllm/entrypoints/openai/api_server.py
@@ -217,6 +217,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
             temperature=request.temperature,
             top_p=request.top_p,
             stop=request.stop,
+            stop_token_ids=request.stop_token_ids,
             max_tokens=request.max_tokens,
             best_of=request.best_of,
             top_k=request.top_k,
@@ -418,6 +419,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
             top_p=request.top_p,
             top_k=request.top_k,
             stop=request.stop,
+            stop_token_ids=request.stop_token_ids,
             ignore_eos=request.ignore_eos,
             max_tokens=request.max_tokens,
             logprobs=request.logprobs,
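For illustration (not part of the commit), a client of the OpenAI-compatible server can now send stop_token_ids in the request body alongside the existing stop strings. The host, port, and model name below are assumptions for a locally running server.

import requests

# Hypothetical local deployment of vllm.entrypoints.openai.api_server.
API_URL = "http://localhost:8000/v1/completions"

payload = {
    "model": "facebook/opt-125m",   # assumed model served by the endpoint
    "prompt": "San Francisco is a",
    "max_tokens": 64,
    "temperature": 0.0,
    "stop": ["\n\n"],               # existing string-based stop criteria
    "stop_token_ids": [2],          # new: stop when token id 2 is generated
}

response = requests.post(API_URL, json=payload)
print(response.json()["choices"][0]["text"])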
2 changes: 2 additions & 0 deletions vllm/entrypoints/openai/protocol.py
@@ -70,6 +70,7 @@ class ChatCompletionRequest(BaseModel):
     top_k: Optional[int] = -1
     ignore_eos: Optional[bool] = False
     use_beam_search: Optional[bool] = False
+    stop_token_ids: Optional[List[int]] = Field(default_factory=list)
 
 
 class CompletionRequest(BaseModel):
@@ -94,6 +95,7 @@ class CompletionRequest(BaseModel):
     top_k: Optional[int] = -1
     ignore_eos: Optional[bool] = False
     use_beam_search: Optional[bool] = False
+    stop_token_ids: Optional[List[int]] = Field(default_factory=list)
 
 
 class LogProbs(BaseModel):
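A small sketch (again, an assumption-laden example rather than code from the commit) of how the new field behaves on the request models: Field(default_factory=list) gives each request its own empty list when the client omits stop_token_ids, so downstream code can always treat it as a list. The model name and message format below are assumed.

from vllm.entrypoints.openai.protocol import ChatCompletionRequest

# Client omits stop_token_ids: the field defaults to a fresh empty list.
req = ChatCompletionRequest(
    model="facebook/opt-125m",                     # assumed model name
    messages=[{"role": "user", "content": "Hi"}],
)
print(req.stop_token_ids)  # []

# Client supplies explicit stop token ids.
req = ChatCompletionRequest(
    model="facebook/opt-125m",
    messages=[{"role": "user", "content": "Hi"}],
    stop_token_ids=[2, 32000],
)
print(req.stop_token_ids)  # [2, 32000]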
8 changes: 8 additions & 0 deletions vllm/sampling_params.py
@@ -45,6 +45,9 @@ class SamplingParams:
             (canonical beam search algorithm).
         stop: List of strings that stop the generation when they are generated.
             The returned output will not contain the stop strings.
+        stop_token_ids: List of tokens that stop the generation when they are
+            generated. The returned output will contain the stop tokens unless
+            the stop tokens are special tokens.
         ignore_eos: Whether to ignore the EOS token and continue generating
             tokens after the EOS token is generated.
         max_tokens: Maximum number of tokens to generate per output sequence.
@@ -64,6 +67,7 @@ def __init__(
         length_penalty: float = 1.0,
         early_stopping: Union[bool, str] = False,
         stop: Union[None, str, List[str]] = None,
+        stop_token_ids: List[int] = None,
         ignore_eos: bool = False,
         max_tokens: int = 16,
         logprobs: Optional[int] = None,
@@ -84,6 +88,10 @@
             self.stop = [stop]
         else:
             self.stop = list(stop)
+        if stop_token_ids is None:
+            self.stop_token_ids = []
+        else:
+            self.stop_token_ids = list(stop_token_ids)
         self.ignore_eos = ignore_eos
         self.max_tokens = max_tokens
         self.logprobs = logprobs
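Finally, a hedged end-to-end sketch of the parameter in the offline API. The model name and the token id are assumptions (2 is OPT's EOS id; in practice the ids would come from the model's tokenizer), but SamplingParams(stop_token_ids=...) is exactly the argument this commit adds.

from vllm import LLM, SamplingParams

# Assumed model; any model supported by vLLM works the same way.
llm = LLM(model="facebook/opt-125m")

# Stop as soon as the model emits token id 2, in addition to any
# string-based stop criteria.
params = SamplingParams(
    temperature=0.8,
    max_tokens=128,
    stop_token_ids=[2],
)

outputs = llm.generate(["The capital of France is"], params)
print(outputs[0].outputs[0].text)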
