Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mypy] Pass type checking in vllm/inputs #11680

Merged
merged 15 commits into from
Jan 2, 2025
Next Next commit
fix types for vllm/inputs
Signed-off-by: Tobias Pitters <tobias.pitters@gmail.com>
  • Loading branch information
CloseChoice committed Jan 2, 2025
commit 677ea96ad4e2614813409ea504152666c621d324
1 change: 1 addition & 0 deletions tools/mypy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ run_mypy vllm/prompt_adapter
run_mypy vllm/spec_decode
run_mypy vllm/worker
run_mypy vllm/v1
run_mypy vllm/inputs
28 changes: 17 additions & 11 deletions vllm/inputs/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
Optional, Tuple, Union, cast)

import torch
from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never
from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never, TypeGuard

Check failure on line 7 in vllm/inputs/data.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (E501)

vllm/inputs/data.py:7:81: E501 Line too long (86 > 80)

if TYPE_CHECKING:
from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
MultiModalPlaceholderDict)
from vllm.multimodal.inputs import MultiModalInputsV2



class TextPrompt(TypedDict):
"""Schema for a text prompt."""

Expand Down Expand Up @@ -175,6 +176,10 @@
to pass the mm_processor_kwargs to each of them.
"""

def is_multimodal_inputs(inputs: Union[TokenInputs, MultiModalInputsV2]) -> TypeGuard[MultiModalInputsV2]:

Check failure on line 179 in vllm/inputs/data.py

View workflow job for this annotation

GitHub Actions / ruff (3.12)

Ruff (E501)

vllm/inputs/data.py:179:81: E501 Line too long (106 > 80)
"""Helper function to make sure mypy narrows down the type."""
return inputs["type"] == "multimodal"


def token_inputs(
prompt_token_ids: List[int],
Expand Down Expand Up @@ -250,7 +255,7 @@
if inputs["type"] == "token" or inputs["type"] == "multimodal":
return inputs.get("prompt")

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, if we still need to ignore this line, then I think there isn't much point in adding those two functions...

Copy link
Contributor Author

@CloseChoice CloseChoice Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need the functions here: https://github.com/vllm-project/vllm/pull/11680/files#diff-e55f6ffbb4ac8db75ad60f0b92d2ab311493184b32bb039c2d991742f53f5c56R332 to narrow down the types. `assert_never` won't work because mypy expects a `Never` argument there, and this is still an open issue (as mentioned in the description). Since we need the functions anyway, I thought I would use them consistently everywhere, but I can revert the places where they are not needed if you prefer.

Copy link
Member

@DarkLight1337 DarkLight1337 Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean that even after you added is_token_inputs and is_multimodal_inputs, you still have type ignore on the assert_never lines. In that case, what is the benefit of adding those two functions?

Copy link
Contributor Author

@CloseChoice CloseChoice Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point is that we need to narrow the type down to MultiModalInputsV2 here, since TokenInputs does not have the "mm_hashes" key and accessing it otherwise results in a mypy error. So, to narrow the type (strictly speaking, we only need one function, the one that narrows down to MultiModalInputsV2), I added that function and then added the other one to handle these cases consistently.

Copy link
Member

@DarkLight1337 DarkLight1337 Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Running mypy on the code prior to this PR, apart from assert_never I only get one additional error (vllm/inputs/data.py:314). So, I think it's better to just type ignore that extra line rather than using these two functions.

Btw, pyright passes on this file without any changes needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, I just removed the functions and added a type ignore to that line instead.


@cached_property
def prompt_token_ids(self) -> List[int]:
Expand All @@ -259,7 +264,7 @@
if inputs["type"] == "token" or inputs["type"] == "multimodal":
return inputs.get("prompt_token_ids", [])

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def token_type_ids(self) -> List[int]:
Expand All @@ -268,7 +273,7 @@
if inputs["type"] == "token" or inputs["type"] == "multimodal":
return inputs.get("token_type_ids", [])

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def prompt_embeds(self) -> Optional[torch.Tensor]:
Expand All @@ -277,7 +282,7 @@
if inputs["type"] == "token" or inputs["type"] == "multimodal":
return None

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def multi_modal_data(self) -> "MultiModalDataDict":
Expand All @@ -289,7 +294,7 @@
if inputs["type"] == "multimodal":
return inputs.get("mm_kwargs", {})

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]:
Expand All @@ -301,7 +306,7 @@
if inputs["type"] == "multimodal":
return inputs.get("mm_kwargs", {})

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def multi_modal_hashes(self) -> List[str]:
Expand All @@ -310,10 +315,11 @@
if inputs["type"] == "token":
return inputs.get("multi_modal_hashes", [])

if inputs["type"] == "multimodal":
elif is_multimodal_inputs(inputs):
# only the case when we use MultiModalInputsV2
return inputs.get("mm_hashes", [])

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def multi_modal_placeholders(self) -> "MultiModalPlaceholderDict":
Expand All @@ -325,7 +331,7 @@
if inputs["type"] == "multimodal":
return inputs.get("mm_placeholders", {})

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]

@cached_property
def mm_processor_kwargs(self) -> Dict[str, Any]:
Expand All @@ -337,7 +343,7 @@
if inputs["type"] == "multimodal":
return {}

assert_never(inputs)
assert_never(inputs) # type: ignore[arg-type]


ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
Expand Down
6 changes: 3 additions & 3 deletions vllm/inputs/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def _build_enc_dec_llm_inputs(
or encoder_inputs["type"] == "multimodal"):
pass
else:
assert_never(encoder_inputs)
assert_never(encoder_inputs) # type: ignore[arg-type]

if decoder_inputs is None:
dec_token_ids = self._prepare_decoder_input_ids_for_generation(
Expand All @@ -452,7 +452,7 @@ def _build_enc_dec_llm_inputs(
raise ValueError("Multi-modal decoder inputs of encoder-"
"decoder models are not supported yet")
else:
assert_never(encoder_inputs)
assert_never(encoder_inputs) # type: ignore[arg-type]

return EncoderDecoderInputs(
encoder=encoder_inputs,
Expand Down Expand Up @@ -569,7 +569,7 @@ def _build_decoder_only_llm_inputs(
prompt_adapter_request=prompt_adapter_request,
)
else:
assert_never(prompt_inputs)
assert_never(prompt_inputs) # type: ignore[arg-type]

return prompt_inputs

Expand Down
2 changes: 1 addition & 1 deletion vllm/inputs/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def _ensure_mm_kwargs(
# Be more strict in V2
assert "mm_kwargs" in inputs
else:
assert_never(inputs["type"])
assert_never(inputs["type"]) # type: ignore[arg-type]

def process_input(self, model_config: "ModelConfig",
inputs: ProcessorInputs) -> ProcessorInputs:
Expand Down
Loading