[mypy] Enable type checking for test directory #5017

Merged on Jun 15, 2024 (31 commits; the diff below shows changes from 7 of them)

Commits
c2e23b5
Small improvements in type annotations
DarkLight1337 May 23, 2024
7be8fa5
Add missing type annotations
DarkLight1337 May 23, 2024
f6e5c2f
Add type annotation for list elements in tests
DarkLight1337 May 24, 2024
5da8d85
Add type annotation for list elements in main code
DarkLight1337 May 24, 2024
2e26ac5
Fix yapf
DarkLight1337 May 24, 2024
c9c0bca
Remove unnecessary type hint
DarkLight1337 May 24, 2024
490c78b
Apply formatter
DarkLight1337 May 24, 2024
2322145
Add type annotation mainly regarding dict elements
DarkLight1337 May 24, 2024
6a95e53
Fix some type errors in tests
DarkLight1337 May 24, 2024
c7922bb
More fixes
DarkLight1337 May 24, 2024
5c9a055
Fix incorrect dtype
DarkLight1337 May 24, 2024
04a40f0
Merge branch 'upstream' into improve-types
DarkLight1337 May 25, 2024
05ab69f
Fix types related to `tolist`
DarkLight1337 May 25, 2024
0f61f48
Merge branch 'upstream' into improve-types
DarkLight1337 May 29, 2024
58af1f6
Fix bad merge
DarkLight1337 May 29, 2024
b68fa6c
Merge branch 'upstream' into improve-types
DarkLight1337 May 29, 2024
a54f6e3
Merge branch 'upstream' into improve-types
DarkLight1337 May 30, 2024
9cd38f7
Fix wrong type
DarkLight1337 May 30, 2024
8081f85
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 3, 2024
01fb52b
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 3, 2024
5ef7804
Enable type checking for tests
DarkLight1337 Jun 3, 2024
71ace6c
Fix incorrect return type annotation
DarkLight1337 Jun 3, 2024
2e19d09
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 4, 2024
c3fe67c
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 6, 2024
1138733
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 7, 2024
ab68e8f
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 11, 2024
ac3708b
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 14, 2024
2732d0b
Fix type errors
DarkLight1337 Jun 14, 2024
28e470d
Merge branch 'upstream' into improve-types
DarkLight1337 Jun 15, 2024
2c79f5f
Fix mypy error
DarkLight1337 Jun 15, 2024
5185058
Fix mypy error
DarkLight1337 Jun 15, 2024
12 changes: 6 additions & 6 deletions benchmarks/benchmark_serving.py
@@ -197,11 +197,11 @@ def calculate_metrics(
     dur_s: float,
     tokenizer: PreTrainedTokenizerBase,
 ) -> Tuple[BenchmarkMetrics, List[int]]:
-    actual_output_lens = []
+    actual_output_lens: List[int] = []
     total_input = 0
     completed = 0
-    tpots = []
-    ttfts = []
+    tpots: List[float] = []
+    ttfts: List[float] = []
     for i in range(len(outputs)):
         if outputs[i].success:
             output_len = len(tokenizer(outputs[i].generated_text).input_ids)
@@ -246,7 +246,7 @@ async def benchmark(
     disable_tqdm: bool,
 ):
     if backend in ASYNC_REQUEST_FUNCS:
-        request_func = ASYNC_REQUEST_FUNCS.get(backend)
+        request_func = ASYNC_REQUEST_FUNCS[backend]
     else:
         raise ValueError(f"Unknown backend: {backend}")

@@ -255,7 +255,7 @@ async def benchmark(
     pbar = None if disable_tqdm else tqdm(total=len(input_requests))

     benchmark_start_time = time.perf_counter()
-    tasks = []
+    tasks: List[asyncio.Task] = []
     async for request in get_request(input_requests, request_rate):
         prompt, prompt_len, output_len = request
         request_func_input = RequestFuncInput(
@@ -273,7 +273,7 @@ async def benchmark(
                              pbar=pbar)))
     outputs: List[RequestFuncOutput] = await asyncio.gather(*tasks)

-    if not disable_tqdm:
+    if pbar is not None:
         pbar.close()

     benchmark_duration = time.perf_counter() - benchmark_start_time
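The two patterns in this file recur throughout the PR: mypy cannot infer an element type for a container that starts out empty, so the initializer gets an explicit annotation, and dict.get() is typed as returning an Optional value, so plain indexing is used once membership has already been checked. A minimal sketch of both, using only the standard library (illustrative only, not code from this PR):

from typing import Callable, Dict, List

ASYNC_FUNCS: Dict[str, Callable[[], None]] = {"vllm": lambda: None}

def collect(backend: str) -> List[float]:
    # Annotating the empty list pins the element type; a bare [] can leave
    # mypy with List[Any] or a "Need type annotation" error under strict settings.
    ttfts: List[float] = []
    ttfts.append(0.25)

    if backend in ASYNC_FUNCS:
        func = ASYNC_FUNCS[backend]  # typed Callable[[], None]
        # func = ASYNC_FUNCS.get(backend) would be Optional[Callable[[], None]],
        # and calling it would fail type checking even after the membership check.
        func()
    else:
        raise ValueError(f"Unknown backend: {backend}")
    return ttfts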
4 changes: 2 additions & 2 deletions benchmarks/benchmark_throughput.py
@@ -103,8 +103,8 @@ def run_vllm(
     )

     # Add the requests to the engine.
-    prompts = []
-    sampling_params = []
+    prompts: List[str] = []
+    sampling_params: List[SamplingParams] = []
     for prompt, _, output_len in requests:
         prompts.append(prompt)
         sampling_params.append(
10 changes: 5 additions & 5 deletions benchmarks/kernels/benchmark_aqlm.py
@@ -86,9 +86,9 @@ def dequant_no_scale(
 # Compare the optimized 1x16 and 2x8 cuda decompression/dequant kernels against
 # the generic pytorch version.
 # Just visual comparison.
-def dequant_test(k: int, parts: torch.tensor, nbooks: int, bits: int) -> None:
+def dequant_test(k: int, parts: torch.Tensor, nbooks: int, bits: int) -> None:

-    n = parts.sum().item()
+    n = int(parts.sum().item())

     device = torch.device('cuda:0')

@@ -204,7 +204,7 @@ def main():
     sys.stdout = sys.__stdout__


-def run_grid(m: int, k: int, parts: torch.tensor, nbooks: int, bits: int,
+def run_grid(m: int, k: int, parts: torch.Tensor, nbooks: int, bits: int,
              methods):

     # I didn't see visible improvements from increasing these, but feel free :)
@@ -252,10 +252,10 @@ def run_grid(m: int, k: int, parts: torch.tensor, nbooks: int, bits: int,
     print('')


-def run_timing(num_calls: int, m: int, k: int, parts: torch.tensor,
+def run_timing(num_calls: int, m: int, k: int, parts: torch.Tensor,
                nbooks: int, bits: int, method) -> float:

-    n = parts.sum().item()
+    n = int(parts.sum().item())

     device = torch.device('cuda:0')
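Two small points behind the changes above, as far as the torch stubs go: torch.tensor is the factory function while torch.Tensor is the class, so only the latter is a valid annotation, and Tensor.item() is annotated as returning a plain Python number, so an explicit int(...) is needed wherever the result must be an int. A short sketch (illustrative only):

import torch

def total_rows(parts: torch.Tensor) -> int:  # torch.Tensor (the class), not torch.tensor (the factory)
    # .item() is typed as returning a generic Python number, so cast before
    # handing the value to anything that requires an int.
    return int(parts.sum().item())

n = total_rows(torch.tensor([2, 3, 5]))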
8 changes: 5 additions & 3 deletions benchmarks/kernels/benchmark_marlin.py
@@ -1,4 +1,5 @@
 import argparse
+from typing import List

 import torch
 import torch.utils.benchmark as benchmark
@@ -23,8 +24,9 @@
 K_FULL_OPTS = [False, True]


-def bench_run(results, model, act_order, is_k_full, num_bits, group_size,
-              size_m, size_k, size_n):
+def bench_run(results: List[benchmark.Measurement], model: str,
+              act_order: bool, is_k_full: bool, num_bits: int, group_size: int,
+              size_m: int, size_k: int, size_n: int):
     label = "Quant Matmul"

     sub_label = ("{}, act={} k_full={}, b={}, g={}, "
@@ -156,7 +158,7 @@ def main(args):
     for i, model in enumerate(args.models):
         print(f"[{i}] {model}")

-    results = []
+    results: List[benchmark.Measurement] = []

     for model in args.models:
         for layer in WEIGHT_SHAPES[model]:
11 changes: 7 additions & 4 deletions benchmarks/kernels/benchmark_paged_attention.py
@@ -1,7 +1,7 @@
 import argparse
 import random
 import time
-from typing import Optional
+from typing import List, Optional

 import torch

@@ -54,14 +54,17 @@ def main(

     # Create the block tables.
     max_num_blocks_per_seq = (max_seq_len + block_size - 1) // block_size
-    block_tables = []
+    block_tables_lst: List[List[int]] = []
     for _ in range(num_seqs):
         block_table = [
             random.randint(0, NUM_BLOCKS - 1)
             for _ in range(max_num_blocks_per_seq)
         ]
-        block_tables.append(block_table)
-    block_tables = torch.tensor(block_tables, dtype=torch.int, device=device)
+        block_tables_lst.append(block_table)
+
+    block_tables = torch.tensor(block_tables_lst,
+                                dtype=torch.int,
+                                device=device)

     # Create the KV cache.
     key_caches, value_caches = create_kv_caches_with_random(NUM_BLOCKS,
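The rename here follows a convention mypy enforces: a variable keeps a single type for its whole scope, so the Python list is built under one name and the tensor made from it is bound to another, rather than reassigning block_tables from List[List[int]] to torch.Tensor. Roughly (illustrative only):

from typing import List

import torch

block_tables_lst: List[List[int]] = [[1, 2], [3, 4]]
# Reassigning block_tables_lst to the tensor would trigger an
# "Incompatible types in assignment" error; a second name keeps both types stable.
block_tables = torch.tensor(block_tables_lst, dtype=torch.int)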
7 changes: 4 additions & 3 deletions benchmarks/kernels/benchmark_rope.py
@@ -1,11 +1,12 @@
 import argparse
 from itertools import accumulate
-from typing import Optional
+from typing import List, Optional

 import nvtx
 import torch

-from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.rotary_embedding import (RotaryEmbedding,
+                                                          get_rope)


 def benchmark_rope_kernels_multi_lora(
@@ -37,7 +38,7 @@ def benchmark_rope_kernels_multi_lora(
     })
     # non-batched RoPE takes only one scaling factor, we create multiple
     # instances to simulate the same behavior
-    non_batched_ropes = []
+    non_batched_ropes: List[RotaryEmbedding] = []
     for scaling_factor in scaling_factors:
         non_batched_ropes.append(
             get_rope(head_size, rotary_dim, max_position, base, is_neox_style,
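Annotating the list requires the RotaryEmbedding class to be importable at that point, hence the widened import above. When a name is needed only for annotations, a typing.TYPE_CHECKING guard is an alternative that avoids any runtime dependency; this is not what the PR does, just a variant sketched here for reference:

from typing import TYPE_CHECKING, List

if TYPE_CHECKING:
    # Imported for type checking only; never executed at runtime.
    from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding

non_batched_ropes: List["RotaryEmbedding"] = []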
6 changes: 3 additions & 3 deletions examples/offline_inference_distributed.py
@@ -5,7 +5,7 @@
 Learn more about Ray Data in https://docs.ray.io/en/latest/data/data.html
 """

-from typing import Dict
+from typing import Dict, List

 import numpy as np
 import ray
@@ -40,8 +40,8 @@ def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, list]:
         # The output is a list of RequestOutput objects that contain the prompt,
         # generated text, and other information.
         outputs = self.llm.generate(batch["text"], sampling_params)
-        prompt = []
-        generated_text = []
+        prompt: List[str] = []
+        generated_text: List[str] = []
         for output in outputs:
             prompt.append(output.prompt)
             generated_text.append(' '.join([o.text for o in output.outputs]))
8 changes: 5 additions & 3 deletions tests/core/block/test_block_table.py
@@ -1,3 +1,5 @@
+from typing import List
+
 import pytest

 from vllm.core.block.block_table import BlockTable
@@ -28,7 +30,7 @@ def test_allocate_naive(block_size: int, sequence_len: int):
     token_ids = list(range(sequence_len))
     num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size)))

-    block_tables = []
+    block_tables: List[BlockTable] = []
     for i in range(5):
         assert allocator.get_num_free_blocks(
             device=Device.GPU) == num_gpu_blocks - i * num_blocks_per_alloc
@@ -73,7 +75,7 @@ def test_allocate_prefix_caching(block_size: int, sequence_len: int):
     num_immutable_blocks_per_alloc = len(
         chunked_tokens) - num_mutable_blocks_per_alloc

-    block_tables = []
+    block_tables: List[BlockTable] = []
     for alloc_i in range(1, 6):

         block_tables.append(
@@ -268,7 +270,7 @@ def test_append_token_ids_correct_content(block_size: int, sequence_len: int,
     )
     block_table.allocate(token_ids=token_ids, device=Device.GPU)

-    appended_so_far = []
+    appended_so_far: List[int] = []
     for append in chunk_list(token_ids_to_append, append_size):
         block_table.append_token_ids(append)
         appended_so_far.extend(append)
4 changes: 2 additions & 2 deletions tests/core/block/test_prefix_caching_block.py
@@ -123,7 +123,7 @@ def create_chain(block_size: int,
                      num_empty_trailing_blocks=0) -> List[PrefixCachingBlock]:
         """Helper method which creates a chain of blocks.
         """
-        blocks = []
+        blocks: List[PrefixCachingBlock] = []
         num_blocks = math.ceil(
             len(token_ids) / block_size) + num_empty_trailing_blocks

@@ -491,7 +491,7 @@ def create_immutable_chain(
     ) -> List[PrefixCachingBlock]:
         """Helper method which creates a chain of blocks.
         """
-        blocks = []
+        blocks: List[Block] = []
         num_blocks = math.ceil(len(token_ids) / block_size)

         if num_blocks == 0:
2 changes: 1 addition & 1 deletion tests/core/test_chunked_prefill_scheduler.py
@@ -530,7 +530,7 @@ def test_chunked_prefill_max_seqs():
     cache_config.num_cpu_blocks = 8
     cache_config.num_gpu_blocks = 8
     scheduler = Scheduler(scheduler_config, cache_config, None)
-    running = []
+    running: List[SequenceGroup] = []

     _, seq_group = create_dummy_prompt("1", prompt_length=65)
     scheduler.add_seq_group(seq_group)
16 changes: 8 additions & 8 deletions tests/core/test_scheduler.py
@@ -1,6 +1,6 @@
 import time
 from collections import deque
-from typing import List
+from typing import List, Tuple
 from unittest.mock import MagicMock

 import pytest  # noqa
@@ -659,7 +659,7 @@ def test_schedule_swapped_simple():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = None
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
     scheduler._allocate_and_set_running(seq_group)
     append_new_token_seq_group(60, seq_group, 1)
@@ -686,7 +686,7 @@ def test_schedule_swapped_max_token_budget():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = None
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     for _ in range(2):
         _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
         scheduler._allocate_and_set_running(seq_group)
@@ -720,7 +720,7 @@ def test_schedule_swapped_max_seqs():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = None
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     for i in range(4):
         _, seq_group = create_dummy_prompt(str(i), prompt_length=60)
         scheduler._allocate_and_set_running(seq_group)
@@ -753,7 +753,7 @@ def test_schedule_swapped_max_loras():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = set()
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     for i in range(2):
         _, seq_group = create_dummy_prompt(str(i),
                                            prompt_length=60,
@@ -782,7 +782,7 @@ def test_schedule_swapped_cannot_swap_in():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = None
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     for _ in range(2):
         _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
         scheduler._allocate_and_set_running(seq_group)
@@ -809,7 +809,7 @@ def test_infeasible_swap():
     swapped = deque()
     policy = PolicyFactory.get_policy(policy_name="fcfs")
     curr_loras = None
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     for _ in range(2):
         _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
         scheduler._allocate_and_set_running(seq_group)
@@ -840,7 +840,7 @@ def test_schedule_swapped_blocks_to_copy():
     _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
     scheduler._allocate_and_set_running(seq_group)
     append_new_token_seq_group(60, seq_group, 1)
-    blocks_to_swap_out = []
+    blocks_to_swap_out: List[Tuple[int, int]] = []
     scheduler._swap_out(seq_group, blocks_to_swap_out)
     swapped.append(seq_group)
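The blocks_to_swap_out annotation repeats across these tests because the list starts empty and is filled by the scheduler; List[Tuple[int, int]] documents that each entry is a pair of block ids and lets mypy check whatever consumes the mapping. A stripped-down sketch, where the helper is hypothetical and only stands in for Scheduler._swap_out:

from typing import List, Tuple

def fake_swap_out(mapping: List[Tuple[int, int]]) -> None:
    # Hypothetical stand-in: record a (source block, destination block) pair.
    mapping.append((0, 3))

blocks_to_swap_out: List[Tuple[int, int]] = []
fake_swap_out(blocks_to_swap_out)
# With the annotation, mypy rejects appending (0, "3") or unpacking an entry
# as anything other than two ints.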
4 changes: 2 additions & 2 deletions tests/core/utils.py
@@ -1,5 +1,5 @@
 import time
-from typing import Iterable, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple

 from vllm import SamplingParams
 from vllm.lora.request import LoRARequest
@@ -47,7 +47,7 @@ def create_seq_group(

     prompt_token_ids = [0] * seq_prompt_len

-    seqs = []
+    seqs: List[Sequence] = []
     for seq_id_offset, output_len in enumerate(seq_output_lens):
         seq = Sequence(
             seq_id=seq_id_start + seq_id_offset,
3 changes: 2 additions & 1 deletion tests/distributed/test_pynccl.py
@@ -1,5 +1,6 @@
 import multiprocessing
 import os
+from typing import List

 import pytest
 import torch
@@ -15,7 +16,7 @@

 def distributed_run(fn, world_size):
     number_of_processes = world_size
-    processes = []
+    processes: List[multiprocessing.Process] = []
     for i in range(number_of_processes):
         env = {}
         env['RANK'] = str(i)
3 changes: 2 additions & 1 deletion tests/distributed/test_pynccl_library.py
@@ -1,8 +1,9 @@
 import multiprocessing
 import tempfile
+from typing import Dict


-def target_fn(env, filepath):
+def target_fn(env: Dict[str, str], filepath: str):
     from vllm.utils import update_environment_variables
     update_environment_variables(env)
     from vllm.utils import nccl_integrity_check
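Annotating helper parameters like env and filepath is where much of the benefit of type-checking the test tree shows up: call sites get validated too. A minimal sketch (illustrative, not the real helper):

from typing import Dict

def run_with_env(env: Dict[str, str], filepath: str) -> None:
    # Print each variable assignment next to the file it applies to.
    for key, value in env.items():
        print(f"{key}={value} -> {filepath}")

run_with_env({"RANK": "0"}, "/tmp/state")
# run_with_env({"RANK": 0}, "/tmp/state") would now be rejected: int is not str.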
5 changes: 3 additions & 2 deletions tests/entrypoints/test_openai_server.py
@@ -1,6 +1,7 @@
 # imports for guided decoding tests
 import json
 import re
+from typing import List

 import jsonschema
 import openai  # use the official client for correctness check
@@ -321,7 +322,7 @@ async def test_completion_streaming(server, client: openai.AsyncOpenAI,
                                              max_tokens=5,
                                              temperature=0.0,
                                              stream=True)
-    chunks = []
+    chunks: List[str] = []
     finish_reason_count = 0
     async for chunk in stream:
         chunks.append(chunk.choices[0].text)
@@ -368,7 +369,7 @@ async def test_chat_streaming(server, client: openai.AsyncOpenAI,
         temperature=0.0,
         stream=True,
     )
-    chunks = []
+    chunks: List[str] = []
     finish_reason_count = 0
     async for chunk in stream:
         delta = chunk.choices[0].delta