Commit a283ec2

Add contributing guideline and mypy config (vllm-project#122)

1 parent 3f942ac · commit a283ec2

File tree

16 files changed (+128, -44 lines)

CONTRIBUTING.md

Lines changed: 74 additions & 0 deletions
New file:

# Contributing to CacheFlow

Thank you for your interest in contributing to CacheFlow!
Our community is open to everyone and welcomes all kinds of contributions, no matter how small or large.
There are several ways you can contribute to the project:

- Identify and report any issues or bugs.
- Request or add a new model.
- Suggest or implement new features.

However, remember that contributions aren't just about code.
We believe in the power of community support; thus, answering queries, assisting others, and enhancing the documentation are highly regarded and beneficial contributions.

Finally, one of the most impactful ways to support us is by raising awareness about CacheFlow.
Talk about it in your blog posts, highlighting how it's driving your incredible projects.
Express your support on Twitter if CacheFlow aids you, or simply offer your appreciation by starring our repository.


## Setup for development

### Build from source

```bash
pip install -r requirements.txt
pip install -e .  # This may take several minutes.
```

### Testing

```bash
pip install -r requirements-dev.txt

# Static type checking
mypy
# Unit tests
pytest tests/
```
**Note:** Currently, the repository does not pass the mypy tests.


## Contributing Guidelines

### Issue Reporting

If you encounter a bug or have a feature request, please check our issues page first to see if someone else has already reported it.
If not, please file a new issue, providing as much relevant information as possible.

### Coding Style Guide

In general, we adhere to [Google Python style guide](https://google.github.io/styleguide/pyguide.html) and [Google C++ style guide](https://google.github.io/styleguide/cppguide.html).

### Pull Requests

When submitting a pull request:

1. Make sure your code has been rebased on top of the latest commit on the main branch.
2. Include a detailed description of the changes in the pull request.
Explain why you made the changes you did.
If your pull request fixes an open issue, please include a reference to it in the description.

### Code Reviews

All submissions, including submissions by project members, require a code review.
To make the review process as smooth as possible, please:

1. Keep your changes as concise as possible.
If your pull request involves multiple unrelated changes, consider splitting it into separate pull requests.
2. Respond to all comments within a reasonable time frame.
If a comment isn't clear or you disagree with a suggestion, feel free to ask for clarification or discuss the suggestion.

### Thank You

Finally, thank you for taking the time to read these guidelines and for your interest in contributing to CacheFlow.
Your contributions make CacheFlow a great tool for everyone!

cacheflow/core/scheduler.py

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def add_seq_group(self, seq_group: SequenceGroup) -> None:
     def has_unfinished_seqs(self) -> bool:
         return self.waiting or self.running or self.swapped

-    def _schedule(self) -> Tuple[SchedulerOutputs, List[int]]:
+    def _schedule(self) -> Tuple[SchedulerOutputs, List[str]]:
         # Blocks that need to be swaped or copied before model execution.
         blocks_to_swap_in: Dict[int, int] = {}
         blocks_to_swap_out: Dict[int, int] = {}

cacheflow/model_executor/layers/attention.py

Lines changed: 2 additions & 2 deletions
@@ -61,7 +61,7 @@ def multi_query_kv_attention(
         key: torch.Tensor,  # [num_prompt_tokens, num_heads, head_size]
         value: torch.Tensor,  # [num_prompt_tokens, num_heads, head_size]
         attn_bias: xops.AttentionBias,
-    ) -> None:
+    ) -> torch.Tensor:
         # TODO(woosuk): The unsqueeze op may incur some CPU overhead. Optimize.
         out = xops.memory_efficient_attention_forward(
             query.unsqueeze(0),
@@ -197,7 +197,7 @@ def __init__(

     def forward(
         self,
-        positions: torch.LongTensor,  # [num_tokens]
+        positions: torch.Tensor,  # [num_tokens]
         query: torch.Tensor,  # [num_tokens, num_heads * head_size]
         key: torch.Tensor,  # [num_tokens, num_heads * head_size]
         value: torch.Tensor,  # [num_tokens, num_heads * head_size]
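
The `torch.LongTensor` → `torch.Tensor` swap seen here recurs across the model files below. In the PyTorch type stubs, factory functions such as `torch.tensor` and `torch.arange` are declared to return `torch.Tensor`, while `torch.LongTensor` is a narrower subclass, so annotating parameters with `torch.LongTensor` makes ordinary call sites fail mypy even though they work at runtime. A minimal sketch of the difference, with hypothetical function names:

```python
import torch


def shift_bad(positions: torch.LongTensor) -> torch.Tensor:
    # Annotated with the narrower LongTensor class.
    return positions + 1


def shift_good(positions: torch.Tensor) -> torch.Tensor:
    # Annotated with torch.Tensor, matching what the factory functions return.
    return positions + 1


positions = torch.arange(4, dtype=torch.long)  # typed as torch.Tensor by the stubs
shift_good(positions)  # accepted by mypy
shift_bad(positions)   # runs fine, but mypy reports an incompatible argument type
```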

cacheflow/model_executor/layers/sampler.py

Lines changed: 1 addition & 1 deletion
@@ -347,7 +347,7 @@ def _sample_from_generation_tokens(
         # Greedy sampling.
         assert len(seq_ids) == 1
         next_token_id = torch.argmax(probs, dim=-1)
-        next_token_ids = [next_token_id.item()]
+        next_token_ids = [int(next_token_id.item())]
         parent_seq_ids = seq_ids
     else:
         # Random sampling.
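
The added `int(...)` reflects that `Tensor.item()` is annotated in the stubs as returning a general Python number, so storing its result directly in a `List[int]` is rejected by mypy; the cast narrows it. A small sketch of the pattern (variable names are illustrative):

```python
from typing import List

import torch

probs = torch.tensor([0.1, 0.7, 0.2])
next_token_id = torch.argmax(probs, dim=-1)

# Tensor.item() is a generic number as far as mypy is concerned,
# so wrap it in int() before appending to a List[int].
next_token_ids: List[int] = [int(next_token_id.item())]
print(next_token_ids)  # [1]
```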

cacheflow/model_executor/model_loader.py

Lines changed: 3 additions & 1 deletion
@@ -1,4 +1,6 @@
 """Utilities for selecting and loading models."""
+from typing import Type
+
 import torch
 import torch.nn as nn
 from transformers import PretrainedConfig
@@ -17,7 +19,7 @@
 }


-def _get_model_architecture(config: PretrainedConfig) -> nn.Module:
+def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]:
     architectures = getattr(config, "architectures", [])
     for arch in architectures:
         if arch in _MODEL_REGISTRY:
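
`_get_model_architecture` looks up and returns a model *class* from the registry, not an instance, which is what `Type[nn.Module]` expresses; with the old `nn.Module` annotation, mypy would treat the result as an already-constructed module. A hedged, self-contained sketch of the same idea using a stand-in registry:

```python
from typing import Dict, Type

import torch.nn as nn


class TinyModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(4, 4)


# Stand-in for the _MODEL_REGISTRY mapping in model_loader.py.
_REGISTRY: Dict[str, Type[nn.Module]] = {"TinyModel": TinyModel}


def get_architecture(name: str) -> Type[nn.Module]:
    # Returns the class itself; instantiation happens at the call site.
    return _REGISTRY[name]


model_cls = get_architecture("TinyModel")
model = model_cls()  # construct the model from the returned class
print(type(model).__name__)
```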

cacheflow/model_executor/models/gpt2.py

Lines changed: 4 additions & 4 deletions
@@ -168,8 +168,8 @@ def __init__(self, config: GPT2Config):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        position_ids: torch.LongTensor,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],
@@ -204,8 +204,8 @@ def __init__(self, config: GPT2Config):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],

cacheflow/model_executor/models/gpt_neox.py

Lines changed: 6 additions & 6 deletions
@@ -67,7 +67,7 @@ def __init__(self, config: GPTNeoXConfig):

     def forward(
         self,
-        position_ids: torch.LongTensor,
+        position_ids: torch.Tensor,
         hidden_states: torch.Tensor,
         kv_cache: KVCache,
         input_metadata: InputMetadata,
@@ -118,7 +118,7 @@ def __init__(self, config: GPTNeoXConfig):

     def forward(
         self,
-        position_ids: torch.LongTensor,
+        position_ids: torch.Tensor,
         hidden_states: torch.Tensor,
         kv_cache: KVCache,
         input_metadata: InputMetadata,
@@ -162,8 +162,8 @@ def __init__(self, config: GPTNeoXConfig):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        position_ids: torch.LongTensor,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],
@@ -199,8 +199,8 @@ def __init__(self, config):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],

cacheflow/model_executor/models/llama.py

Lines changed: 6 additions & 6 deletions
@@ -109,7 +109,7 @@ def __init__(

     def forward(
         self,
-        positions: torch.LongTensor,
+        positions: torch.Tensor,
         hidden_states: torch.Tensor,
         kv_cache: KVCache,
         input_metadata: InputMetadata,
@@ -143,7 +143,7 @@ def __init__(self, config: LlamaConfig):

     def forward(
         self,
-        positions: torch.LongTensor,
+        positions: torch.Tensor,
         hidden_states: torch.Tensor,
         kv_cache: KVCache,
         input_metadata: InputMetadata,
@@ -184,8 +184,8 @@ def __init__(self, config: LlamaConfig):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],
@@ -222,8 +222,8 @@ def __init__(self, config):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],

cacheflow/model_executor/models/opt.py

Lines changed: 7 additions & 7 deletions
@@ -47,7 +47,7 @@ def __init__(self, num_embeddings: int, embedding_dim: int):
         self.offset = 2
         super().__init__(num_embeddings + self.offset, embedding_dim)

-    def forward(self, positions: torch.LongTensor):
+    def forward(self, positions: torch.Tensor):
         return super().forward(positions + self.offset)


@@ -199,8 +199,8 @@ def __init__(self, config: OPTConfig):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],
@@ -235,8 +235,8 @@ def __init__(self, config: OPTConfig):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],
@@ -258,8 +258,8 @@ def __init__(self, config):

     def forward(
         self,
-        input_ids: torch.LongTensor,
-        positions: torch.LongTensor,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
         kv_caches: List[KVCache],
         input_metadata: InputMetadata,
         cache_events: Optional[List[torch.cuda.Event]],

cacheflow/outputs.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ class RequestOutput:

     def __init__(
         self,
-        request_id: int,
+        request_id: str,
         prompt: str,
         prompt_token_ids: List[int],
         outputs: List[CompletionOutput],

cacheflow/sequence.py

Lines changed: 5 additions & 2 deletions
@@ -116,10 +116,11 @@ def get_output_token_ids(self) -> List[int]:
     def get_cumulative_logprob(self) -> float:
         return self.data.cumulative_logprob

-    def fork(self, child_seq: 'Sequence') -> 'Sequence':
+    def fork(self, child_seq: 'Sequence') -> None:
         child_seq.logical_token_blocks = copy.deepcopy(self.logical_token_blocks)
         child_seq.output_logprobs = copy.deepcopy(self.output_logprobs)
         child_seq.data = copy.deepcopy(self.data)
+        return None

     def __repr__(self) -> str:
         return (f'Sequence(seq_id={self.seq_id}, '
@@ -205,7 +206,9 @@ def __repr__(self) -> str:
                 f'output_token={self.output_token}), '
                 f'logprobs={self.logprobs}')

-    def __eq__(self, other: 'SequenceOutputs') -> bool:
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, SequenceOutputs):
+            return NotImplemented
         return (self.seq_id == other.seq_id and
                 self.parent_seq_id == other.parent_seq_id and
                 self.output_token == other.output_token and
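
Widening the `__eq__` parameter to `object` is the standard way to satisfy mypy: `object.__eq__` accepts any object, so overriding it with a narrower parameter type is flagged as an invalid override, and the `isinstance` check plus `NotImplemented` handles non-matching types at runtime. A minimal sketch of the pattern on a stand-in class:

```python
class Point:
    """Stand-in class illustrating the __eq__ pattern used in sequence.py."""

    def __init__(self, x: int, y: int) -> None:
        self.x = x
        self.y = y

    def __eq__(self, other: object) -> bool:
        # The parameter must stay `object` to be a valid override of
        # object.__eq__; narrow the type inside the body instead.
        if not isinstance(other, Point):
            return NotImplemented
        return self.x == other.x and self.y == other.y


print(Point(1, 2) == Point(1, 2))    # True
print(Point(1, 2) == "not a point")  # False (Python falls back after NotImplemented)
```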

cacheflow/server/ray_utils.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@

 from cacheflow.config import ParallelConfig

-DeviceID = Tuple[int, str, int]  # rank, node resource (node IP), device id
+DeviceID = Tuple[int, Optional[str], int]  # rank, node resource (node IP), device id


 def initialize_cluster(
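
The `Optional[str]` records that the node resource may be unset (for example, when the cluster helper runs without Ray), so under mypy consumers must handle the `None` case explicitly. A tiny hedged sketch of the alias and a guarded use (illustrative only, not the project's API):

```python
from typing import Optional, Tuple

DeviceID = Tuple[int, Optional[str], int]  # rank, node resource (node IP), device id


def describe(device: DeviceID) -> str:
    rank, node_resource, device_id = device
    # node_resource may be None, so guard before using it as a string.
    node = node_resource if node_resource is not None else "<local>"
    return f"rank={rank} node={node} device={device_id}"


print(describe((0, None, 0)))
print(describe((1, "node:10.0.0.1", 0)))
```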

cacheflow/worker/worker.py

Lines changed: 6 additions & 11 deletions
@@ -132,7 +132,7 @@ def init_cache_engine(self, cache_config: CacheConfig) -> None:
     def _prepare_inputs(
         self,
         seq_group_metadata_list: List[SequenceGroupMetadata],
-    ) -> Tuple[torch.LongTensor, torch.LongTensor, InputMetadata]:
+    ) -> Tuple[torch.Tensor, torch.Tensor, InputMetadata]:
         seq_groups: List[Tuple[List[int], SamplingParams]] = []
         input_tokens: List[int] = []
         input_positions: List[int] = []
@@ -216,19 +216,14 @@ def _prepare_inputs(
         input_positions = _pad_to_alignment(input_positions, multiple_of=8)

         # Convert to tensors.
-        tokens_tensor = torch.tensor(
-            input_tokens, dtype=torch.long, device='cuda')
-        positions_tensor = torch.tensor(
-            input_positions, dtype=torch.long, device='cuda')
-        slot_mapping_tensor = torch.tensor(
-            slot_mapping, dtype=torch.int, device='cuda')
-        context_lens_tensor = torch.tensor(
-            context_lens, dtype=torch.int, device='cuda')
+        tokens_tensor = torch.cuda.LongTensor(input_tokens)
+        positions_tensor = torch.cuda.LongTensor(input_positions)
+        slot_mapping_tensor = torch.cuda.IntTensor(slot_mapping)
+        context_lens_tensor = torch.cuda.IntTensor(context_lens)
         padded_block_tables = [
             _pad_to_max(block_table, max_num_blocks_per_seq)
             for block_table in generation_block_tables]
-        block_tables_tensor = torch.tensor(
-            padded_block_tables, dtype=torch.int, device='cuda')
+        block_tables_tensor = torch.cuda.IntTensor(padded_block_tables)

         seq_data: Dict[int, SequenceData] = {}
         for seq_group_metadata in seq_group_metadata_list:
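
The removed `torch.tensor(..., device='cuda')` calls and the added legacy `torch.cuda.LongTensor` / `torch.cuda.IntTensor` constructors both produce int64/int32 tensors on the GPU. The sketch below (guarded so it only runs when CUDA is available) simply illustrates that the two spellings are interchangeable here; it does not claim why the commit prefers one over the other:

```python
import torch

if torch.cuda.is_available():
    input_tokens = [1, 2, 3, 4]

    # Generic factory call, as in the removed lines.
    a = torch.tensor(input_tokens, dtype=torch.long, device='cuda')
    # Legacy typed constructor, as in the added lines.
    b = torch.cuda.LongTensor(input_tokens)

    assert a.dtype == b.dtype == torch.int64
    assert a.device.type == b.device.type == 'cuda'
    assert torch.equal(a, b)
    print("both spellings produce the same CUDA int64 tensor")
else:
    print("CUDA not available; skipping the comparison")
```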

mypy.ini

Lines changed: 8 additions & 0 deletions
New file:

[mypy]
python_version = 3.8

ignore_missing_imports = True

files = cacheflow
# TODO(woosuk): Include the code from Megatron and HuggingFace.
exclude = cacheflow/model_executor/parallel_utils/|cacheflow/model_executor/models/
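
mypy treats the `exclude` value as a regular expression matched against file paths, so any path matching either alternative is skipped. A quick way to sanity-check the pattern itself (this mirrors, but does not reproduce, mypy's exact path-matching rules):

```python
import re

# Same pattern as the `exclude` line in mypy.ini.
exclude = re.compile(
    r"cacheflow/model_executor/parallel_utils/|cacheflow/model_executor/models/")

paths = [
    "cacheflow/core/scheduler.py",                       # checked
    "cacheflow/model_executor/models/llama.py",          # excluded
    "cacheflow/model_executor/parallel_utils/utils.py",  # excluded
]
for path in paths:
    print(path, "excluded" if exclude.search(path) else "checked")
```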

requirements-dev.txt

Lines changed: 2 additions & 0 deletions
New file:

mypy
pytest

tests/kernels/test_pos_encoding.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ def __init__(

     def forward(
         self,
-        positions: torch.LongTensor,  # [num_tokens]
+        positions: torch.Tensor,  # [num_tokens]
         query: torch.Tensor,  # [num_tokens, num_heads, head_size]
         key: torch.Tensor,  # [num_tokens, num_heads, head_size]
     ) -> Tuple[torch.Tensor, torch.Tensor]:
