Showing 2 changed files with 9 additions and 4 deletions.

File 1:

@@ -7,7 +7,8 @@
 from torch.nn.functional import scaled_dot_product_attention

 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
-                                              AttentionMetadata, AttentionMetadataPerStage)
+                                              AttentionMetadata,
+                                              AttentionMetadataPerStage)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)

@@ -49,7 +50,8 @@ def copy_blocks(


 @dataclass
-class TorchSDPAMetadata(AttentionMetadata, PagedAttentionMetadata, AttentionMetadataPerStage):
+class TorchSDPAMetadata(AttentionMetadata, PagedAttentionMetadata,
+                        AttentionMetadataPerStage):
     """Metadata for TorchSDPABackend.
     """
     # Currently, input sequences can only contain all prompts
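For context, the class being rewrapped above inherits from three metadata dataclasses at once. A minimal sketch of how `@dataclass` multiple inheritance merges fields into one generated `__init__` (all class and field names below are hypothetical, not vLLM's):

    from dataclasses import dataclass


    @dataclass
    class PagedMeta:
        block_size: int  # hypothetical field, for illustration only


    @dataclass
    class StageMeta:
        is_prompt: bool  # hypothetical field


    @dataclass
    class CombinedMeta(PagedMeta, StageMeta):
        # Fields from every base dataclass are merged into the generated
        # __init__, collected in reverse-MRO order: is_prompt, block_size, ...
        num_slots: int


    meta = CombinedMeta(is_prompt=True, block_size=16, num_slots=4)
    print(meta)  # CombinedMeta(is_prompt=True, block_size=16, num_slots=4)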
@@ -244,4 +246,4 @@ def _make_sliding_window_bias(
         mask = torch.log(mask)
         attn_biases.append(mask.to(dtype))

-    return attn_biases
+    return attn_biases
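In the last hunk, the `-`/`+` pair with identical text is how GitHub typically renders a newline-at-end-of-file fix. As for the surrounding function: the `mask = torch.log(mask)` context line shows the trick `_make_sliding_window_bias` relies on, since taking the log of a 0/1 mask turns allowed positions into 0 and blocked ones into -inf, yielding an additive attention bias. A minimal sketch of that idea (function name and signature are illustrative, not vLLM's actual API):

    import torch


    def make_sliding_window_bias_sketch(seq_len: int,
                                        window_size: int,
                                        dtype: torch.dtype) -> torch.Tensor:
        # Start from a causal mask: position i may attend to j <= i ...
        mask = torch.tril(torch.ones(seq_len, seq_len))
        # ... but only within the last `window_size` positions.
        mask = torch.triu(mask, diagonal=-(window_size - 1))
        # log(1) = 0 keeps allowed positions; log(0) = -inf blocks the rest,
        # so the bias can simply be added to attention scores before softmax.
        return torch.log(mask).to(dtype)


    bias = make_sliding_window_bias_sketch(seq_len=5, window_size=2,
                                           dtype=torch.float32)
    # scores = scores + bias  # applied before softmax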
File 2:

@@ -116,13 +116,16 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
     config.enforce_eager = True
     return config

-def _verify_and_get_scheduler_config(config: SchedulerConfig) -> SchedulerConfig:
+
+def _verify_and_get_scheduler_config(
+        config: SchedulerConfig) -> SchedulerConfig:
     if config.chunked_prefill_enabled:
         logger.warning("Chunked prefill is not supported on CPU, disable it.")
         config.chunked_prefill_enabled = False

     return config

+
 def _verify_and_get_cache_config(config: CacheConfig) -> CacheConfig:
     _GB = 1 << 30
     if config.enable_prefix_caching:
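The helpers in this file follow one verify-and-get pattern: take a config object, coerce unsupported options with a warning rather than raising, and return the (possibly mutated) config. A self-contained sketch of that pattern, using a hypothetical stand-in class rather than vLLM's real SchedulerConfig:

    import logging
    from dataclasses import dataclass

    logger = logging.getLogger(__name__)


    @dataclass
    class SchedulerConfigDemo:
        """Hypothetical stand-in for vLLM's SchedulerConfig."""
        chunked_prefill_enabled: bool = True


    def verify_and_get_scheduler_config(
            config: SchedulerConfigDemo) -> SchedulerConfigDemo:
        # Coerce the unsupported option instead of raising, and warn the user.
        if config.chunked_prefill_enabled:
            logger.warning(
                "Chunked prefill is not supported on CPU, disable it.")
            config.chunked_prefill_enabled = False
        return config


    cfg = verify_and_get_scheduler_config(SchedulerConfigDemo())
    assert cfg.chunked_prefill_enabled is False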