Showing 2 changed files with 9 additions and 4 deletions.

File 1:

@@ -7,7 +7,8 @@
 from torch.nn.functional import scaled_dot_product_attention

 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
-                                              AttentionMetadata, AttentionMetadataPerStage)
+                                              AttentionMetadata,
+                                              AttentionMetadataPerStage)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)

@@ -49,7 +50,8 @@ def copy_blocks(


 @dataclass
-class TorchSDPAMetadata(AttentionMetadata, PagedAttentionMetadata, AttentionMetadataPerStage):
+class TorchSDPAMetadata(AttentionMetadata, PagedAttentionMetadata,
+                        AttentionMetadataPerStage):
     """Metadata for TorchSDPABackend.
     """
     # Currently, input sequences can only contain all prompts
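For context, the class being rewrapped above inherits from three metadata dataclasses at once. A minimal sketch of how `@dataclass` multiple inheritance merges fields into one generated `__init__` (all class and field names below are hypothetical, not vLLM's):

    from dataclasses import dataclass


    @dataclass
    class PagedMeta:
        block_size: int  # hypothetical field, for illustration only


    @dataclass
    class StageMeta:
        is_prompt: bool  # hypothetical field


    @dataclass
    class CombinedMeta(PagedMeta, StageMeta):
        # Fields from every base dataclass are merged into the generated
        # __init__, collected in reverse-MRO order: is_prompt, block_size, ...
        num_slots: int


    meta = CombinedMeta(is_prompt=True, block_size=16, num_slots=4)
    print(meta)  # CombinedMeta(is_prompt=True, block_size=16, num_slots=4)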
@@ -244,4 +246,4 @@ def _make_sliding_window_bias(
         mask = torch.log(mask)
         attn_biases.append(mask.to(dtype))

-    return attn_biases
+    return attn_biases
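In the last hunk, the `-`/`+` pair with identical text is how GitHub typically renders a newline-at-end-of-file fix. As for the surrounding function: the `mask = torch.log(mask)` context line shows the trick `_make_sliding_window_bias` relies on, since taking the log of a 0/1 mask turns allowed positions into 0 and blocked ones into -inf, yielding an additive attention bias. A minimal sketch of that idea (function name and signature are illustrative, not vLLM's actual API):

    import torch


    def make_sliding_window_bias_sketch(seq_len: int,
                                        window_size: int,
                                        dtype: torch.dtype) -> torch.Tensor:
        # Start from a causal mask: position i may attend to j <= i ...
        mask = torch.tril(torch.ones(seq_len, seq_len))
        # ... but only within the last `window_size` positions.
        mask = torch.triu(mask, diagonal=-(window_size - 1))
        # log(1) = 0 keeps allowed positions; log(0) = -inf blocks the rest,
        # so the bias can simply be added to attention scores before softmax.
        return torch.log(mask).to(dtype)


    bias = make_sliding_window_bias_sketch(seq_len=5, window_size=2,
                                           dtype=torch.float32)
    # scores = scores + bias  # applied before softmax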
File 2:

@@ -116,13 +116,16 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
     config.enforce_eager = True
     return config

-def _verify_and_get_scheduler_config(config: SchedulerConfig) -> SchedulerConfig:
+
+def _verify_and_get_scheduler_config(
+        config: SchedulerConfig) -> SchedulerConfig:
     if config.chunked_prefill_enabled:
         logger.warning("Chunked prefill is not supported on CPU, disable it.")
         config.chunked_prefill_enabled = False

     return config

+
 def _verify_and_get_cache_config(config: CacheConfig) -> CacheConfig:
     _GB = 1 << 30
     if config.enable_prefix_caching:
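The helpers in this file follow one verify-and-get pattern: take a config object, coerce unsupported options with a warning rather than raising, and return the (possibly mutated) config. A self-contained sketch of that pattern, using a hypothetical stand-in class rather than vLLM's real SchedulerConfig:

    import logging
    from dataclasses import dataclass

    logger = logging.getLogger(__name__)


    @dataclass
    class SchedulerConfigDemo:
        """Hypothetical stand-in for vLLM's SchedulerConfig."""
        chunked_prefill_enabled: bool = True


    def verify_and_get_scheduler_config(
            config: SchedulerConfigDemo) -> SchedulerConfigDemo:
        # Coerce the unsupported option instead of raising, and warn the user.
        if config.chunked_prefill_enabled:
            logger.warning(
                "Chunked prefill is not supported on CPU, disable it.")
            config.chunked_prefill_enabled = False
        return config


    cfg = verify_and_get_scheduler_config(SchedulerConfigDemo())
    assert cfg.chunked_prefill_enabled is False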