We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 744064d, commit 8d549e4 (Copy full SHA for 8d549e4)
vllm/config/compilation.py
@@ -513,6 +513,16 @@ def __post_init__(self) -> None:
513
if isinstance(self.pass_config, dict):
514
self.pass_config = PassConfig(**self.pass_config)
515
516
+ if (
517
+ is_torch_equal_or_newer("2.9.0.dev")
518
+ and "combo_kernels" not in self.inductor_compile_config
519
+ and "benchmark_combo_kernel" not in self.inductor_compile_config
520
+ ):
521
+ # use horizontal fusion, which is useful for fusing qk-norm and
522
+ # qk-rope when query and key have different shapes.
523
+ self.inductor_compile_config["combo_kernels"] = True
524
+ self.inductor_compile_config["benchmark_combo_kernel"] = True
525
+
526
# migrate the deprecated flags
527
if not self.use_cudagraph:
528
logger.warning(
0 commit comments