Skip to content

Commit cff73cd

Browse files
committed
rebase fixup
1 parent d1aae3a commit cff73cd

File tree

5 files changed: +2 additions, −6 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
from typing import Any, Callable, Dict, List, Optional, Tuple
77

88
import torch
9-
import triton
10-
import triton.language as tl
119

1210
import vllm.envs as envs
1311
from vllm import _custom_ops as ops
@@ -25,6 +23,7 @@
2523
dequant_mxfp4,
2624
)
2725
from vllm.platforms import current_platform
26+
from vllm.triton_utils import tl, triton
2827
from vllm.utils import direct_register_custom_op
2928

3029
from .rocm_aiter_fused_moe import is_rocm_aiter_moe_enabled

vllm/model_executor/layers/quantization/quark/quark.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import torch
77

88
from vllm.logger import init_logger
9-
import vllm.envs as envs
109
from vllm.model_executor.layers.fused_moe import FusedMoE
1110
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
1211
UnquantizedLinearMethod)

vllm/model_executor/layers/quantization/quark/quark_moe.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
336336
layer.w2_weight_scale = None
337337

338338
# This call is necessary to release the scales memory.
339-
# TODO: is it still?
340339
torch.cuda.empty_cache()
341340

342341
def apply(

vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
4747
layer.weight_scale = None
4848

4949
# This call is necessary to release the scales memory.
50-
# TODO: is it still?
5150
torch.cuda.empty_cache()
5251

5352
def create_weights(self, layer: torch.nn.Module,

vllm/worker/model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1795,7 +1795,7 @@ def execute_model(
17951795
])
17961796
else:
17971797
model_executable = self.model
1798-
1798+
17991799
# Receive KV cache in distributed KV cache transfer setting
18001800
# In disagg prefill setting, it will also recv hidden states and bypass
18011801
# model forwarding

Comments (0)