|
70 | 70 | from vllm.sequence import IntermediateTensors
|
71 | 71 |
|
72 | 72 | from vllm_ascend.ascend_config import get_ascend_config
|
73 |
| -from vllm_ascend.ops.fused_moe import AscendFusedMoE |
| 73 | +from vllm_ascend.torchair.ops.torchair_fused_moe import TorchairAscendFusedMoE |
74 | 74 | from vllm_ascend.quantization.quant_config import AscendLinearMethod
|
75 | 75 | from vllm_ascend.quantization.w8a8_dynamic import AscendW8A8DynamicLinearMethod
|
76 | 76 | from vllm_ascend.utils import dispose_tensor, npu_prefetch
|
@@ -335,7 +335,7 @@ def __init__(
|
335 | 335 | else:
|
336 | 336 | self.gate.e_score_correction_bias = None
|
337 | 337 |
|
338 |
| - self.experts = AscendFusedMoE( |
| 338 | + self.experts = TorchairAscendFusedMoE( |
339 | 339 | num_experts=config.n_routed_experts,
|
340 | 340 | top_k=config.num_experts_per_tok,
|
341 | 341 | hidden_size=config.hidden_size,
|
@@ -951,7 +951,7 @@ def load_weights(self, weights: Iterable[tuple[str,
|
951 | 951 |
|
952 | 952 | # Params for weights, fp8 weight scales, fp8 activation scales
|
953 | 953 | # (param_name, weight_name, expert_id, shard_id)
|
954 |
| - expert_params_mapping = AscendFusedMoE.make_expert_params_mapping( |
| 954 | + expert_params_mapping = TorchairAscendFusedMoE.make_expert_params_mapping( |
955 | 955 | ckpt_gate_proj_name="gate_proj",
|
956 | 956 | ckpt_down_proj_name="down_proj",
|
957 | 957 | ckpt_up_proj_name="up_proj",
|
|
0 commit comments