Commit 99df622

Author: angazenn
Commit message: rename
Signed-off-by: angazenn <zengyanjia@huawei.com>
1 parent 68fffc2 commit 99df622

File tree

2 files changed: +32 -16 lines changed

vllm_ascend/models/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -49,5 +49,5 @@ def register_model():
         "vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM")
 
     ModelRegistry.register_model(
-        "PanGuMoEForCausalLM",
-        "vllm_ascend.models.pangu_moe:PanGuMoEForCausalLM")
+        "PanguProMoEForCausalLM",
+        "vllm_ascend.models.pangu_moe:PanguProMoEForCausalLM")

vllm_ascend/models/pangu_moe.py

Lines changed: 30 additions & 14 deletions

@@ -1,4 +1,20 @@
-"""Inference-only PanGuMoE model compatible with HuggingFace weights."""
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
 
 import torch
@@ -40,7 +56,7 @@
 _ROUTER_SCALE = None
 
 
-class PanGuMoeMLP(nn.Module):
+class PanguProMoEMLP(nn.Module):
 
     def __init__(
         self,
@@ -79,7 +95,7 @@ def forward(self, x):
         return x
 
 
-class PanGuMoeSparseMoeBlock(nn.Module):
+class PanguProMoESparseMoeBlock(nn.Module):
 
     @staticmethod
     def pangu_group8_topk(
@@ -152,7 +168,7 @@ def __init__(
             intermediate_size=config.moe_intermediate_size,
             reduce_results=False,
             quant_config=quant_config,
-            custom_routing_function=PanGuMoeSparseMoeBlock.pangu_group8_topk,
+            custom_routing_function=PanguProMoESparseMoeBlock.pangu_group8_topk,
             prefix=f"{prefix}.experts",
         )
 
@@ -165,7 +181,7 @@ def __init__(
         )
 
         if config.shared_expert_intermediate_size > 0:
-            self.shared_expert = PanGuMoeMLP(
+            self.shared_expert = PanguProMoEMLP(
                 hidden_size=config.hidden_size,
                 intermediate_size=config.shared_expert_intermediate_size,
                 hidden_act=config.hidden_act,
@@ -201,7 +217,7 @@ def forward(
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
-class PanGuMoeAttention(nn.Module):
+class PanguProMoEAttention(nn.Module):
 
     def __init__(
         self,
@@ -288,7 +304,7 @@ def forward(
         return output
 
 
-class PanGuMoeDecoderLayer(nn.Module):
+class PanguProMoEDecoderLayer(nn.Module):
 
     def __init__(
         self,
@@ -304,7 +320,7 @@ def __init__(
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
 
-        self.self_attn = PanGuMoeAttention(
+        self.self_attn = PanguProMoEAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
             num_kv_heads=config.num_key_value_heads,
@@ -322,13 +338,13 @@ def __init__(
                            config.mlp_only_layers)
         if (layer_idx
                 not in mlp_only_layers) and (config.num_experts > 0): ### ???
-            self.mlp = PanGuMoeSparseMoeBlock(
+            self.mlp = PanguProMoESparseMoeBlock(
                 config=config,
                 quant_config=quant_config,
                 prefix=f"{prefix}.mlp",
             )
         else:
-            self.mlp = PanGuMoeMLP(
+            self.mlp = PanguProMoEMLP(
                 hidden_size=config.hidden_size,
                 intermediate_size=config.intermediate_size,
                 hidden_act=config.hidden_act,
@@ -370,7 +386,7 @@ def forward(
 
 
 @support_torch_compile
-class PanGuMoEModel(nn.Module):
+class PanguProMoEModel(nn.Module):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -390,7 +406,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
-            lambda prefix: PanGuMoeDecoderLayer(config=config,
+            lambda prefix: PanguProMoEDecoderLayer(config=config,
                                                 cache_config=cache_config,
                                                 quant_config=quant_config,
                                                 prefix=prefix),
@@ -439,7 +455,7 @@ def forward(
         return hidden_states
 
 
-class PanGuMoEForCausalLM(nn.Module, SupportsPP):
+class PanguProMoEForCausalLM(nn.Module, SupportsPP):
 
     fall_back_to_pt_during_load = False
 
@@ -456,7 +472,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
-        self.model = PanGuMoEModel(vllm_config=vllm_config,
+        self.model = PanguProMoEModel(vllm_config=vllm_config,
                                    prefix=maybe_prefix(prefix, "model"))
         self.lm_head = ParallelLMHead(
             config.vocab_size,
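
This file's changes are a mechanical rename (the module path vllm_ascend.models.pangu_moe itself is unchanged), so any external code that imported the old class names has to follow it. A small illustrative sketch of the migration, not part of this commit:

# Old imports (before this commit) no longer resolve:
#     from vllm_ascend.models.pangu_moe import PanGuMoEForCausalLM, PanGuMoEModel
# New imports (after this commit):
from vllm_ascend.models.pangu_moe import (  # noqa: F401
    PanguProMoEForCausalLM,
    PanguProMoEModel,
)

# Model selection in vLLM happens by architecture name, so a checkpoint served
# through this plugin is expected to declare the new name in its config.json:
#     "architectures": ["PanguProMoEForCausalLM"]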
