@@ -1,4 +1,20 @@
-"""Inference-only PanGuMoE model compatible with HuggingFace weights."""
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
 
 import torch
@@ -40,7 +56,7 @@
 _ROUTER_SCALE = None
 
 
-class PanGuMoeMLP(nn.Module):
+class PanguProMoEMLP(nn.Module):
 
     def __init__(
         self,
@@ -79,7 +95,7 @@ def forward(self, x):
         return x
 
 
-class PanGuMoeSparseMoeBlock(nn.Module):
+class PanguProMoESparseMoeBlock(nn.Module):
 
     @staticmethod
     def pangu_group8_topk(
@@ -152,7 +168,7 @@ def __init__(
             intermediate_size=config.moe_intermediate_size,
             reduce_results=False,
             quant_config=quant_config,
-            custom_routing_function=PanGuMoeSparseMoeBlock.pangu_group8_topk,
+            custom_routing_function=PanguProMoESparseMoeBlock.pangu_group8_topk,
             prefix=f"{prefix}.experts",
         )
 
@@ -165,7 +181,7 @@ def __init__(
         )
 
         if config.shared_expert_intermediate_size > 0:
-            self.shared_expert = PanGuMoeMLP(
+            self.shared_expert = PanguProMoEMLP(
                 hidden_size=config.hidden_size,
                 intermediate_size=config.shared_expert_intermediate_size,
                 hidden_act=config.hidden_act,
@@ -201,7 +217,7 @@ def forward(
         return final_hidden_states.view(num_tokens, hidden_dim)
 
 
-class PanGuMoeAttention(nn.Module):
+class PanguProMoEAttention(nn.Module):
 
     def __init__(
         self,
@@ -288,7 +304,7 @@ def forward(
         return output
 
 
-class PanGuMoeDecoderLayer(nn.Module):
+class PanguProMoEDecoderLayer(nn.Module):
 
     def __init__(
         self,
@@ -304,7 +320,7 @@ def __init__(
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
 
-        self.self_attn = PanGuMoeAttention(
+        self.self_attn = PanguProMoEAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
             num_kv_heads=config.num_key_value_heads,
@@ -322,13 +338,13 @@ def __init__(
                            config.mlp_only_layers)
         if (layer_idx
                 not in mlp_only_layers) and (config.num_experts > 0):  ### ???
-            self.mlp = PanGuMoeSparseMoeBlock(
+            self.mlp = PanguProMoESparseMoeBlock(
                 config=config,
                 quant_config=quant_config,
                 prefix=f"{prefix}.mlp",
             )
         else:
-            self.mlp = PanGuMoeMLP(
+            self.mlp = PanguProMoEMLP(
                 hidden_size=config.hidden_size,
                 intermediate_size=config.intermediate_size,
                 hidden_act=config.hidden_act,
@@ -370,7 +386,7 @@ def forward(
 
 
 @support_torch_compile
-class PanGuMoEModel(nn.Module):
+class PanguProMoEModel(nn.Module):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -390,7 +406,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
-            lambda prefix: PanGuMoeDecoderLayer(config=config,
+            lambda prefix: PanguProMoEDecoderLayer(config=config,
                                                 cache_config=cache_config,
                                                 quant_config=quant_config,
                                                 prefix=prefix),
@@ -439,7 +455,7 @@ def forward(
         return hidden_states
 
 
-class PanGuMoEForCausalLM(nn.Module, SupportsPP):
+class PanguProMoEForCausalLM(nn.Module, SupportsPP):
 
     fall_back_to_pt_during_load = False
 
@@ -456,7 +472,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
-        self.model = PanGuMoEModel(vllm_config=vllm_config,
+        self.model = PanguProMoEModel(vllm_config=vllm_config,
                                    prefix=maybe_prefix(prefix, "model"))
        self.lm_head = ParallelLMHead(
             config.vocab_size,
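
Note on the grouped routing wired in above: the renamed classes pass pangu_group8_topk to FusedMoE as custom_routing_function, i.e. the experts are treated as 8 equal groups and the router selects the best expert within each group. The sketch below is a minimal illustration of that idea only, not the diff's actual implementation: the function name grouped_topk_sketch, the assumed (hidden_states, gating_output, topk, renormalize) signature, and the 64-expert example are assumptions for illustration, and the real kernel's scaling, dtype handling, and normalization may differ.

    import torch


    def grouped_topk_sketch(hidden_states: torch.Tensor,
                            gating_output: torch.Tensor,
                            topk: int,
                            renormalize: bool,
                            num_groups: int = 8):
        """Illustrative group-wise top-1 routing (assumed signature, not the real kernel).

        hidden_states and topk are kept only for signature parity; in this grouped
        scheme the effective top-k equals num_groups (one expert per group).
        """
        num_tokens, num_experts = gating_output.shape  # [num_tokens, num_experts]
        assert num_experts % num_groups == 0, "experts must split evenly into groups"
        experts_per_group = num_experts // num_groups

        scores = torch.softmax(gating_output, dim=-1)
        grouped = scores.view(num_tokens, num_groups, experts_per_group)

        # Best expert inside each group -> one expert per group per token.
        group_weights, group_idx = grouped.max(dim=-1)

        # Map the within-group index back to a global expert id.
        offsets = torch.arange(num_groups,
                               device=gating_output.device) * experts_per_group
        topk_ids = group_idx + offsets        # [num_tokens, num_groups]
        topk_weights = group_weights

        if renormalize:
            topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)

        return topk_weights, topk_ids.to(torch.int32)


    # Example: 8 tokens routed over 64 experts arranged as 8 groups of 8.
    weights, ids = grouped_topk_sketch(torch.randn(8, 16),
                                       torch.randn(8, 64),
                                       topk=8,
                                       renormalize=True)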