
Commit 3a2cda0

pcmoritz authored and jimpang committed
Revert "Refactor llama family models (vllm-project#2637)" (vllm-project#2851)
This reverts commit 5c976a7.
1 parent: dd90bd1 · commit: 3a2cda0

File tree

17 files changed: +2720 -236 lines changed


vllm/model_executor/layers/layernorm.py

Lines changed: 0 additions & 25 deletions
@@ -7,31 +7,6 @@
 from vllm._C import ops


-class LayerNorm(nn.LayerNorm):
-
-    def __init__(
-        self,
-        hidden_size: int,
-        eps: float = 1e-6,
-    ) -> None:
-        super().__init__(hidden_size, eps=eps)
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: Optional[torch.Tensor] = None,
-    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
-        """normalization."""
-        if residual is not None:
-            x = x + residual
-            residual = x
-        x = super().forward(x)
-        if residual is None:
-            return x
-        else:
-            return x, residual
-
-
 class RMSNorm(nn.Module):
     """Root mean square normalization.

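For reference, the class deleted above folds a residual addition into the normalization call: when a residual tensor is supplied, it is added to the input first, the pre-norm sum is kept, and both the normalized output and that sum are returned so the next layer can reuse it. A minimal, self-contained sketch of that behavior follows; the hidden size and tensor shapes are illustrative assumptions, not vLLM code.

from typing import Optional, Tuple, Union

import torch
import torch.nn as nn


class LayerNorm(nn.LayerNorm):
    """Fused residual-add LayerNorm, as in the hunk removed above."""

    def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
        super().__init__(hidden_size, eps=eps)

    def forward(
        self,
        x: torch.Tensor,
        residual: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        # Add the residual before normalizing and return the pre-norm sum
        # alongside the normalized output.
        if residual is not None:
            x = x + residual
            residual = x
        x = super().forward(x)
        return x if residual is None else (x, residual)


# Illustrative usage (shapes are assumptions):
norm = LayerNorm(hidden_size=16)
x = torch.randn(2, 8, 16)       # (batch, seq_len, hidden_size)
res = torch.randn(2, 8, 16)
y = norm(x)                     # no residual -> normalized tensor only
y, new_res = norm(x, res)       # residual -> (normalized(x + res), x + res)
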
vllm/model_executor/models/__init__.py

Lines changed: 5 additions & 4 deletions
@@ -10,8 +10,8 @@
 
 # Architecture -> (module, class).
 _MODELS = {
-    "AquilaModel": ("llama", "LlamaForCausalLM"),
-    "AquilaForCausalLM": ("llama", "LlamaForCausalLM"),  # AquilaChat2
+    "AquilaModel": ("aquila", "AquilaForCausalLM"),
+    "AquilaForCausalLM": ("aquila", "AquilaForCausalLM"),  # AquilaChat2
     "BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"),  # baichuan-7b
     "BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"),  # baichuan-13b
     "BloomForCausalLM": ("bloom", "BloomForCausalLM"),
@@ -24,12 +24,12 @@
     "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
     "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
     "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
-    "InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
+    "InternLMForCausalLM": ("internlm", "InternLMForCausalLM"),
     "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
     # For decapoda-research/llama-*
     "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
-    "MistralForCausalLM": ("llama", "LlamaForCausalLM"),
+    "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
     "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),
     "QuantMixtralForCausalLM": ("mixtral_quant", "MixtralForCausalLM"),
     # transformers's mpt class has lower case
@@ -41,6 +41,7 @@
     "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
     "RWForCausalLM": ("falcon", "FalconForCausalLM"),
     "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
+    "YiForCausalLM": ("yi", "YiForCausalLM")
 }

 # Models not supported by ROCm.

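The registry restored above maps each Hugging Face architecture string to a (module, class) pair under vllm/model_executor/models, so the model loader can import the matching module and pick the class by name. A rough sketch of that kind of lookup follows; the helper name and error handling are assumptions for illustration, not vLLM's actual loader code.

import importlib

# Small excerpt of an architecture registry in the same shape as _MODELS.
_MODELS = {
    "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
    "MistralForCausalLM": ("mistral", "MistralForCausalLM"),
    "YiForCausalLM": ("yi", "YiForCausalLM"),
}


def resolve_model_cls(architecture: str):
    # Map the architecture reported by the HF config to a concrete class.
    if architecture not in _MODELS:
        raise ValueError(f"Model architecture {architecture!r} is not supported.")
    module_name, cls_name = _MODELS[architecture]
    module = importlib.import_module(f"vllm.model_executor.models.{module_name}")
    return getattr(module, cls_name)


# e.g. for a config with architectures == ["MistralForCausalLM"]:
# model_cls = resolve_model_cls("MistralForCausalLM")  # -> MistralForCausalLM
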