Skip to content

Commit ba089c4

Browse files
DarkLight1337 and Isotr0py
authored and committed
[Model] Remove unnecessary weight initialization logic (vllm-project#11736)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Isotr0py <2037008807@qq.com>
1 parent a98ee3e commit ba089c4

File tree

3 files changed

+5
-22
lines changed

3 files changed

+5
-22
lines changed

vllm/model_executor/layers/resampler.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
Shared resampler perceiver network used in multimodal models and
2828
related helpers for sincos positional embeddings.
2929
30-
Example models: Qwen (Qwen-VL), Minicpmv2.0
30+
Example models: Qwen (Qwen-VL), MiniCPM-V 2.0
3131
"""
3232
import math
3333
from functools import partial
@@ -37,7 +37,6 @@
3737
import torch
3838
import torch.nn.functional as F
3939
from torch import nn
40-
from torch.nn.init import trunc_normal_
4140

4241
from vllm.model_executor.layers.linear import ReplicatedLinear
4342
from vllm.model_executor.layers.quantization import QuantizationConfig
@@ -169,8 +168,8 @@ def __init__(self,
169168
self.embed_dim = embed_dim
170169
self.num_heads = num_heads
171170

172-
self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
173-
trunc_normal_(self.query, std=0.02)
171+
self.query = nn.Parameter(torch.empty(self.num_queries, embed_dim))
172+
174173
if kv_dim is not None and kv_dim != embed_dim:
175174
self.kv_proj = ReplicatedLinear(kv_dim,
176175
embed_dim,
@@ -190,16 +189,7 @@ def __init__(self,
190189
self.ln_post = norm_layer(embed_dim) if do_post_projection else None
191190
self.proj = nn.Parameter(
192191
(embed_dim**-0.5) *
193-
torch.randn(embed_dim, embed_dim)) if do_post_projection else None
194-
195-
def _init_weights(self, m: nn.Module) -> None:
196-
if isinstance(m, nn.Linear):
197-
trunc_normal_(m.weight, std=0.02)
198-
if isinstance(m, nn.Linear) and m.bias is not None:
199-
nn.init.constant_(m.bias, 0)
200-
elif isinstance(m, nn.LayerNorm):
201-
nn.init.constant_(m.bias, 0)
202-
nn.init.constant_(m.weight, 1.0)
192+
torch.empty(embed_dim, embed_dim)) if do_post_projection else None
203193

204194
def _repeat(self, query, N: int):
205195
return query.unsqueeze(1).repeat(1, N, 1)
@@ -240,8 +230,6 @@ def __init__(self,
240230
self.pos_embed = nn.Parameter(
241231
torch.from_numpy(pos_embed_arr).requires_grad_(False))
242232

243-
self.apply(self._init_weights)
244-
245233
def forward(
246234
self,
247235
x: torch.Tensor,

vllm/model_executor/models/aria.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import torch
55
import torch.nn as nn
6-
from torch.nn.init import trunc_normal_
76
from transformers import BatchFeature, PretrainedConfig
87

98
from vllm.attention import AttentionMetadata
@@ -216,9 +215,7 @@ def __init__(
216215
self.num_heads = num_heads
217216

218217
self.query = nn.Parameter(
219-
torch.zeros(max(patch_to_query_dict.values()), self.embed_dim))
220-
221-
trunc_normal_(self.query, std=0.02)
218+
torch.empty(max(patch_to_query_dict.values()), self.embed_dim))
222219

223220
self.cross_attn = CrossAttention(kv_dim, embed_dim, num_heads)
224221

vllm/model_executor/models/minicpmv.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,6 @@ def __init__(self,
141141
self.max_size = max_size
142142
self._set_2d_pos_cache(self.max_size)
143143

144-
self.apply(self._init_weights)
145-
146144
def _set_2d_pos_cache(self,
147145
max_size: Tuple[int, int],
148146
device: torch.types.Device = "cpu") -> None:

0 commit comments

Comments (0)