@@ -1123,6 +1123,34 @@ def input_processor_for_molmo(ctx: InputContext, inputs: DecoderOnlyInputs):
 @INPUT_REGISTRY.register_input_processor(input_processor_for_molmo)
 class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
 
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_substr={
+            # vision backbone mapping
+            "image_projector.w1.": "image_projector.gate_proj.",
+            "image_projector.w3.": "image_projector.up_proj.",
+            "image_projector.w2.": "image_projector.down_proj.",
+            # language backbone mapping
+            "att_proj": "self_attn.qkv_proj",
+            "attn_out": "self_attn.o_proj",
+            "q_norm": "self_attn.q_norm",
+            "k_norm": "self_attn.k_norm",
+            "ff_proj": "mlp.gate_up_proj",
+            "ff_out": "mlp.down_proj",
+            "attn_norm": "input_layernorm",
+            "ff_norm": "post_attention_layernorm",
+        },
+        orig_to_new_prefix={
+            # vision backbone mapping
+            "model.vision_backbone.": "vision_backbone.",
+            # language backbone mapping
+            "model.transformer.blocks.": "model.layers.",
+            "model.transformer.ln_f.": "model.norm.",
+            # lm_head is renamed to model.transformer.mlp.down_proj first,
+            # so we need to run a second renaming for it
+            "model.transformer.mlp.down_proj.": "lm_head.",
+        },
+    )
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
@@ -1298,36 +1326,10 @@ def sample(
         return next_tokens
 
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
-        hf_to_vllm_mapper = WeightsMapper(
-            orig_to_new_substr={
-                # vision backbone mapping
-                "image_projector.w1.": "image_projector.gate_proj.",
-                "image_projector.w3.": "image_projector.up_proj.",
-                "image_projector.w2.": "image_projector.down_proj.",
-                # language backbone mapping
-                "att_proj": "self_attn.qkv_proj",
-                "attn_out": "self_attn.o_proj",
-                "q_norm": "self_attn.q_norm",
-                "k_norm": "self_attn.k_norm",
-                "ff_proj": "mlp.gate_up_proj",
-                "ff_out": "mlp.down_proj",
-                "attn_norm": "input_layernorm",
-                "ff_norm": "post_attention_layernorm",
-            },
-            orig_to_new_prefix={
-                # vision backbone mapping
-                "model.vision_backbone.": "vision_backbone.",
-                # language backbone mapping
-                "model.transformer.blocks.": "model.layers.",
-                "model.transformer.ln_f.": "model.norm.",
-                # lm_head is renamed to model.transformer.mlp.down_proj firstly,
-                # we need to run a second renaming for it
-                "model.transformer.mlp.down_proj.": "lm_head.",
-            },
-        )
+
         loader = AutoWeightsLoader(self)
         weights = _get_weights_with_merged_embedding(weights)
-        return loader.load_weights(weights, mapper=hf_to_vllm_mapper)
+        return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
 
 
 def _get_weights_with_merged_embedding(
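Hoisting the mapper to a class attribute leaves load_weights a three-liner and makes the name mapping readable off MolmoForCausalLM itself, with no instance needed. Note that the mapper applies the substring renames before the prefix renames, which is exactly what the lm_head comment relies on: per that comment, the HF checkpoint stores the output head under model.transformer.ff_out, so the "ff_out" substring rule first rewrites it to model.transformer.mlp.down_proj and the prefix rule then maps it to lm_head. A minimal self-contained sketch of that two-pass logic (map_name here is a hypothetical stand-in for illustration, not vLLM's actual WeightsMapper implementation):

# Hypothetical stand-in for WeightsMapper, illustrating the two-pass
# renaming assumed above (substrings first, then prefixes).
SUBSTR = {"ff_out": "mlp.down_proj"}
PREFIX = {
    "model.transformer.blocks.": "model.layers.",
    "model.transformer.mlp.down_proj.": "lm_head.",
}

def map_name(name: str) -> str:
    # Pass 1: substring renames, applied anywhere in the name.
    for old, new in SUBSTR.items():
        name = name.replace(old, new)
    # Pass 2: prefix renames, applied once at the start of the name.
    for old, new in PREFIX.items():
        if name.startswith(old):
            return new + name[len(old):]
    return name

# A per-layer FFN output projection lands under model.layers.*:
assert map_name("model.transformer.blocks.0.ff_out.weight") \
    == "model.layers.0.mlp.down_proj.weight"
# The output head takes the double rename described in the comment:
assert map_name("model.transformer.ff_out.weight") == "lm_head.weight"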