
trying to debug aggregate step of moe mlp #1552

Merged

Changes from 1 commit (53 commits in this pull request)
17f162f  with .  (hugolatendresse, Dec 2, 2024)
f5c3f7f  all _ instead of .  (hugolatendresse, Dec 3, 2024)
22e2a58  _block  (hugolatendresse, Dec 3, 2024)
1cd424a  try nullptr  (hugolatendresse, Dec 3, 2024)
df0c59b  no comma  (hugolatendresse, Dec 3, 2024)
3598ee9  debug  (hugolatendresse, Dec 3, 2024)
06259f7  debug  (hugolatendresse, Dec 3, 2024)
c89a327  revert aggregate  (hugolatendresse, Dec 6, 2024)
71c5d69  one expert  (hugolatendresse, Dec 7, 2024)
cb1eaa8  sync  (hugolatendresse, Dec 7, 2024)
286a1fe  dont redefien mlpout  (hugolatendresse, Dec 7, 2024)
8709035  sync  (hugolatendresse, Dec 7, 2024)
51f9701  sync  (hugolatendresse, Dec 7, 2024)
a65e733  register tokenizer for mixtral  (hugolatendresse, Dec 7, 2024)
62bf012  sync  (hugolatendresse, Dec 7, 2024)
61adc0f  rename weights  (hugolatendresse, Dec 7, 2024)
8c69b8b  sync  (hugolatendresse, Dec 7, 2024)
0b89169  sync  (hugolatendresse, Dec 7, 2024)
76cac36  permission  (Dec 7, 2024)
baa30a8  Merge branch 'nomlp2' of https://github.com/hugolatendresse/FlexFlow …  (Dec 7, 2024)
53a4cc4  sync  (hugolatendresse, Dec 7, 2024)
bd1ffa0  Merge branch 'nomlp2' of github.com:hugolatendresse/FlexFlow into nomlp2  (hugolatendresse, Dec 7, 2024)
aeb29e9  sync  (hugolatendresse, Dec 7, 2024)
d27804f  sync  (hugolatendresse, Dec 7, 2024)
ecb9675  sync  (hugolatendresse, Dec 7, 2024)
af665bd  which loading  (hugolatendresse, Dec 7, 2024)
16ab912  sync  (hugolatendresse, Dec 7, 2024)
c3945e3  sync  (hugolatendresse, Dec 7, 2024)
9385e82  .o  (hugolatendresse, Dec 7, 2024)
ce91966  sync  (hugolatendresse, Dec 7, 2024)
7e558bc  able to output with mixtral (!!!) but it's all etc etc etc  (hugolatendresse, Dec 7, 2024)
c8007fd  try expert 1  (hugolatendresse, Dec 7, 2024)
aa01156  revert experts  (hugolatendresse, Dec 7, 2024)
539b491  tmp fix  (hugolatendresse, Dec 7, 2024)
28b2df0  dummy gate  (hugolatendresse, Dec 7, 2024)
8ad9478  bad softmax fix  (hugolatendresse, Dec 7, 2024)
9dcb5c2  printf  (hugolatendresse, Dec 8, 2024)
7ed7d65  dims  (hugolatendresse, Dec 8, 2024)
a906c6a  sync  (hugolatendresse, Dec 8, 2024)
0af8064  sync  (hugolatendresse, Dec 8, 2024)
4d26fb5  comments on dims  (hugolatendresse, Dec 8, 2024)
c0e4524  sync  (hugolatendresse, Dec 8, 2024)
7462fb4  sync  (hugolatendresse, Dec 8, 2024)
21ecf77  sync  (hugolatendresse, Dec 8, 2024)
742ec59  sync  (hugolatendresse, Dec 8, 2024)
b04af7a  sync  (hugolatendresse, Dec 8, 2024)
99954e5  sync  (hugolatendresse, Dec 8, 2024)
511fc25  sync  (hugolatendresse, Dec 8, 2024)
e590ce5  sync  (hugolatendresse, Dec 8, 2024)
1ed4bff  sync  (hugolatendresse, Dec 8, 2024)
5700378  CHECKPOINT  (hugolatendresse, Dec 8, 2024)
381d3cd  2222:22 port  (hugolatendresse, Dec 8, 2024)
6309e70  tmp_volume  (hugolatendresse, Dec 8, 2024)
sync
hugolatendresse committed Dec 7, 2024
commit 62bf0121f0a6d6fa1799aa71a57567414b1e2846
12 changes: 6 additions & 6 deletions inference/models/mixtral.cc
--- a/inference/models/mixtral.cc
+++ b/inference/models/mixtral.cc
@@ -75,7 +75,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           mixtral_config.rms_norm_eps,
           mixtral_config.hidden_size,
           DT_NONE,
-          std::string("layers_" + std::to_string(i) + ".input_layernorm")
+          std::string("layers_" + std::to_string(i) + "_input_layernorm")
              .c_str());
     } else {
       ff.residual_rms_norm(
@@ -86,7 +86,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           mixtral_config.hidden_size,
           false, // inplace_residual
           DT_NONE,
-          std::string("layers_" + std::to_string(i) + ".input_layernorm")
+          std::string("layers_" + std::to_string(i) + "_input_layernorm")
              .c_str());
       token = token_att_norm[0];
       att_norm = token_att_norm[1];
@@ -104,7 +104,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           nullptr, // ?
           REG_MODE_NONE, // no regularization
           0.0f, // no dropout
-          std::string("layers_" + std::to_string(i) + ".self_attn.qkv_proj")
+          std::string("layers_" + std::to_string(i) + "_self_attn_qkv_proj")
              .c_str());

       Tensor mha;
@@ -126,7 +126,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           1.0f,  /*scaling factor*/
           true,  /*qk_prod_scaling*/
           false, /*position_bias*/
-          std::string("layers_" + std::to_string(i) + ".self_attn")
+          std::string("layers_" + std::to_string(i) + "_self_attn")
              .c_str() /*name*/
       );
       break;
@@ -148,7 +148,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           nullptr,
           REG_MODE_NONE,
           0.0f,
-          std::string("layers_" + std::to_string(i) + ".self_attn.o_proj")
+          std::string("layers_" + std::to_string(i) + "_self_attn_o_proj")
              .c_str());

       // step 2: SILU activaion
@@ -161,7 +161,7 @@ void MIXTRAL::create_mixtral_model(FFModel &ff,
           mixtral_config.hidden_size,
           false, // inplace_residual
           DT_NONE,
-          std::string("layers_" + std::to_string(i) + ".post_attention_layernorm")
+          std::string("layers_" + std::to_string(i) + "_post_attention_layernorm")
              .c_str());
       token = token_ff_norm[0];
       Tensor ff_norm = token_ff_norm[1];
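
Every hunk in this commit makes the same change: the layer-name string passed to each FlexFlow operator switches its separator from "." to "_". As a minimal sketch of how that convention could live in one place (layer_name is a hypothetical helper for illustration, not part of this PR or the FlexFlow API):

    #include <string>

    // Hypothetical helper: builds the underscore-separated layer names
    // adopted in the diff above, e.g. layer_name(0, "self_attn_qkv_proj")
    // returns "layers_0_self_attn_qkv_proj".
    static std::string layer_name(int layer_idx, std::string const &suffix) {
      return "layers_" + std::to_string(layer_idx) + "_" + suffix;
    }

A call site would then pass, for example, layer_name(i, "input_layernorm").c_str(); as in the original code, the pointer returned by .c_str() remains valid through the full call expression, since the temporary std::string is destroyed only at the end of it.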