@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
-            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 exaone4
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
             "ln_f",  # refact bloom qwen gpt2
@@ -168,7 +168,7 @@ class TensorNameMap:
 
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
@@ -183,7 +183,7 @@ class TensorNameMap:
 
         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
@@ -199,7 +199,7 @@ class TensorNameMap:
 
         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",  # distillbert
@@ -219,7 +219,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",  # mpt
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
-            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",  # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
@@ -252,7 +252,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 exaone4 # ge
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
         ),
 
@@ -290,7 +290,7 @@ class TensorNameMap:
 
         # Post feed-forward norm
        MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
         ),
 
@@ -321,7 +321,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
-            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.layer.{bid}.ffn.lin1",  # distillbert
@@ -373,7 +373,7 @@ class TensorNameMap:
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -410,7 +410,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
-            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.layer.{bid}.ffn.lin2",  # distillbert
@@ -457,7 +457,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",  # openelm
@@ -467,7 +467,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",  # openelm
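
For reference, convert scripts consume this table through gguf-py's TensorNameMap, so the new entries let EXAONE 4 checkpoint tensor names resolve to GGUF tensor names. Below is a minimal sketch of that lookup; it assumes MODEL_ARCH.EXAONE4 is defined in constants.py by the accompanying change (otherwise substitute an existing arch such as MODEL_ARCH.LLAMA), and the block count of 32 is an arbitrary placeholder.

# Minimal sketch: resolve HF tensor names to GGUF names via TensorNameMap.
# Assumes MODEL_ARCH.EXAONE4 exists in gguf.constants (added alongside this mapping change).
from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

tmap = get_tensor_name_map(MODEL_ARCH.EXAONE4, 32)  # 32 blocks is a placeholder

# Names covered by this change now resolve; unmapped names return None.
print(tmap.get_name("model.embed_tokens.weight", try_suffixes=(".weight", ".bias")))
# -> token_embd.weight
print(tmap.get_name("model.layers.0.self_attn.q_norm.weight", try_suffixes=(".weight", ".bias")))
# -> blk.0.attn_q_norm.weight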