
Commit 482cb28

Authored by Isotr0py
Fix tie_word_embeddings handling for GGUF models (#35085)
* fix tie_word_embeddings

  Signed-off-by: Isotr0py <2037008807@qq.com>

* fix

  Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
1 parent 3544705 commit 482cb28
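
For context, this change affects models loaded directly from GGUF files through transformers. A minimal usage sketch follows; the repo id and file name are illustrative placeholders, not taken from this commit. After the fix, config.tie_word_embeddings is derived from the checkpoint's tensors instead of being hard-coded for some architectures.

    from transformers import AutoModelForCausalLM

    # Illustrative placeholders: any GGUF checkpoint on the Hub works the same way.
    repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
    gguf_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

    # The GGUF tensors are parsed and converted during loading; tie_word_embeddings
    # is now inferred from whether the file contains a separate output head tensor.
    model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_file)
    print(model.config.tie_word_embeddings)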

File tree

1 file changed: 6 additions, 1 deletion


src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 6 additions & 1 deletion
@@ -291,7 +291,6 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
     # FIXME: Currnetly this implementation is only for flan-t5 architecture.
     # It needs to be developed for supporting legacy t5.
     elif "t5" in architecture or "t5encoder" in architecture:
-        parsed_parameters["config"]["tie_word_embeddings"] = False
         parsed_parameters["config"]["is_gated_act"] = True
         updated_architecture = "t5"
     else:
@@ -326,6 +325,12 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
     if architecture + model_size not in GGUF_SUPPORTED_ARCHITECTURES:
         raise ValueError(f"Architecture {architecture + model_size} not supported")

+    # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
+    # tie_word_embeddings is true otherwise false
+    parsed_parameters["config"]["tie_word_embeddings"] = all(
+        "output.weight" != tensor.name for tensor in reader.tensors
+    )
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
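
The added check leans on GGUF naming conventions: an untied LM head is stored as a standalone output.weight tensor (which transformers maps to lm_head.weight), so its absence means the embeddings are tied. A minimal sketch of the same check outside this utility, assuming the gguf package is installed and using a placeholder file path:

    from gguf import GGUFReader

    # Placeholder path; any local GGUF file can be inspected this way.
    reader = GGUFReader("/path/to/model.gguf")

    # Mirrors the logic added above: embeddings are treated as tied when no
    # standalone "output.weight" (lm_head) tensor exists in the file.
    tie_word_embeddings = all(tensor.name != "output.weight" for tensor in reader.tensors)
    print(f"tie_word_embeddings: {tie_word_embeddings}")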

0 commit comments
