
Commit d86c30c

fix: Use correct GGUF metadata keys for softcapping
The GGUF metadata stores softcapping as:

- gemma2.attn_logit_softcapping
- gemma2.final_logit_softcapping

After stripping the architecture prefix, the mapping keys should be 'attn_logit_softcapping' and 'final_logit_softcapping', not 'attention.logit_softcapping', which does not exist in the GGUF metadata. Also adds the final_logit_softcapping mapping, which was missing.

Signed-off-by: Christina <truffle@gmail.com>
1 parent cecafdd commit d86c30c
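
To make the rename concrete, here is a minimal standalone sketch of the prefix-stripping step the commit message describes. It is not the actual transformers GGUF loader code: GEMMA2_CONFIG_MAP is a trimmed stand-in for the gemma2 entry of GGUF_CONFIG_MAPPING, and the metadata values (50.0 and 30.0) are illustrative examples only.

# Illustrative only: mimic how an architecture-prefixed GGUF metadata key is
# reduced to a mapping key and renamed into a config attribute.
GEMMA2_CONFIG_MAP = {
    "attn_logit_softcapping": "attn_logit_softcapping",    # corrected key
    "final_logit_softcapping": "final_logit_softcapping",  # newly added key
}

# Example GGUF metadata shaped as described in the commit message (values made up).
gguf_metadata = {
    "gemma2.attn_logit_softcapping": 50.0,
    "gemma2.final_logit_softcapping": 30.0,
}

config_kwargs = {}
for key, value in gguf_metadata.items():
    architecture, _, stripped = key.partition(".")  # "gemma2", ".", "attn_logit_softcapping"
    if architecture == "gemma2" and stripped in GEMMA2_CONFIG_MAP:
        config_kwargs[GEMMA2_CONFIG_MAP[stripped]] = value

# The old "attention.logit_softcapping" key could never match here, because the
# stripped metadata key is "attn_logit_softcapping".
print(config_kwargs)  # {'attn_logit_softcapping': 50.0, 'final_logit_softcapping': 30.0}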

File tree: 2 files changed, +13 -7 lines changed

src/transformers/integrations/ggml.py

Lines changed: 4 additions & 2 deletions
@@ -245,7 +245,8 @@
         "attention.head_count_kv": "num_key_value_heads",
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "attention.sliding_window": "sliding_window",
-        "attention.logit_softcapping": "attn_logit_softcapping",
+        "attn_logit_softcapping": "attn_logit_softcapping",
+        "final_logit_softcapping": "final_logit_softcapping",
         "vocab_size": "vocab_size",
     },
     "gemma3": {
@@ -262,7 +263,8 @@
         "attention.head_count_kv": "num_key_value_heads",
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "attention.sliding_window": "sliding_window",
-        "attention.logit_softcapping": "attn_logit_softcapping",
+        "attn_logit_softcapping": "attn_logit_softcapping",
+        "final_logit_softcapping": "final_logit_softcapping",
         "vocab_size": "vocab_size",
     },
     "umt5": {

tests/quantization/ggml/test_ggml.py

Lines changed: 9 additions & 5 deletions
@@ -1045,20 +1045,24 @@ def test_deci_config_mapping(self):
         self.assertIsNone(deci_mapping["rope.dimension_count"])
 
     def test_gemma_softcap_config_mapping(self):
-        """Test that Gemma2/Gemma3 GGUF config mapping includes attn_logit_softcapping."""
+        """Test that Gemma2/Gemma3 GGUF config mapping includes softcapping parameters."""
         from transformers.integrations.ggml import GGUF_CONFIG_MAPPING
 
         # Test Gemma2
         self.assertIn("gemma2", GGUF_CONFIG_MAPPING)
         gemma2_mapping = GGUF_CONFIG_MAPPING["gemma2"]
-        self.assertIn("attention.logit_softcapping", gemma2_mapping)
-        self.assertEqual(gemma2_mapping["attention.logit_softcapping"], "attn_logit_softcapping")
+        self.assertIn("attn_logit_softcapping", gemma2_mapping)
+        self.assertEqual(gemma2_mapping["attn_logit_softcapping"], "attn_logit_softcapping")
+        self.assertIn("final_logit_softcapping", gemma2_mapping)
+        self.assertEqual(gemma2_mapping["final_logit_softcapping"], "final_logit_softcapping")
 
         # Test Gemma3
         self.assertIn("gemma3", GGUF_CONFIG_MAPPING)
         gemma3_mapping = GGUF_CONFIG_MAPPING["gemma3"]
-        self.assertIn("attention.logit_softcapping", gemma3_mapping)
-        self.assertEqual(gemma3_mapping["attention.logit_softcapping"], "attn_logit_softcapping")
+        self.assertIn("attn_logit_softcapping", gemma3_mapping)
+        self.assertEqual(gemma3_mapping["attn_logit_softcapping"], "attn_logit_softcapping")
+        self.assertIn("final_logit_softcapping", gemma3_mapping)
+        self.assertEqual(gemma3_mapping["final_logit_softcapping"], "final_logit_softcapping")
 
     def test_deci_architecture_mapping(self):
         """Test that Deci architectures are mapped to GGUFLlamaConverter."""
