Skip to content

Commit efed368

Browse files
Merge branch 'main' into add-jais2-model
2 parents 799e117 + 51a6673 commit efed368

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

src/transformers/integrations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
"finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
5555
"fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
5656
"ggml": [
57+
"GGUF_CONFIG_DEFAULTS_MAPPING",
5758
"GGUF_CONFIG_MAPPING",
5859
"GGUF_TOKENIZER_MAPPING",
5960
"_gguf_parse_value",
@@ -201,6 +202,7 @@
201202
from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
202203
from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
203204
from .ggml import (
205+
GGUF_CONFIG_DEFAULTS_MAPPING,
204206
GGUF_CONFIG_MAPPING,
205207
GGUF_TOKENIZER_MAPPING,
206208
_gguf_parse_value,

src/transformers/integrations/ggml.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,16 @@
313313
},
314314
}
315315

316+
# We only need to set here the parameters that default to different values between transformers and llama.cpp.
317+
GGUF_CONFIG_DEFAULTS_MAPPING = {
318+
"qwen3_moe": {
319+
# NOTE: Qwen3MoeConfig defaults to `False`, but llama.cpp needs this to be `True`.
320+
# See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
321+
# (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
322+
"norm_topk_prob": True,
323+
},
324+
}
325+
316326

317327
def _gguf_parse_value(_value, data_type):
318328
if not isinstance(data_type, list):

src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from tqdm.auto import tqdm
2121

2222
from .integrations import (
23+
GGUF_CONFIG_DEFAULTS_MAPPING,
2324
GGUF_CONFIG_MAPPING,
2425
GGUF_TOKENIZER_MAPPING,
2526
_gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
437438
all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
438439
)
439440

441+
# Set GGUF-specific default values
442+
config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
443+
updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
444+
)
445+
for key, value in config_defaults.items():
446+
parsed_parameters["config"].setdefault(key, value)
447+
440448
# List all key-value pairs in a columnized format
441449
for gguf_key, field in reader.fields.items():
442450
gguf_key = gguf_key.replace(architecture, updated_architecture)

0 commit comments

Comments
 (0)