Skip to content

Commit efed368

Browse files
Merge branch 'main' into add-jais2-model
2 parents 799e117 + 51a6673 commit efed368

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

src/transformers/integrations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
"finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
5555
"fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
5656
"ggml": [
57+
"GGUF_CONFIG_DEFAULTS_MAPPING",
5758
"GGUF_CONFIG_MAPPING",
5859
"GGUF_TOKENIZER_MAPPING",
5960
"_gguf_parse_value",
@@ -201,6 +202,7 @@
201202
from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
202203
from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
203204
from .ggml import (
205+
GGUF_CONFIG_DEFAULTS_MAPPING,
204206
GGUF_CONFIG_MAPPING,
205207
GGUF_TOKENIZER_MAPPING,
206208
_gguf_parse_value,

src/transformers/integrations/ggml.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,16 @@
313313
},
314314
}
315315

316+
# We only need to set here the parameters that default to different values between transformers and llama.cpp.
317+
GGUF_CONFIG_DEFAULTS_MAPPING = {
318+
"qwen3_moe": {
319+
# NOTE: Qwen3MoeConfig defaults to `False`, but llama.cpp needs this to be `True`.
320+
# See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
321+
# (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
322+
"norm_topk_prob": True,
323+
},
324+
}
325+
316326

317327
def _gguf_parse_value(_value, data_type):
318328
if not isinstance(data_type, list):

src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from tqdm.auto import tqdm
2121

2222
from .integrations import (
23+
GGUF_CONFIG_DEFAULTS_MAPPING,
2324
GGUF_CONFIG_MAPPING,
2425
GGUF_TOKENIZER_MAPPING,
2526
_gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
437438
all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
438439
)
439440

441+
# Set GGUF-specific default values
442+
config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
443+
updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
444+
)
445+
for key, value in config_defaults.items():
446+
parsed_parameters["config"].setdefault(key, value)
447+
440448
# List all key-value pairs in a columnized format
441449
for gguf_key, field in reader.fields.items():
442450
gguf_key = gguf_key.replace(architecture, updated_architecture)

0 commit comments

Comments
 (0)