Skip to content

Commit c545c67

Browse files
Copilot and owndev committed
fix: Update thinking config per feedback
- Change max thinking_budget from 24576 to 32768
- Support all Gemini 3 models (not just Pro) for thinking_level
- Make thinking config independent of include_thoughts
- Update documentation to clarify model-specific settings

Co-authored-by: owndev <69784886+owndev@users.noreply.github.com>
1 parent 521897d commit c545c67

File tree

2 files changed

+53
-36
lines changed

2 files changed

+53
-36
lines changed

docs/google-gemini-integration.md

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -126,18 +126,18 @@ GOOGLE_IMAGE_UPLOAD_FALLBACK=true
126126
# Default: true
127127
GOOGLE_INCLUDE_THOUGHTS=true
128128

129-
# Thinking budget for Gemini models (Gemini 2.5 and other thinking-capable models)
130-
# -1 = dynamic (model decides), 0 = disabled, 1-24576 = fixed token limit
129+
# Thinking budget for Gemini 2.5 models (not used for Gemini 3 models)
130+
# -1 = dynamic (model decides), 0 = disabled, 1-32768 = fixed token limit
131131
# Default: -1 (dynamic)
132-
# Note: This setting is ignored for Gemini 3 Pro models which use GOOGLE_THINKING_LEVEL instead
132+
# Note: Gemini 3 models use GOOGLE_THINKING_LEVEL instead
133133
GOOGLE_THINKING_BUDGET=-1
134134

135-
# Thinking level for Gemini 3 Pro models only
135+
# Thinking level for Gemini 3 models only
136136
# Valid values: "low", "high", or empty string for model default
137137
# - "low": Minimizes latency and cost, suitable for simple tasks
138138
# - "high": Maximizes reasoning depth, ideal for complex problem-solving
139139
# Default: "" (empty, uses model default)
140-
# Note: This setting is ignored for non-Gemini 3 Pro models
140+
# Note: This setting is ignored for non-Gemini 3 models
141141
GOOGLE_THINKING_LEVEL=""
142142

143143
# Enable streaming responses globally
@@ -252,13 +252,16 @@ The Google Gemini pipeline supports advanced thinking configuration to control h
252252
> [!Note]
253253
> For detailed information about thinking capabilities, see the [Google Gemini Thinking Documentation](https://ai.google.dev/gemini-api/docs/thinking).
254254
255-
### Thinking Levels (Gemini 3 Pro only)
255+
### Thinking Levels (Gemini 3 models)
256256

257-
Gemini 3 Pro models support the `thinking_level` parameter, which controls the depth of reasoning:
257+
Gemini 3 models support the `thinking_level` parameter, which controls the depth of reasoning:
258258

259259
- **`"low"`**: Minimizes latency and cost, suitable for simple tasks, chat, or high-throughput APIs.
260260
- **`"high"`**: Maximizes reasoning depth, ideal for complex problem-solving, code analysis, and agentic workflows.
261261

262+
> [!Note]
263+
> Gemini 3 models use `thinking_level` and do **not** use `thinking_budget`. The thinking budget setting is ignored for Gemini 3 models.
264+
262265
Set via environment variable:
263266

264267
```bash
@@ -288,13 +291,16 @@ response = client.models.generate_content(
288291
print(response.text)
289292
```
290293

291-
### Thinking Budget (Gemini 2.5 and other models)
294+
### Thinking Budget (Gemini 2.5 models)
292295

293-
For models that support thinking budgets (like Gemini 2.5), you can control the maximum number of tokens used during internal reasoning:
296+
For Gemini 2.5 models, `thinking_budget` sets the maximum number of tokens the model may use during internal reasoning:
294297

295298
- **`0`**: Disables thinking entirely for fastest responses
296299
- **`-1`**: Dynamic thinking (model decides based on query complexity) - default
297-
- **`1-24576`**: Fixed token limit for reasoning
300+
- **`1-32768`**: Fixed token limit for reasoning
301+
302+
> [!Note]
303+
> Gemini 3 models do **not** use `thinking_budget`. Use `GOOGLE_THINKING_LEVEL` for Gemini 3 models instead.
298304
299305
Set via environment variable:
300306

@@ -352,7 +358,7 @@ print(response.text)
352358

353359
| Model | thinking_level | thinking_budget |
354360
|-------|---------------|-----------------|
355-
| gemini-3-pro-* | ✅ Supported ("low", "high") | ❌ Not used |
356-
| gemini-2.5-* | ❌ Not used | ✅ Supported (0-24576) |
361+
| gemini-3-* | ✅ Supported ("low", "high") | ❌ Not used |
362+
| gemini-2.5-* | ❌ Not used | ✅ Supported (-1 = dynamic, 0-32768) |
357363
| gemini-2.5-flash-image-* | ❌ Not supported | ❌ Not supported |
358364
| Other models | ❌ Not used | ✅ May be supported |

pipelines/google/google_gemini.py

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ class Valves(BaseModel):
170170
)
171171
THINKING_BUDGET: int = Field(
172172
default=int(os.getenv("GOOGLE_THINKING_BUDGET", "-1")),
173-
description="Thinking budget for Gemini models (0=disabled, -1=dynamic, 1-24576=fixed token limit). "
174-
"Only applicable to models that support thinking (e.g., gemini-2.5-*, gemini-3-*).",
173+
description="Thinking budget for Gemini 2.5 models (0=disabled, -1=dynamic, 1-32768=fixed token limit). "
174+
"Not used for Gemini 3 models which use THINKING_LEVEL instead.",
175175
)
176176
THINKING_LEVEL: str = Field(
177177
default=os.getenv("GOOGLE_THINKING_LEVEL", ""),
@@ -696,22 +696,22 @@ def _check_thinking_level_support(self, model_id: str) -> bool:
696696
"""
697697
Check if a model supports the thinking_level parameter.
698698
699-
Currently, only Gemini 3 Pro models support thinking_level.
700-
Other models use thinking_budget instead.
699+
Gemini 3 models support thinking_level and should NOT use thinking_budget.
700+
Other models (like Gemini 2.5) use thinking_budget instead.
701701
702702
Args:
703703
model_id: The model ID to check
704704
705705
Returns:
706706
True if the model supports thinking_level, False otherwise
707707
"""
708-
# Gemini 3 Pro models support thinking_level
709-
gemini_3_pro_patterns = [
710-
"gemini-3-pro",
708+
# Gemini 3 models support thinking_level (not thinking_budget)
709+
gemini_3_patterns = [
710+
"gemini-3-",
711711
]
712712

713713
model_lower = model_id.lower()
714-
for pattern in gemini_3_pro_patterns:
714+
for pattern in gemini_3_patterns:
715715
if pattern in model_lower:
716716
return True
717717

@@ -750,7 +750,7 @@ def _validate_thinking_budget(self, budget: int) -> int:
750750
budget: The thinking budget integer to validate
751751
752752
Returns:
753-
Validated budget: -1 for dynamic, 0 to disable, or 1-24576 for fixed limit
753+
Validated budget: -1 for dynamic, 0 to disable, or 1-32768 for fixed limit
754754
"""
755755
# -1 means dynamic thinking (let the model decide)
756756
if budget == -1:
@@ -760,18 +760,18 @@ def _validate_thinking_budget(self, budget: int) -> int:
760760
if budget == 0:
761761
return 0
762762

763-
# Validate positive range (1-24576)
763+
# Validate positive range (1-32768)
764764
if budget > 0:
765-
if budget > 24576:
765+
if budget > 32768:
766766
self.log.warning(
767-
f"Thinking budget {budget} exceeds maximum of 24576. Clamping to 24576."
767+
f"Thinking budget {budget} exceeds maximum of 32768. Clamping to 32768."
768768
)
769-
return 24576
769+
return 32768
770770
return budget
771771

772772
# Negative values (except -1) are invalid, treat as -1 (dynamic)
773773
self.log.warning(
774-
f"Invalid thinking budget {budget}. Only -1 (dynamic), 0 (disabled), or 1-24576 are valid. "
774+
f"Invalid thinking budget {budget}. Only -1 (dynamic), 0 (disabled), or 1-32768 are valid. "
775775
"Falling back to dynamic thinking."
776776
)
777777
return -1
@@ -1474,18 +1474,25 @@ def _configure_generation(
14741474
if enable_image_generation:
14751475
gen_config_params["response_modalities"] = ["TEXT", "IMAGE"]
14761476

1477-
# Enable Gemini "Thinking" when requested (default: on) and supported by the model
1478-
include_thoughts = body.get("include_thoughts", True)
1479-
if not self.valves.INCLUDE_THOUGHTS:
1480-
include_thoughts = False
1481-
self.log.debug("Thoughts disabled via GOOGLE_INCLUDE_THOUGHTS")
1482-
1483-
if include_thoughts and self._check_thinking_support(model_id):
1477+
# Configure Gemini thinking/reasoning for models that support it
1478+
# This is independent of include_thoughts - thinking config controls HOW the model reasons,
1479+
# while include_thoughts controls whether the reasoning is shown in the output
1480+
if self._check_thinking_support(model_id):
14841481
try:
1485-
thinking_config_params: Dict[str, Any] = {"include_thoughts": True}
1482+
thinking_config_params: Dict[str, Any] = {}
1483+
1484+
# Determine include_thoughts setting
1485+
include_thoughts = body.get("include_thoughts", True)
1486+
if not self.valves.INCLUDE_THOUGHTS:
1487+
include_thoughts = False
1488+
self.log.debug(
1489+
"Thoughts output disabled via GOOGLE_INCLUDE_THOUGHTS"
1490+
)
1491+
thinking_config_params["include_thoughts"] = include_thoughts
14861492

1487-
# Check if model supports thinking_level (Gemini 3 Pro only)
1493+
# Check if model supports thinking_level (Gemini 3 models)
14881494
if self._check_thinking_level_support(model_id):
1495+
# For Gemini 3 models, use thinking_level (not thinking_budget)
14891496
validated_level = self._validate_thinking_level(
14901497
self.valves.THINKING_LEVEL
14911498
)
@@ -1494,8 +1501,12 @@ def _configure_generation(
14941501
self.log.debug(
14951502
f"Using thinking_level='{validated_level}' for model {model_id}"
14961503
)
1504+
else:
1505+
self.log.debug(
1506+
f"Using default thinking level for model {model_id}"
1507+
)
14971508
else:
1498-
# For non-Gemini 3 Pro models, use thinking_budget
1509+
# For non-Gemini 3 models (e.g., Gemini 2.5), use thinking_budget
14991510
validated_budget = self._validate_thinking_budget(
15001511
self.valves.THINKING_BUDGET
15011512
)

0 commit comments

Comments
 (0)