fix(google_gemini): Update thinking budgets from 24576 to 32768 tokens

owndev · web-flow · commit 3e30764e18d7 · 2025-11-25T09:38:01.000+01:00
diff --git a/docs/google-gemini-integration.md b/docs/google-gemini-integration.md
@@ -26,7 +26,7 @@ This integration enables **Open WebUI** to interact with **Google Gemini** model
 > Streaming is automatically disabled for image generation models to prevent chunk size issues.
 
 - **Thinking Support**  
-  Support reasoning and thinking steps, allowing models to break down complex tasks. Includes configurable thinking levels for Gemini 3 Pro ("low"/"high") and thinking budgets (0-24576 tokens) for other thinking-capable models.
+  Support reasoning and thinking steps, allowing models to break down complex tasks. Includes configurable thinking levels ("low"/"high") for Gemini 3 models and thinking budgets (0-32768 tokens) for other thinking-capable models.
 
   > [!Note]
   > **Thinking Levels vs Thinking Budgets**: Gemini 3 Pro models use `thinking_level` ("low" or "high"), while other models like Gemini 2.5 use `thinking_budget` (token count). See [Gemini Thinking Documentation](https://ai.google.dev/gemini-api/docs/thinking) for details.
diff --git a/pipelines/google/google_gemini.py b/pipelines/google/google_gemini.py
@@ -32,8 +32,8 @@
   - Optimized payload creation for image generation models
   - Configurable image processing parameters (size, quality, compression)
   - Flexible upload fallback options and optimization controls
-  - Configurable thinking levels (low/high) for Gemini 3 Pro models
-  - Configurable thinking budgets (0-24576 tokens) for Gemini 2.5 models
+  - Configurable thinking levels (low/high) for Gemini 3 models
+  - Configurable thinking budgets (0-32768 tokens) for Gemini 2.5 models
 """
 
 import os
@@ -175,7 +175,7 @@ class Valves(BaseModel):
         )
         THINKING_LEVEL: str = Field(
             default=os.getenv("GOOGLE_THINKING_LEVEL", ""),
-            description="Thinking level for Gemini 3 Pro models only ('low' or 'high'). "
+            description="Thinking level for Gemini 3 models ('low' or 'high'). "
             "Ignored for other models. Empty string means use model default.",
         )
         USE_VERTEX_AI: bool = Field(
@@ -1523,6 +1523,7 @@ def _configure_generation(
                         )
                     else:
                         # -1 means dynamic thinking
+                        thinking_config_params["thinking_budget"] = -1
                         self.log.debug(
                             f"Using dynamic thinking (model decides) for model {model_id}"
                         )