@@ -34,6 +34,7 @@ def __init__(
         self.api_key = model_cfg.api_key
         self.random_seed = getattr(model_cfg, "random_seed", None)
         self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None)
+        self.api_type = getattr(model_cfg, "api_type", "auto")
 
         # Set up API client
         # OpenAI client requires max_retries to be int, not None
@@ -45,6 +46,9 @@ def __init__(
             max_retries=max_retries,
         )
 
+        # Determine which API to use (Responses API vs Chat Completions)
+        self.use_responses_api = self._should_use_responses_api()
+
         # Only log unique models to reduce duplication
         if not hasattr(logger, "_initialized_models"):
             logger._initialized_models = set()
@@ -53,6 +57,39 @@ def __init__(
             logger.info(f"Initialized OpenAI LLM with model: {self.model}")
             logger._initialized_models.add(self.model)
 
+    def _should_use_responses_api(self) -> bool:
+        """
+        Determine if the Responses API should be used instead of Chat Completions.
+
+        The Responses API is only available on OpenAI's official endpoints.
+        For other providers (OpenRouter, Google AI Studio, local servers, etc.),
+        we must use the Chat Completions API for compatibility.
+
+        Returns:
+            True if the Responses API should be used, False for Chat Completions
+        """
+        # Normalize api_type (None defaults to "auto")
+        api_type = self.api_type if self.api_type is not None else "auto"
+
+        # Check for an explicit override
+        if api_type == "responses":
+            return True
+        if api_type == "chat_completions":
+            return False
+
+        # Auto-detect based on the API base URL
+        if not self.api_base:
+            return False
+
+        api_lower = self.api_base.lower()
+
+        # Only use the Responses API for official OpenAI endpoints
+        return (
+            api_lower.startswith("https://api.openai.com") or
+            api_lower.startswith("https://eu.api.openai.com") or
+            api_lower.startswith("https://apac.api.openai.com")
+        )
+
     async def generate(self, prompt: str, **kwargs) -> str:
         """Generate text from a prompt"""
         return await self.generate_with_context(
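
A minimal sketch (not part of the diff) of how the selection logic above resolves; the base URLs and config values below are hypothetical examples, not taken from this change:

# Illustration only: outcomes of _should_use_responses_api() when api_type="auto".
examples = [
    ("https://api.openai.com/v1", True),      # official endpoint -> Responses API
    ("https://eu.api.openai.com/v1", True),   # EU data-residency endpoint
    ("https://openrouter.ai/api/v1", False),  # third-party -> Chat Completions
    ("http://localhost:1234/v1", False),      # local server -> Chat Completions
    (None, False),                            # no base URL -> Chat Completions
]
# An explicit api_type setting bypasses the detection entirely:
#   api_type="responses"        -> always True
#   api_type="chat_completions" -> always False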
@@ -159,14 +196,82 @@ async def generate_with_context(
                 raise
 
     async def _call_api(self, params: Dict[str, Any]) -> str:
-        """Make the actual API call"""
+        """Make the actual API call, dispatching to the appropriate API"""
         # Use asyncio to run the blocking API call in a thread pool
         loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(
-            None, lambda: self.client.chat.completions.create(**params)
-        )
+
+        if self.use_responses_api:
+            response = await loop.run_in_executor(
+                None, lambda: self._call_responses_api(params)
+            )
+            response_text = response.output_text
+        else:
+            response = await loop.run_in_executor(
+                None, lambda: self.client.chat.completions.create(**params)
+            )
+            response_text = response.choices[0].message.content
+
         # Logging of system prompt, user message and response content
         logger = logging.getLogger(__name__)
         logger.debug(f"API parameters: {params}")
-        logger.debug(f"API response: {response.choices[0].message.content}")
-        return response.choices[0].message.content
+        logger.debug(f"API response: {response_text}")
+        return response_text
+
+    def _call_responses_api(self, chat_params: Dict[str, Any]) -> Any:
+        """
+        Convert Chat Completions params to Responses API format and make the call.
+
+        The Responses API uses a different parameter structure:
+        - 'messages' -> 'input' (can be an array of messages)
+        - system message in 'messages' -> 'instructions' parameter
+        - 'max_tokens'/'max_completion_tokens' -> 'max_output_tokens'
+        - 'reasoning_effort' -> 'reasoning': {"effort": ...}
+
+        Args:
+            chat_params: Parameters in Chat Completions format
+
+        Returns:
+            Response object from client.responses.create()
+        """
+        messages = chat_params["messages"]
+
+        # Extract the system message as instructions; keep other messages as input
+        instructions = None
+        input_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                instructions = msg["content"]
+            else:
+                input_messages.append(msg)
+
+        # Build the Responses API params
+        resp_params = {
+            "model": chat_params["model"],
+            "input": input_messages,
+        }
+
+        if instructions:
+            resp_params["instructions"] = instructions
+
+        # Map token limits (the Responses API uses max_output_tokens)
+        if "max_completion_tokens" in chat_params:
+            resp_params["max_output_tokens"] = chat_params["max_completion_tokens"]
+        elif "max_tokens" in chat_params:
+            resp_params["max_output_tokens"] = chat_params["max_tokens"]
+
+        # Map sampling parameters
+        if "temperature" in chat_params:
+            resp_params["temperature"] = chat_params["temperature"]
+        if "top_p" in chat_params:
+            resp_params["top_p"] = chat_params["top_p"]
+        if "seed" in chat_params:
+            resp_params["seed"] = chat_params["seed"]
+
+        # Map reasoning_effort to the nested format used by the Responses API
+        if "reasoning_effort" in chat_params:
+            resp_params["reasoning"] = {"effort": chat_params["reasoning_effort"]}
+
+        # Disable conversation storage (not needed for OpenEvolve's use case)
+        resp_params["store"] = False
+
+        return self.client.responses.create(**resp_params)
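
To make the parameter mapping concrete, a hypothetical before/after pair for _call_responses_api; the model name and all values are invented for illustration:

# Hypothetical Chat Completions params passed into _call_responses_api():
chat_params = {
    "model": "o4-mini",  # placeholder model name
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Rewrite this function."},
    ],
    "max_completion_tokens": 1024,
    "temperature": 0.7,
    "reasoning_effort": "medium",
}

# Resulting Responses API params built by the method above:
# {
#     "model": "o4-mini",
#     "input": [{"role": "user", "content": "Rewrite this function."}],
#     "instructions": "You are a helpful assistant.",
#     "max_output_tokens": 1024,
#     "temperature": 0.7,
#     "reasoning": {"effort": "medium"},
#     "store": False,
# }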