
Commit e70d52a

Add support for responses API; maintain backward compatibility
1 parent a631841 commit e70d52a

6 files changed: +623 / -9 lines changed


openevolve/config.py
Lines changed: 12 additions & 0 deletions

@@ -78,6 +78,13 @@ class LLMModelConfig:
     # Reasoning parameters
     reasoning_effort: Optional[str] = None

+    # API type selection: "auto" (default), "responses", or "chat_completions"
+    # - "auto": Use Responses API for OpenAI endpoints, Chat Completions for others
+    # - "responses": Force use of OpenAI Responses API
+    # - "chat_completions": Force use of Chat Completions API
+    # None means inherit from parent config (defaults to "auto")
+    api_type: Optional[str] = None
+
     def __post_init__(self):
         """Post-initialization to resolve ${VAR} env var references in api_key"""
         self.api_key = _resolve_env_var(self.api_key)

@@ -116,6 +123,9 @@ class LLMConfig(LLMModelConfig):
     # Reasoning parameters (inherited from LLMModelConfig but can be overridden)
     reasoning_effort: Optional[str] = None

+    # API type for LLM level (defaults to "auto" for auto-detection)
+    api_type: str = "auto"
+
     def __post_init__(self):
         """Post-initialization to set up model configurations"""
         super().__post_init__()  # Resolve ${VAR} in api_key at LLMConfig level

@@ -170,6 +180,7 @@ def __post_init__(self):
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
             "reasoning_effort": self.reasoning_effort,
+            "api_type": self.api_type,
         }
         self.update_model_params(shared_config)

@@ -223,6 +234,7 @@ def rebuild_models(self) -> None:
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
             "reasoning_effort": self.reasoning_effort,
+            "api_type": self.api_type,
         }
         self.update_model_params(shared_config)

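For reference, a minimal sketch (not part of the commit) of how the new field might be set when building a model config directly in Python. It assumes LLMModelConfig can be constructed with the name and api_base fields that the tests below reference; the model names and endpoint values are purely illustrative:

    from openevolve.config import LLMModelConfig

    # api_type left as None: inherit from the parent LLMConfig, which defaults to "auto"
    auto_model = LLMModelConfig(name="gpt-4o-mini")

    # A local OpenAI-compatible server explicitly pinned to the Chat Completions API
    local_model = LLMModelConfig(
        name="qwen3-8b",
        api_base="http://localhost:8000/v1",
        api_type="chat_completions",
    )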
openevolve/llm/openai.py
Lines changed: 111 additions & 6 deletions

@@ -34,6 +34,7 @@ def __init__(
         self.api_key = model_cfg.api_key
         self.random_seed = getattr(model_cfg, "random_seed", None)
         self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None)
+        self.api_type = getattr(model_cfg, "api_type", "auto")

         # Set up API client
         # OpenAI client requires max_retries to be int, not None

@@ -45,6 +46,9 @@ def __init__(
             max_retries=max_retries,
         )

+        # Determine which API to use (Responses API vs Chat Completions)
+        self.use_responses_api = self._should_use_responses_api()
+
         # Only log unique models to reduce duplication
         if not hasattr(logger, "_initialized_models"):
             logger._initialized_models = set()

@@ -53,6 +57,39 @@ def __init__(
             logger.info(f"Initialized OpenAI LLM with model: {self.model}")
             logger._initialized_models.add(self.model)

+    def _should_use_responses_api(self) -> bool:
+        """
+        Determine if the Responses API should be used instead of Chat Completions.
+
+        The Responses API is only available on OpenAI's official endpoints.
+        For other providers (OpenRouter, Google AI Studio, local servers, etc.),
+        we must use the Chat Completions API for compatibility.
+
+        Returns:
+            True if Responses API should be used, False for Chat Completions
+        """
+        # Normalize api_type (None defaults to "auto")
+        api_type = self.api_type if self.api_type is not None else "auto"
+
+        # Check for explicit override
+        if api_type == "responses":
+            return True
+        if api_type == "chat_completions":
+            return False
+
+        # Auto-detect based on API base URL
+        if not self.api_base:
+            return False
+
+        api_lower = self.api_base.lower()
+
+        # Only use Responses API for official OpenAI endpoints
+        return (
+            api_lower.startswith("https://api.openai.com") or
+            api_lower.startswith("https://eu.api.openai.com") or
+            api_lower.startswith("https://apac.api.openai.com")
+        )
+
     async def generate(self, prompt: str, **kwargs) -> str:
         """Generate text from a prompt"""
         return await self.generate_with_context(

@@ -159,14 +196,82 @@ async def generate_with_context(
                 raise

     async def _call_api(self, params: Dict[str, Any]) -> str:
-        """Make the actual API call"""
+        """Make the actual API call, dispatching to appropriate API"""
         # Use asyncio to run the blocking API call in a thread pool
         loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(
-            None, lambda: self.client.chat.completions.create(**params)
-        )
+
+        if self.use_responses_api:
+            response = await loop.run_in_executor(
+                None, lambda: self._call_responses_api(params)
+            )
+            response_text = response.output_text
+        else:
+            response = await loop.run_in_executor(
+                None, lambda: self.client.chat.completions.create(**params)
+            )
+            response_text = response.choices[0].message.content
+
         # Logging of system prompt, user message and response content
         logger = logging.getLogger(__name__)
         logger.debug(f"API parameters: {params}")
-        logger.debug(f"API response: {response.choices[0].message.content}")
-        return response.choices[0].message.content
+        logger.debug(f"API response: {response_text}")
+        return response_text
+
+    def _call_responses_api(self, chat_params: Dict[str, Any]) -> Any:
+        """
+        Convert Chat Completions params to Responses API format and make the call.
+
+        The Responses API uses a different parameter structure:
+        - 'messages' -> 'input' (can be array of messages)
+        - System message in 'messages' -> 'instructions' parameter
+        - 'max_tokens'/'max_completion_tokens' -> 'max_output_tokens'
+        - 'reasoning_effort' -> 'reasoning: {"effort": ...}'
+
+        Args:
+            chat_params: Parameters in Chat Completions format
+
+        Returns:
+            Response object from client.responses.create()
+        """
+        messages = chat_params["messages"]
+
+        # Extract system message as instructions, keep other messages as input
+        instructions = None
+        input_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                instructions = msg["content"]
+            else:
+                input_messages.append(msg)
+
+        # Build Responses API params
+        resp_params = {
+            "model": chat_params["model"],
+            "input": input_messages,
+        }
+
+        if instructions:
+            resp_params["instructions"] = instructions
+
+        # Map token limits (Responses API uses max_output_tokens)
+        if "max_completion_tokens" in chat_params:
+            resp_params["max_output_tokens"] = chat_params["max_completion_tokens"]
+        elif "max_tokens" in chat_params:
+            resp_params["max_output_tokens"] = chat_params["max_tokens"]
+
+        # Map sampling parameters
+        if "temperature" in chat_params:
+            resp_params["temperature"] = chat_params["temperature"]
+        if "top_p" in chat_params:
+            resp_params["top_p"] = chat_params["top_p"]
+        if "seed" in chat_params:
+            resp_params["seed"] = chat_params["seed"]
+
+        # Map reasoning_effort to nested format for Responses API
+        if "reasoning_effort" in chat_params:
+            resp_params["reasoning"] = {"effort": chat_params["reasoning_effort"]}
+
+        # Disable conversation storage (not needed for OpenEvolve's use case)
+        resp_params["store"] = False
+
+        return self.client.responses.create(**resp_params)

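To make the mapping concrete, here is an illustrative sketch (not part of the commit) of what _call_responses_api does to a typical Chat Completions payload; the model name and message contents are made up:

    chat_params = {
        "model": "gpt-4.1-mini",
        "messages": [
            {"role": "system", "content": "You are a code-evolution assistant."},
            {"role": "user", "content": "Improve this function."},
        ],
        "max_completion_tokens": 4096,
        "temperature": 0.7,
        "reasoning_effort": "medium",
    }

    # The resulting Responses API call is roughly:
    # client.responses.create(
    #     model="gpt-4.1-mini",
    #     instructions="You are a code-evolution assistant.",
    #     input=[{"role": "user", "content": "Improve this function."}],
    #     max_output_tokens=4096,
    #     temperature=0.7,
    #     reasoning={"effort": "medium"},
    #     store=False,
    # )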
pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ authors = [
     {name = "codelion"}
 ]
 dependencies = [
-    "openai>=1.0.0",
+    "openai>=1.80.0",  # Required for Responses API
     "pyyaml>=6.0",
     "numpy>=1.22.0",
     "tqdm>=4.64.0",

tests/test_openai_model_detection.py
Lines changed: 87 additions & 0 deletions

@@ -94,5 +94,92 @@ def is_reasoning_model(model_name, api_base):
     )


+class TestResponsesAPIDetection(unittest.TestCase):
+    """Test Responses API vs Chat Completions API selection logic"""
+
+    def _should_use_responses_api(self, api_base, api_type="auto"):
+        """Test function that mimics the logic in openai.py"""
+        # Check for explicit override
+        if api_type == "responses":
+            return True
+        if api_type == "chat_completions":
+            return False
+
+        # Auto-detect based on API base URL
+        if not api_base:
+            return False
+
+        api_lower = api_base.lower()
+
+        # Only use Responses API for official OpenAI endpoints
+        return (
+            api_lower.startswith("https://api.openai.com") or
+            api_lower.startswith("https://eu.api.openai.com") or
+            api_lower.startswith("https://apac.api.openai.com")
+        )
+
+    def test_openai_endpoints_use_responses_api(self):
+        """Test that official OpenAI endpoints use Responses API by default"""
+        test_cases = [
+            ("https://api.openai.com/v1", True, "Main OpenAI endpoint"),
+            ("https://api.openai.com", True, "OpenAI without path"),
+            ("https://eu.api.openai.com/v1", True, "EU endpoint"),
+            ("https://apac.api.openai.com/v1", True, "APAC endpoint"),
+            ("https://API.OPENAI.COM/v1", True, "Uppercase URL"),
+        ]
+
+        for api_base, expected, description in test_cases:
+            with self.subTest(api_base=api_base, desc=description):
+                result = self._should_use_responses_api(api_base)
+                self.assertEqual(
+                    result,
+                    expected,
+                    f"API base '{api_base}' ({description}): expected {expected}, got {result}",
+                )
+
+    def test_non_openai_endpoints_use_chat_completions(self):
+        """Test that non-OpenAI endpoints use Chat Completions API"""
+        test_cases = [
+            ("https://generativelanguage.googleapis.com/v1beta/openai/", False, "Google AI Studio"),
+            ("https://openrouter.ai/api/v1", False, "OpenRouter"),
+            ("http://localhost:8000/v1", False, "Local server"),
+            ("https://api.anthropic.com/v1", False, "Anthropic"),
+            ("https://api.deepseek.com/v1", False, "DeepSeek"),
+            (None, False, "None API base"),
+            ("", False, "Empty API base"),
+        ]
+
+        for api_base, expected, description in test_cases:
+            with self.subTest(api_base=api_base, desc=description):
+                result = self._should_use_responses_api(api_base)
+                self.assertEqual(
+                    result,
+                    expected,
+                    f"API base '{api_base}' ({description}): expected {expected}, got {result}",
+                )
+
+    def test_explicit_api_type_override(self):
+        """Test that api_type override works correctly"""
+        # Force responses API even for non-OpenAI endpoint
+        self.assertTrue(
+            self._should_use_responses_api("http://localhost:8000/v1", api_type="responses")
+        )
+
+        # Force chat completions even for OpenAI endpoint
+        self.assertFalse(
+            self._should_use_responses_api("https://api.openai.com/v1", api_type="chat_completions")
+        )
+
+        # Auto detection with OpenAI endpoint
+        self.assertTrue(
+            self._should_use_responses_api("https://api.openai.com/v1", api_type="auto")
+        )
+
+        # Auto detection with non-OpenAI endpoint
+        self.assertFalse(
+            self._should_use_responses_api("http://localhost:8000/v1", api_type="auto")
+        )
+
+
 if __name__ == "__main__":
     unittest.main()

tests/test_reasoning_effort_config.py
Lines changed: 49 additions & 2 deletions

@@ -132,6 +132,7 @@ def test_openai_llm_uses_reasoning_effort(self):
         model_cfg.api_key = "test-key"
         model_cfg.random_seed = None
         model_cfg.reasoning_effort = "high"
+        model_cfg.api_type = "chat_completions"  # Force Chat Completions API for this test

         # Mock OpenAI client to avoid actual API calls
         with unittest.mock.patch('openai.OpenAI'):

@@ -140,8 +141,8 @@ def test_openai_llm_uses_reasoning_effort(self):
             # Verify the reasoning_effort is stored
             self.assertEqual(llm.reasoning_effort, "high")

-    def test_reasoning_effort_passed_to_api_params(self):
-        """Test that reasoning_effort is included in API call parameters"""
+    def test_reasoning_effort_passed_to_api_params_chat_completions(self):
+        """Test that reasoning_effort is included in API call parameters (Chat Completions)"""
         model_cfg = Mock()
         model_cfg.name = "gpt-oss-120b"
         model_cfg.system_message = "system"

@@ -155,6 +156,7 @@ def test_reasoning_effort_passed_to_api_params(self):
         model_cfg.api_key = "test-key"
         model_cfg.random_seed = None
         model_cfg.reasoning_effort = "medium"
+        model_cfg.api_type = "chat_completions"  # Force Chat Completions API for this test

         with unittest.mock.patch('openai.OpenAI'):
             llm = OpenAILLM(model_cfg)

@@ -178,6 +180,51 @@ def test_reasoning_effort_passed_to_api_params(self):
             # Verify the API was called with reasoning_effort
             llm.client.chat.completions.create.assert_called_once_with(**test_params)

+    def test_reasoning_effort_passed_to_responses_api(self):
+        """Test that reasoning_effort is converted to nested format for Responses API"""
+        model_cfg = Mock()
+        model_cfg.name = "gpt-oss-120b"
+        model_cfg.system_message = "system"
+        model_cfg.temperature = 0.7
+        model_cfg.top_p = 0.95
+        model_cfg.max_tokens = 4096
+        model_cfg.timeout = 60
+        model_cfg.retries = 3
+        model_cfg.retry_delay = 5
+        model_cfg.api_base = "https://api.openai.com/v1"
+        model_cfg.api_key = "test-key"
+        model_cfg.random_seed = None
+        model_cfg.reasoning_effort = "medium"
+        model_cfg.api_type = "responses"  # Force Responses API for this test
+
+        with unittest.mock.patch('openai.OpenAI'):
+            llm = OpenAILLM(model_cfg)
+
+            # Test the _call_api method directly with mocked client
+            mock_response = Mock()
+            mock_response.output_text = "Test response"
+            llm.client.responses.create.return_value = mock_response
+
+            # Input params in Chat Completions format
+            test_params = {
+                "model": "gpt-oss-120b",
+                "messages": [{"role": "system", "content": "Test"}, {"role": "user", "content": "Test user"}],
+                "max_completion_tokens": 4096,
+                "reasoning_effort": "medium"
+            }
+
+            result = asyncio.run(llm._call_api(test_params))
+
+            # Verify the Responses API was called with nested reasoning format
+            llm.client.responses.create.assert_called_once()
+            call_args = llm.client.responses.create.call_args
+            self.assertEqual(call_args.kwargs["model"], "gpt-oss-120b")
+            self.assertEqual(call_args.kwargs["instructions"], "Test")
+            self.assertEqual(call_args.kwargs["input"], [{"role": "user", "content": "Test user"}])
+            self.assertEqual(call_args.kwargs["reasoning"], {"effort": "medium"})
+            self.assertEqual(call_args.kwargs["max_output_tokens"], 4096)
+            self.assertFalse(call_args.kwargs["store"])
+
     def test_yaml_file_loading_with_reasoning_effort(self):
         """Test loading reasoning_effort from actual YAML file"""
         yaml_content = """
