Skip to content

Commit 474e60b

Browse files
Add debug logging to CePO
1 parent 42e5f6a commit 474e60b

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

optillm.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,6 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
294294
elif approach == 're2':
295295
return re2_approach(system_prompt, initial_query, client, model, n=server_config['n'])
296296
elif approach == 'cepo':
297-
logger.debug(f"Calling with {cepo_config}")
298297
return cepo(system_prompt, initial_query, client, model, cepo_config)
299298
elif approach in plugin_approaches:
300299
return plugin_approaches[approach](system_prompt, initial_query, client, model)

optillm/cepo.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
# Apache license 2 - added after the fork for the CePO method
22
import re
3+
import yaml
4+
import logging
35

46
from dataclasses import dataclass
57
from typing import Literal, Any
68
from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
79
from openai import BadRequestError as OpenAIBadRequestError
810

9-
import yaml
11+
12+
logger = logging.getLogger(__name__)
1013

1114

1215
@dataclass
@@ -111,11 +114,13 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
111114
if response.choices[0].finish_reason == "length":
112115
messages.append({"role": "assistant", "content": response.choices[0].message.content})
113116
cb_log[f"messages_planning_{i}_rejected_due_to_length"] = messages
117+
logger.debug(f"Plan proposal rejected due to length. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
114118
continue
115119

116120
plans.append(response.choices[0].message.content)
117121
messages.append({"role": "assistant", "content": response.choices[0].message.content})
118122
cb_log[f"messages_planning_{i}"] = messages
123+
logger.debug(f"Plan proposal generated. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
119124

120125
if len(plans) == cepo_config.planning_n:
121126
break
@@ -125,6 +130,7 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
125130
plans.append(response.choices[0].message.content)
126131
messages.append({"role": "assistant", "content": response.choices[0].message.content})
127132
cb_log[f"messages_planning_{i}_no_plans_so_taking_the_last_one"] = messages
133+
logger.debug(f"No plans generated successfully. Taking the last one from rejected due to length.\nMessages: {messages}")
128134

129135
# Step 3 - Review and address inconsistencies
130136
try:
@@ -164,6 +170,7 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
164170
completion_tokens += response.usage.completion_tokens
165171

166172
cb_log["messages"] = messages
173+
logger.debug(f"Answer generated.\nMessages: {messages}")
167174
return response.choices[0].message.content, completion_tokens, cb_log
168175

169176

@@ -186,6 +193,7 @@ def generate_n_completions(system_prompt: str, initial_query: str, client: Any,
186193
completions = []
187194

188195
for i in range(cepo_config.bestofn_n):
196+
logger.debug(f"Generating completion {i + 1} out of {cepo_config.bestofn_n}")
189197
response_i, completion_tokens_i, cb_log_i = generate_completion(system_prompt, initial_query, client, model, cepo_config)
190198
completions.append(response_i)
191199
completion_tokens += completion_tokens_i
@@ -257,6 +265,7 @@ def rate_completions_absolute(system_prompt: str, initial_query: str, client: An
257265

258266
rating_response = rating_response.choices[0].message.content.strip()
259267
cb_log[f"rating_response_{i}"] = rating_response
268+
logger.debug(f"Rating response for completion {i}: {rating_response}")
260269

261270
pattern = r"Rating: \[\[(\d+)\]\]"
262271
match = re.search(pattern, rating_response)
@@ -272,6 +281,7 @@ def rate_completions_absolute(system_prompt: str, initial_query: str, client: An
272281
best_index = ratings.index(max(ratings))
273282
cb_log["ratings"] = ratings
274283
cb_log["best_index"] = best_index
284+
logger.debug(f"Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
275285
return completions[best_index], completion_tokens, cb_log
276286

277287

@@ -331,6 +341,7 @@ def rate_completions_pairwise(system_prompt: str, initial_query: str, client: An
331341

332342
rating_response = rating_response.choices[0].message.content.strip()
333343
cb_log[f"rating_response_for_pair_{pair[0]}_{pair[1]}"] = rating_response
344+
logger.debug(f"Rating response for pair {pair}: {rating_response}")
334345

335346
pattern = r"Better Response: \[\[(\d+)\]\]"
336347
match = re.search(pattern, rating_response)
@@ -349,6 +360,7 @@ def rate_completions_pairwise(system_prompt: str, initial_query: str, client: An
349360
best_index = ratings.index(max(ratings))
350361
cb_log["ratings"] = ratings
351362
cb_log["best_index"] = best_index
363+
logger.debug(f"Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
352364
return completions[best_index], completion_tokens, cb_log
353365

354366

@@ -376,7 +388,7 @@ def cepo(system_prompt: str, initial_query: str, client: Any, model: str, cepo_c
376388
Returns:
377389
Tuple[str, int, dict]: The generated completion, number of tokens used
378390
"""
379-
391+
380392
# Generate completions
381393
completions, completion_tokens_planning, cb_log = generate_n_completions(system_prompt, initial_query, client, model, cepo_config) # cb_log is a dictionary for debugging purposes
382394

0 commit comments

Comments
 (0)