Add a flag to print intermediate outputs in CePO

pawelf-cerebras · pawelf-cerebras · commit f72224ebfee9 · 2025-01-23T15:06:20.000-08:00
diff --git a/README.md b/README.md
@@ -331,6 +331,7 @@ optillm supports various command-line arguments and environment variables for co
 | `--cepo_planning_max_tokens_step2`  | Maximum number of tokens in step 2 of planning stage            | 4096            |
 | `--cepo_planning_max_tokens_step3`  | Maximum number of tokens in step 3 of planning stage            | 4096            |
 | `--cepo_planning_max_tokens_step4`  | Maximum number of tokens in step 4 of planning stage            | 4096            |
+| `--cepo_print_output`               | Whether to print the output of each stage                       | False           |
 | `--cepo_config_file`                | Path to CePO configuration file                                 | None            |
 
 When using Docker, these can be set as environment variables prefixed with `OPTILLM_`.
diff --git a/optillm/cepo.py b/optillm/cepo.py
@@ -1,16 +1,12 @@
 import re
 import yaml
-import logging
 
 from dataclasses import dataclass
 from typing import Literal, Any
 from cerebras.cloud.sdk import BadRequestError as CerebrasBadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
 
 
-logger = logging.getLogger(__name__)
-
-
 @dataclass
 class CepoConfig:
     bestofn_n: int  # number of responses to be generated in best of n stage
@@ -27,6 +23,7 @@ class CepoConfig:
     planning_max_tokens_step2: int  # maximum number of tokens in step 2 of planning stage
     planning_max_tokens_step3: int  # maximum number of tokens in step 3 of planning stage
     planning_max_tokens_step4: int  # maximum number of tokens in step 4 of planning stage
+    print_output: bool = False  # whether to print the output of each stage
 
 
 # given command line arguments which includes a yaml file path, initialize a CePO configuration
@@ -113,13 +110,15 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
         if response.choices[0].finish_reason == "length":
             messages.append({"role": "assistant", "content": response.choices[0].message.content})
             cb_log[f"messages_planning_{i}_rejected_due_to_length"] = messages
-            logger.debug(f"Plan proposal rejected due to length. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
+            if cepo_config.print_output:
+                print(f"\nCePO: Plan proposal rejected due to length. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
             continue
 
         plans.append(response.choices[0].message.content)
         messages.append({"role": "assistant", "content": response.choices[0].message.content})
         cb_log[f"messages_planning_{i}"] = messages
-        logger.debug(f"Plan proposal generated. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
+        if cepo_config.print_output:
+            print(f"\nCePO: Plan proposal generated. Attempt {i + 1} out of {cepo_config.planning_m}.\nMessages: {messages}")
         
         if len(plans) == cepo_config.planning_n:
             break
@@ -129,7 +128,8 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
         plans.append(response.choices[0].message.content)
         messages.append({"role": "assistant", "content": response.choices[0].message.content})
         cb_log[f"messages_planning_{i}_no_plans_so_taking_the_last_one"] = messages
-        logger.debug(f"No plans generated successfully. Taking the last one from rejected due to length.\nMessages: {messages}")
+        if cepo_config.print_output:
+            print(f"\nCePO: No plans generated successfully. Taking the last one from rejected due to length.\nMessages: {messages}")
 
     # Step 3 - Review and address inconsistencies
     try:
@@ -169,7 +169,8 @@ def generate_completion(system_prompt: str, task: str, client: Any, model: str,
     completion_tokens += response.usage.completion_tokens
 
     cb_log["messages"] = messages
-    logger.debug(f"Answer generated.\nMessages: {messages}")
+    if cepo_config.print_output:
+        print(f"\nCePO: Answer generated.\nMessages: {messages}")
     return response.choices[0].message.content, completion_tokens, cb_log
 
 
@@ -192,7 +193,8 @@ def generate_n_completions(system_prompt: str, initial_query: str, client: Any,
     completions = []
 
     for i in range(cepo_config.bestofn_n):
-        logger.debug(f"Generating completion {i + 1} out of {cepo_config.bestofn_n}")
+        if cepo_config.print_output:
+            print(f"\nCePO: Generating completion {i + 1} out of {cepo_config.bestofn_n}")
         response_i, completion_tokens_i, cb_log_i = generate_completion(system_prompt, initial_query, client, model, cepo_config)
         completions.append(response_i)
         completion_tokens += completion_tokens_i
@@ -264,7 +266,8 @@ def rate_completions_absolute(system_prompt: str, initial_query: str, client: An
         
         rating_response = rating_response.choices[0].message.content.strip()
         cb_log[f"rating_response_{i}"] = rating_response
-        logger.debug(f"Rating response for completion {i}: {rating_response}")
+        if cepo_config.print_output:
+            print(f"\nCePO: Rating response for completion {i}: {rating_response}")
 
         pattern = r"Rating: \[\[(\d+)\]\]"
         match = re.search(pattern, rating_response)
@@ -280,7 +283,8 @@ def rate_completions_absolute(system_prompt: str, initial_query: str, client: An
     best_index = ratings.index(max(ratings))
     cb_log["ratings"] = ratings
     cb_log["best_index"] = best_index
-    logger.debug(f"Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
+    if cepo_config.print_output:
+        print(f"\nCePO: Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
     return completions[best_index], completion_tokens, cb_log
 
 
@@ -340,7 +344,8 @@ def rate_completions_pairwise(system_prompt: str, initial_query: str, client: An
         
         rating_response = rating_response.choices[0].message.content.strip()
         cb_log[f"rating_response_for_pair_{pair[0]}_{pair[1]}"] = rating_response
-        logger.debug(f"Rating response for pair {pair}: {rating_response}")
+        if cepo_config.print_output:
+            print(f"\nCePO: Rating response for pair {pair}: {rating_response}")
 
         pattern = r"Better Response: \[\[(\d+)\]\]"
         match = re.search(pattern, rating_response)
@@ -359,7 +364,8 @@ def rate_completions_pairwise(system_prompt: str, initial_query: str, client: An
     best_index = ratings.index(max(ratings))
     cb_log["ratings"] = ratings
     cb_log["best_index"] = best_index
-    logger.debug(f"Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
+    if cepo_config.print_output:
+        print(f"\nCePO: Finished rating completions. Ratings: {ratings}, best completion index: {best_index}")
     return completions[best_index], completion_tokens, cb_log