updates

codelion · codelion · commit a46533e538cd · 2024-10-04T12:10:46.000-07:00
diff --git a/optillm/plugins/memory_plugin.py b/optillm/plugins/memory_plugin.py
@@ -76,7 +76,7 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
     completion_tokens = 0
 
     # Process context and add to memory
-    chunk_size = 10000
+    chunk_size = 100000
     for i in range(0, len(context), chunk_size):
         chunk = context[i:i+chunk_size]
         # print(f"chunk: {chunk}")
@@ -100,10 +100,9 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
     response = client.chat.completions.create(
         model=model,
         messages=[{"role": "user", "content": prompt}],
-        max_tokens=1000
     )
-    print(f"response : {response}")
+    # print(f"response : {response}")
     final_response = response.choices[0].message.content.strip()
     completion_tokens += response.usage.completion_tokens
-    print(f"final_response: {final_response}")
+    # print(f"final_response: {final_response}")
     return final_response, completion_tokens
diff --git a/scripts/eval_frames_benchmark.py b/scripts/eval_frames_benchmark.py
@@ -8,8 +8,9 @@
 from datasets import load_dataset
 from tqdm import tqdm
 
-client = OpenAI(api_key="none", base_url="http://localhost:8000/v1")
-SLEEP_INTERVAL = 60
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+# client = OpenAI()
+SLEEP_INTERVAL = 300
 
 def load_existing_results(filename: str) -> List[Dict]:
     try:
@@ -29,7 +30,6 @@ def get_last_processed_index(results: List[Dict]) -> int:
         return -1
     return max(int(r.get('index', -1)) for r in results)
 
-
 def generate_llm_prompt(prompt: str, wiki_links: List[str]) -> str:
     return f"Here are the relevant Wikipedia articles:\n{wiki_links}\n\nBased on all the information, answer the query. \n\nQuery: {prompt}\n\n"
 
@@ -54,7 +54,7 @@ def evaluate_response(question: str, llm_response: str, ground_truth: str, model
 response. Please analyze the provided data and make a decision.
 ===Instructions===
 1. Carefully compare the "Predicted Answer" with the "Ground Truth Answer".
-2. Consider the substance of the answers – look for equivalent information or correct answers.
+2. Consider the substance of the answers - look for equivalent information or correct answers.
 Do not focus on exact wording unless the exact wording is crucial to the meaning.
 3. Your final decision should be based on whether the meaning and the vital facts of the
 "Ground Truth Answer" are present in the "Predicted Answer:"
@@ -122,8 +122,8 @@ def main(model: str):
         }
         
         save_result(filename, result)
-        print(f"Index: {index}, Decision: {result['evaluation_decision']}")
-        time.sleep(SLEEP_INTERVAL)
+        # print(f"Index: {index}, Decision: {result['evaluation_decision']}")
+        # time.sleep(SLEEP_INTERVAL)
 
     # Calculate and print summary statistics
     results = load_existing_results(filename)
diff --git a/scripts/gen_optillm_dataset.py b/scripts/gen_optillm_dataset.py
@@ -80,9 +80,12 @@ async def generate_dataset(num_samples: int, output_file: str):
     dataset = load_dataset("lmsys/arena-hard-auto-v0.1", split="train")
     
     with open(output_file, "w") as f:
-        for sample in tqdm(dataset.select(range(29, 29 + num_samples)), total=num_samples):
-            result = await process_sample(sample)
-            f.write(json.dumps(result) + "\n")
+        for sample in tqdm(dataset.select(range( num_samples)), total=num_samples):
+            try:
+                result = await process_sample(sample)
+                f.write(json.dumps(result) + "\n")
+            except Exception as e:
+                print(f"Skip over this item due to error {str(e)}")
 
 def main():
     parser = argparse.ArgumentParser(description="Generate OptILM dataset")
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
@@ -1 +1,3 @@
 datasets
+accelerate
+huggingface_hub
diff --git a/scripts/train_optillm_classifier.py b/scripts/train_optillm_classifier.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1,3 @@` |
| `1` | `1` | `datasets` |
| | `2` | `+accelerate` |
| | `3` | `+huggingface_hub` |