
Commit a46533e
Commit message: updates
1 parent d027ac4

5 files changed: +307 / -115 lines


optillm/plugins/memory_plugin.py

Lines changed: 3 additions & 4 deletions
@@ -76,7 +76,7 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
     completion_tokens = 0
 
     # Process context and add to memory
-    chunk_size = 10000
+    chunk_size = 100000
     for i in range(0, len(context), chunk_size):
         chunk = context[i:i+chunk_size]
         # print(f"chunk: {chunk}")
@@ -100,10 +100,9 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str
     response = client.chat.completions.create(
         model=model,
         messages=[{"role": "user", "content": prompt}],
-        max_tokens=1000
     )
-    print(f"response : {response}")
+    # print(f"response : {response}")
     final_response = response.choices[0].message.content.strip()
     completion_tokens += response.usage.completion_tokens
-    print(f"final_response: {final_response}")
+    # print(f"final_response: {final_response}")
     return final_response, completion_tokens
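
For reference, a minimal self-contained sketch of the chunking pattern this hunk tunes, assuming a hypothetical in-memory store with an add() method (the real memory object used by the plugin is not shown in this diff):

# Sketch only: SimpleMemory is a hypothetical stand-in for the plugin's
# memory store; the real object is not part of this diff.
class SimpleMemory:
    def __init__(self):
        self.chunks = []

    def add(self, text: str) -> None:
        self.chunks.append(text)

def add_context_to_memory(context: str, memory: SimpleMemory, chunk_size: int = 100000) -> None:
    # Slice the context into fixed-size character chunks, mirroring the loop above.
    for i in range(0, len(context), chunk_size):
        memory.add(context[i:i + chunk_size])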

scripts/eval_frames_benchmark.py

Lines changed: 6 additions & 6 deletions
@@ -8,8 +8,9 @@
 from datasets import load_dataset
 from tqdm import tqdm
 
-client = OpenAI(api_key="none", base_url="http://localhost:8000/v1")
-SLEEP_INTERVAL = 60
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+# client = OpenAI()
+SLEEP_INTERVAL = 300
 
 def load_existing_results(filename: str) -> List[Dict]:
     try:
@@ -29,7 +30,6 @@ def get_last_processed_index(results: List[Dict]) -> int:
         return -1
     return max(int(r.get('index', -1)) for r in results)
 
-
 def generate_llm_prompt(prompt: str, wiki_links: List[str]) -> str:
     return f"Here are the relevant Wikipedia articles:\n{wiki_links}\n\nBased on all the information, answer the query. \n\nQuery: {prompt}\n\n"
 
@@ -54,7 +54,7 @@ def evaluate_response(question: str, llm_response: str, ground_truth: str, model
     response. Please analyze the provided data and make a decision.
     ===Instructions===
     1. Carefully compare the "Predicted Answer" with the "Ground Truth Answer".
-    2. Consider the substance of the answers look for equivalent information or correct answers.
+    2. Consider the substance of the answers - look for equivalent information or correct answers.
     Do not focus on exact wording unless the exact wording is crucial to the meaning.
     3. Your final decision should be based on whether the meaning and the vital facts of the
     "Ground Truth Answer" are present in the "Predicted Answer:"
@@ -122,8 +122,8 @@ def main(model: str):
         }
 
         save_result(filename, result)
-        print(f"Index: {index}, Decision: {result['evaluation_decision']}")
-        time.sleep(SLEEP_INTERVAL)
+        # print(f"Index: {index}, Decision: {result['evaluation_decision']}")
+        # time.sleep(SLEEP_INTERVAL)
 
     # Calculate and print summary statistics
     results = load_existing_results(filename)
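
For context, a small self-contained sketch of the client setup this file now uses: the evaluator talks to a locally running optillm proxy on port 8000 and reads the API key from the environment, while the commented-out line in the diff is the alternative of calling the upstream API directly.

import os

from openai import OpenAI

# Talk to a locally running optillm proxy, reading the key from the
# environment instead of hard-coding it (as the updated script does).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url="http://localhost:8000/v1",
)

# Alternative kept commented out in the diff: call the upstream API directly.
# client = OpenAI()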

scripts/gen_optillm_dataset.py

Lines changed: 6 additions & 3 deletions
@@ -80,9 +80,12 @@ async def generate_dataset(num_samples: int, output_file: str):
     dataset = load_dataset("lmsys/arena-hard-auto-v0.1", split="train")
 
     with open(output_file, "w") as f:
-        for sample in tqdm(dataset.select(range(29, 29 + num_samples)), total=num_samples):
-            result = await process_sample(sample)
-            f.write(json.dumps(result) + "\n")
+        for sample in tqdm(dataset.select(range( num_samples)), total=num_samples):
+            try:
+                result = await process_sample(sample)
+                f.write(json.dumps(result) + "\n")
+            except Exception as e:
+                print(f"Skip over this item due to error {str(e)}")
 
 def main():
     parser = argparse.ArgumentParser(description="Generate OptILM dataset")

scripts/requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 datasets
+accelerate
+huggingface_hub
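
The two new entries can be installed together with the existing dependency via pip install -r scripts/requirements.txt, assuming the command is run from a checkout of the repository.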
