Changes from all commits (49 commits)
78c94b1  first (dataei, Feb 20, 2025)
e14c62d  k (dataei, Feb 20, 2025)
6810c58  merge conflicts (Feb 26, 2025)
7035d96  merge conflicts (Feb 26, 2025)
c962628  comment test (Feb 27, 2025)
9d54926  first (dataei, Feb 20, 2025)
0792497  k (dataei, Feb 20, 2025)
bdd9789  merge conflicts (Feb 26, 2025)
e6735bb  merge conflicts (Feb 26, 2025)
cea730b  comment test (Feb 27, 2025)
b963219  first (dataei, Feb 20, 2025)
cea0646  modifying to deepinfra instead of deepseek and changed file path to t… (Feb 27, 2025)
97bbf86  debugging with print statements, planning direct prompting variables … (Feb 28, 2025)
9f1cefe  added 2,4,6,8 shot direct prompting and moved writing script out of f… (Feb 28, 2025)
a60c014  Merge branch 'dev_denise' of https://github.com/AugmentedDesignLab/Ca… (Feb 28, 2025)
f893f7e  Modifications to model names and corresponding evals can now be compl… (ishaan95, Feb 28, 2025)
fca4c75  added 2,4,6,8 shot direct prompting and moved writing script out of f… (Feb 28, 2025)
faf4d9b  syntax conflict (Mar 3, 2025)
70e49e9  llm_qa_direct_only: added to model dictionary, incorporated CoT promp… (Mar 8, 2025)
6cab612  ran more experiments to find pattern in small, medium, large file siz… (Mar 23, 2025)
6720e73  in parse_scenario_womd: modified womd preprocessing syntax to more ac… (Mar 25, 2025)
8ab5c32  modified parse scenario script to omit first file in script. index (5… (Mar 25, 2025)
5e5df63  latest (ishaan95, Mar 26, 2025)
e4bc684  merge dev_denise with dev_Ishaan (ishaan95, Mar 26, 2025)
a28cb92  Removing planner import (ishaan95, Mar 26, 2025)
295ad3d  incorporated word count into planner.py, drafting automation for corr… (Mar 27, 2025)
caca3a9  renaming of folders for better organization between examples ran for … (Mar 27, 2025)
19717bc  add grades folder to gitignore (ishaan95, Apr 8, 2025)
1626387  merge conflict resolution (ishaan95, Apr 8, 2025)
4e1e3ac  merge conflict resolution (ishaan95, Apr 8, 2025)
9abc262  Calculate the most similar scenarios to a given index (ishaan95, Apr 9, 2025)
7096db4  Search similar scenarios to a specific index and with respect to a gi… (ishaan95, Apr 9, 2025)
857da20  reorganizing files and adding boxplots + creation script. fixed sizin… (Apr 10, 2025)
7f28e5b  embedding space analysis, parallel experiments (ishaan95, Apr 22, 2025)
fb99033  Modifications to model names and corresponding evals can now be compl… (ishaan95, Feb 28, 2025)
28845b5  latest (ishaan95, Mar 26, 2025)
a8986fa  Removing planner import (ishaan95, Mar 26, 2025)
442fea4  add grades folder to gitignore (ishaan95, Apr 8, 2025)
c46ae04  Calculate the most similar scenarios to a given index (ishaan95, Apr 9, 2025)
96c7cf7  Search similar scenarios to a specific index and with respect to a gi… (ishaan95, Apr 9, 2025)
d4f00ef  embedding space analysis, parallel experiments (ishaan95, Apr 22, 2025)
92e1495  experimenting w/ llm as a judge prompting (Apr 10, 2025)
ccab94d  placed data collection outside of project folder (Apr 10, 2025)
b1c953c  renamed box plotting script, added negative prompting (concise) to ru… (Apr 22, 2025)
9d8a5de  modifications to negative prompting (Apr 23, 2025)
3ae34b1  merge commit (ishaan95, Apr 23, 2025)
38abeb5  Reorganized the llm evaluation script (ishaan95, Apr 24, 2025)
7cd3f39  modified parse scenario to my file path (Apr 30, 2025)
38e7009  lecturing prompting has been added. PDDL testing for the latest versi… (ishaan95, Apr 29, 2025)
9 changes: 6 additions & 3 deletions .gitignore
@@ -1,9 +1,12 @@
-apla-planner/generated_pddls_deepseek/dataset/*
-apla-planner/generated_pddls_deepseek/logs/*
+apla-planner/*
 generated_pddls/*
 parsed_womdr_data/*
 pddl-examples/*
 *.pddl
 __pycache__/*
-apla-planner/generated_pddls_deepseek/.DS_Store
+v3-grades/*
+plt-graph-v3/*
+cot_file_size_graphs/*
+abc_bar_graph/*
+grades/*
 plt-graph/*
9 changes: 4 additions & 5 deletions apla-planner/generated_pddls_deepseek/planner_test.py
@@ -1,7 +1,6 @@
from jupyddl import AutomatedPlanner # Comment this line along with the other planner lines if running from outside WSL
import os
import json
import matplotlib.pyplot as plt

## There is one context per scenario. Each context has a corresponding PDDL domain file.
## Each scenario has multiple interactions. Each interaction will have one PDDL problem file.
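## Expected on-disk layout, assumed from the paths used below (adjust if yours differs):
##   dataset/domains/<scenario_id>/   -> one domain PDDL file per scenario
##   dataset/problems/<scenario_id>/  -> one problem PDDL file per interaction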
@@ -22,11 +21,12 @@

# We will traverse the problem list since there will be only one domain per scenario

plans_for_one_scenario = {}
problem_coverage_scores = []
problem_initial_state_sizes = []
print("Scenario ID is {}".format(scenario_folder))
for problem_file_name in problems_within_scenario:
print(f"Considering problem file {problem_file_name}")
plans_for_one_scenario = {}
problem_full_path = "dataset/problems/"+scenario_folder+"/"+problem_file_name
domain_full_path = "dataset/domains/"+scenario_folder+"/"+domains_within_scenario[0]
print("Planner is now running for the problem {}".format(problem_file_name))
@@ -39,6 +39,5 @@
except:
continue


with open("dataset/problems/"+scenario_folder+"/plan_set.json", 'w') as file:
json.dump(plans_for_one_scenario, file)
with open("dataset/problems/"+scenario_folder+"/"+problem_file_name+"_"+"plan_set.json", 'w') as file:
json.dump(plans_for_one_scenario, file)
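For readers unfamiliar with jupyddl, a minimal standalone run looks roughly like the sketch below. This is a sketch under assumptions: the paths are placeholders, and the astar_best_first_search call and its return shape are recalled from jupyddl's documentation, so verify them against the installed version.

```python
from jupyddl import AutomatedPlanner  # requires the WSL/Linux setup noted above

# Placeholder paths: one domain per scenario, one problem per interaction.
planner = AutomatedPlanner("dataset/domains/scenario_0/domain.pddl",
                           "dataset/problems/scenario_0/problem_0.pddl")

# jupyddl exposes several search routines; A* best-first is one of them.
# The return shape can differ between versions, so inspect it rather than unpack.
result = planner.astar_best_first_search()
print(result)
```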
194 changes: 194 additions & 0 deletions basic_scenario_gpt.py
@@ -0,0 +1,194 @@
from guidance import models, gen, user, assistant, system
import parse_scenario_womd
import json


def generate_scenario_concepts(granularity, scenario_data):
gpt_scenario = models.OpenAI(model="gpt-4o", echo=False)

with system():
lm_scenario = gpt_scenario

with user():
lm_scenario += f"""
Think deeply about scenarios for testing autonomous vehicles.

I need some states of the world that would be relevant for logically describing this traffic scenario:
{scenario_data}

A state is just an assertion with a true or false value that represents the world at that particular moment.
This is similar to the concept of a turn in a turn-based game.

There must be states regarding the following concepts:
* Static environment description.
* Ego agent
* The respective surrounding agents.

In each action and state, the ego agent or the surrounding agent must be identified as <EGO> or <SURROUNDING AGENT #0> or <SURROUNDING AGENT #1> as needed.

Increase the granularity of the concepts in proportion to the granularity level.
The granularity level is {str(granularity)} on a scale of 1 to 10, with 1 being the least and 10 being the most granular.
Granularity pertains to how specific the information is.

Make sure to include the given concepts in the generated list of concepts, in addition to your own concepts.
"""

with assistant():
lm_scenario += gen("concepts", temperature=0.5)

print("The scenario concepts are {}".format(lm_scenario["concepts"]))
return lm_scenario["concepts"]

def generate_scenario_states(concepts):
gpt_scenario = models.OpenAI(model="gpt-4o", echo=False)

with system():
lm_scenario = gpt_scenario

with user():
lm_scenario += f"""
Based on the concepts detailed in {concepts},
Write down a list of states pertaining to these concepts in natural language. Write them in the following format:
```json
<open curly bracket>
"<state name>": <open curly bracket>
"statement": "<the assertion in natural language. Use the fewest words possible for maximum clarity>"
<close curly bracket>,
"<state name>": <open curly bracket>
"statement": "<the assertion in natural language>"
<close curly bracket>,
...
<close curly bracket>
```

Be very very very specific.
"""

with assistant():
lm_scenario += gen("state_dictionary", temperature=0.5)

return lm_scenario["state_dictionary"]

def generate_scenario_actions(concepts, granularity=2):
gpt_scenario = models.OpenAI(model="gpt-4o", echo=False)

with system():
lm_scenario = gpt_scenario

with user():
lm_scenario += f"""
Based on the concepts detailed in {concepts},
* Write down a list of actions that map between these states in natural language.
* Each action has some causal states (predicates) and some effect states that will be true or false.
* Each action is a cause and effect mapping between any number of causal states and any number of effect states.
* Actions and states must not contradict each other.
* Action names must be descriptive and the action can be understood just by looking at the name.
* The state names within each action are also descriptive. The cause and effect statements and the state names must have the same information.
* There must be separate states regarding the environment, ego and the respective surrounding agents.
* In each action and state, the ego agent or the surrounding agent must be identified as <EGO> or <SURROUNDING AGENT #0> or <SURROUNDING AGENT #1> as needed.
* For distances, positions, and speeds, do not use specific numbers but descriptive words instead, such as front, left, right, near, far, fast, slow, medium (or combinations such as front-left and so on), or other similar descriptive words.
* The action itself will only become true when the causal states and the effect states are in the specific states that this description details.
* Write them in the following format:
```json
<open curly bracket>
"<action name>":
<open curly bracket>
"<state name>": <open curly bracket>
"statement": "<the assertion in natural language. Use the fewest words possible for maximum clarity>",
"value": <whether this value is true or false>,
"state_type": <whether this state is a cause or an effect for the current action>
<close curly bracket>,
"<state name>": <open curly bracket>
"statement": "<the assertion in natural language. Use the fewest words possible for maximum clarity>",
"value": <whether this value is true or false>,
"state_type": <whether this state is a cause or an effect for the current action>
<close curly bracket>
<close curly bracket>,
...
<close curly bracket>
```

Increase the granularity of these actions in proportion to the granularity level.
Granularity pertains to how specific the information is.
While the actions must be relevant to the given scenario, they must be general enough to be used for other scenarios as well.
The granularity level is {str(granularity)} on a scale of 1 to 10, with 1 being the least and 10 being the most granular.

"""

with assistant():
lm_scenario += gen("action_dictionary", temperature=0.8)

print("The scenario actions are {}".format(lm_scenario["action_dictionary"]))
return lm_scenario["action_dictionary"]

# # Removed from this project after consideration
# def generate_scenario_states(concepts):
# gpt_scenario = models.OpenAI(model="gpt-4o", echo=False)

# with system():
# lm_scenario = gpt_scenario

# with user():
# lm_scenario += f"""
# Based on the concepts detailed in {concepts},
# Write down a list of states pertaining to these concepts in natural language. Write them in the following format:
# ```json
# <curly bracket>
# "<state name>": <curly bracket>
# "statement": "<the assertion in natural language. Use the fewest words possible for maximum clarity>
# <close curly bracket>,
# "<state name>": <open curly bracket>
# "statement": "<the assertion in natural language>,
# <close curly bracket>,
# ...
# <close curly bracket>
# json```

# Be very, very specific and granular, with very fine details.
# """

# with assistant():
# lm_scenario += gen("state_dictionary", temperature=0.8)

# return lm_scenario["state_dictionary"]

def respond_scenario_query(concepts, actions, questions):
gpt_scenario = models.OpenAI(model="gpt-4o", echo=False)

with system():
lm_scenario = gpt_scenario

with user():
lm_scenario += f"""
Based on the concepts detailed in {concepts} and actions detailed in {actions}, respond to the following questions:
{questions}
Be very specific and very granular, with fine details.
"""

with assistant():
lm_scenario += gen("scenario_response", temperature=0.8)

#print("The scenario responses are {}".format(lm_scenario["scenario_response"]))
return lm_scenario["scenario_response"]

def evaluate_gpt(question):
gpt_scenario = models.OpenAI(model="gpt-4o-mini", echo=False)

with system():
lm_scenario = gpt_scenario

with user():
lm_scenario += f"""
Given the questions here:
{question}

Choose the correct answer. Only mention the option.
"""

with assistant():
lm_scenario += gen("mcq_response", temperature=0.5)

#print("The scenario responses are {}".format(lm_scenario["scenario_response"]))
return lm_scenario["mcq_response"]
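Taken together, the functions above form a pipeline: concepts are generated from parsed scenario data, states and actions are generated from those concepts, and queries are answered against both. A minimal driver might look like the sketch below; the scenario text and question are placeholders, and how parse_scenario_womd produces the real scenario_data is not shown here.

```python
# Hypothetical driver for the pipeline above. In practice scenario_data
# would come from parse_scenario_womd rather than a hard-coded string.
scenario_data = "<EGO> approaches a four-way intersection; <SURROUNDING AGENT #0> is crossing."

concepts = generate_scenario_concepts(granularity=5, scenario_data=scenario_data)
states = generate_scenario_states(concepts)    # optional intermediate listing
actions = generate_scenario_actions(concepts, granularity=5)

questions = "Must <EGO> yield before entering the intersection?"
print(respond_scenario_query(concepts, actions, questions))
```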


34 changes: 34 additions & 0 deletions box_plots.py
@@ -0,0 +1,34 @@
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Indices and scores below were changed manually for each graph
exp_and_scores = {
'Zero-Shot': [6, 3, 2, 1, 3, 2, 8, 8, 2, 10, 10, 3, 10, 2, 2, 8, 7, 4, 2, 5, 10, 10, 4, 10, 3, 3, 10, 2, 1, 2, 9, 10, 8, 8, 3, 10, 6, 3, 6, 8, 8, 2, 6, 6, 8, 3, 2, 5, 8, 2, 10, 1, 8, 10, 6, 7, 7, 8, 8, 5, 6, 10, 10, 4],
'Two-Shot': [4, 10, 10, 10, 10, 4, 10, 8, 8, 10, 10, 2, 2, 10, 4, 8, 7, 10, 10, 10, 10, 8, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 6, 8, 4, 5, 4, 2, 6, 3, 9, 7, 7, 4, 10, 6, 10, 10, 10, 10, 10, 5, 6, 4, 8, 3, 10, 10, 10, 10, 10, 10, 10, 3],
'Four-Shot': [3, 10, 1, 2, 2, 2, 8, 9, 9, 10, 10, 2, 2, 10, 3, 9, 6, 10, 8, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 7, 10, 6, 5, 3, 4, 6, 3, 3, 2, 7, 2, 9, 4, 10, 2, 10, 10, 10, 10, 10, 10, 5, 3, 4, 2, 8, 10, 10, 10, 10, 10, 8, 2],
'Six-Shot': [4, 10, 1, 10, 3, 4, 6, 10, 10, 10, 8, 2, 2, 10, 3, 7, 6, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 7, 4, 2, 8, 4, 3, 4, 6, 8, 4, 10, 3, 9, 6, 10, 10, 10, 8, 10, 2, 5, 6, 3, 8, 5, 10, 9, 10, 10, 10, 10, 3]
}

data = []
for experiment, scores in exp_and_scores.items():
score_array = np.array(scores)
sorted_array = np.sort(score_array)
for individual_score in sorted_array:
data.append({'CoT Prompting Style': experiment, 'Correctness Scores': individual_score})

df = pd.DataFrame(data)
sns.set_theme(style="whitegrid")
plt.figure(figsize=(10, 6))
# Create the box plots
ax = sns.boxplot(x='CoT Prompting Style', y='Correctness Scores', data=df, width=0.5, fliersize=0)
# Overlay the individual data points
sns.stripplot(x='CoT Prompting Style', y='Correctness Scores', data=df, jitter=0.23, color='black', size=6, alpha=0.7)
for i, (experiment, scores) in enumerate(exp_and_scores.items()):
q1_label = np.percentile(scores, 25)
ax.text(i, q1_label, f'Q1: {q1_label:.2f}', ha = 'center', va = 'bottom', color = 'white', fontsize = 12)

plt.ylim(0, 11)
plt.title('Zero, Two, Four, and Six-Shot CoT Prompting Score Distribution for Scenarios of Large Files')
plt.show()
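If more than Q1 needs labeling, the same np.percentile pattern extends directly to the median and Q3; a minimal sketch, reusing the ax and exp_and_scores defined above (placed before plt.show() so the labels render with the rest of the figure):

```python
# Annotate the median and Q3 on each box, mirroring the Q1 labels above.
for i, (experiment, scores) in enumerate(exp_and_scores.items()):
    median = np.percentile(scores, 50)
    q3 = np.percentile(scores, 75)
    ax.text(i, median, f'Med: {median:.2f}', ha='center', va='bottom', fontsize=12)
    ax.text(i, q3, f'Q3: {q3:.2f}', ha='center', va='bottom', fontsize=12)
```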
13 changes: 10 additions & 3 deletions client_model_setup.py
@@ -9,7 +9,7 @@ class ProvidedLLM():
def __init__(self):
self.client_oai = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
self.client_deepinfra = OpenAI(api_key=os.environ["DEEPINFRA_API_KEY"], base_url="https://api.deepinfra.com/v1/openai")
self.client_dsapi = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")
#self.client_dsapi = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")

# The following are model names for DS models provided via their own API service.
self.ds_v3_dsapi = "deepseek-chat"
@@ -18,6 +18,7 @@ def __init__(self):
# The following are model names for Large DeepInfra provided models
self.ds_v3 = "deepseek-ai/DeepSeek-V3"
self.ds_r1 = "deepseek-ai/DeepSeek-R1" # This model thinks. Cannot use for json output
self.ds_r1_turbo = "deepseek-ai/DeepSeek-R1-Turbo"
self.llama_33_70b = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
self.llama_31_405b = "meta-llama/Meta-Llama-3.1-405B-Instruct"
self.qw_25_72b = "Qwen/Qwen2.5-72B-Instruct"
@@ -32,6 +33,8 @@ def __init__(self):
# The following are the small model names for models provided via the OpenAI API service
self.gpt_4o_mini = "gpt-4o-mini"
self.o3_mini = "o3-mini"
self.gpt_45 = "gpt-4.5-preview"
self.gpt_41 = "gpt-4.1"

self.model_dictionary = {
"openai_models": [self.gpt_4o_mini, self.o3_mini],
@@ -53,7 +56,11 @@ def non_thinking_llm_call(self, client, model, prompt):
# DS api reasoner doesn't send think tags so no need for this function.
# Deepinfra thinking models send these tags so this function is needed.
def thinking_llm_call(self, client, model, prompt):
output = self.llm_call(client=client, model=model, prompt=prompt)
output_content = client.chat.completions.create(model=model,
messages=[{"role": "user", "content": prompt}],
stream=False
)
output = output_content.choices[0].message.content
separated_string = re.split(r"(</think>)", output)
separated_string_thoughts = re.split(r"(<think>)", separated_string[0])
separated_string_output = separated_string[2]
@@ -63,7 +70,7 @@ def thinking_llm_call(self, client, model, prompt):
def llm_call(self, client, model, prompt):
output = ""
thoughts = ""
if (model==self.ds_r1) or (model==self.ds_distil_llama_70b):
if (model==self.ds_r1) or (model==self.ds_distil_llama_70b) or (model==self.ds_r1_turbo):
output, thoughts = self.thinking_llm_call(client, model, prompt)
else:
output = self.non_thinking_llm_call(client, model, prompt)
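The tag handling in thinking_llm_call depends on re.split keeping the delimiter when the pattern contains a capturing group; a small standalone check of that behavior:

```python
import re

# A capturing group in the pattern makes re.split keep the delimiter tokens.
sample = "<think>some chain of thought</think>the final answer"
parts = re.split(r"(</think>)", sample)
# parts == ['<think>some chain of thought', '</think>', 'the final answer']
thoughts = re.split(r"(<think>)", parts[0])[-1]  # 'some chain of thought'
answer = parts[2]                                # 'the final answer'
```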