From cffd0606bb416d12fb46a1a63f09c196fc75d8a1 Mon Sep 17 00:00:00 2001
From: Dillon Laird <dillonalaird@gmail.com>
Date: Thu, 3 Oct 2024 16:46:39 -0700
Subject: [PATCH] revert changes with planning step for now

---
 vision_agent/agent/vision_agent.py | 26 ++++++++++++++++++----
 vision_agent/tools/meta_tools.py   | 35 +++++-------------------------
 2 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 518858d1..ac9f4f32 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -204,7 +204,7 @@ def __call__(
         input: Union[str, List[Message]],
         media: Optional[Union[str, Path]] = None,
         artifacts: Optional[Artifacts] = None,
-    ) -> List[Message]:
+    ) -> str:
         """Chat with VisionAgent and get the conversation response.
 
         Parameters:
@@ -221,10 +221,28 @@ def __call__(
             input = [{"role": "user", "content": input}]
             if media is not None:
                 input[0]["media"] = [media]
-        results, _ = self.chat_with_code(input, artifacts)
-        return results
+        results, _ = self.chat_and_artifacts(input, artifacts)
+        return results[-1]["content"]  # type: ignore
+
+    def chat(
+        self,
+        chat: List[Message],
+    ) -> List[Message]:
+        """Chat with VisionAgent, which uses code to execute actions to accomplish
+        its tasks.
+
+        Parameters:
+            chat (List[Message]): A conversation in the format of:
+                [{"role": "user", "content": "describe your task here..."}]
+                or if it contains media files, it should be in the format of:
+                [{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
+
+        Returns:
+            List[Message]: The conversation response.
+        """
+        return self.chat_and_artifacts(chat)[0]
 
-    def chat_with_code(
+    def chat_and_artifacts(
         self,
         chat: List[Message],
         artifacts: Optional[Artifacts] = None,
diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index 6e97198e..cdd522a8 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -405,7 +405,7 @@ def generate_vision_plan(
 
     output_str += f"\nbest plan: {response.best_plan}\n"
     output_str += "thoughts: " + response.plan_thoughts.strip() + "\n"
-    output_str += f"[End Plan Context]"
+    output_str += "[End Plan Context]"
     print(output_str)
     return output_str
 
@@ -415,9 +415,6 @@ def generate_vision_code(
     name: str,
     chat: str,
     media: List[str],
-    plan: Optional[Dict[str, Union[str, List[str]]]] = None,
-    plan_thoughts: Optional[str] = None,
-    plan_context_artifact: Optional[str] = None,
     test_multi_plan: bool = True,
     custom_tool_names: Optional[List[str]] = None,
 ) -> str:
@@ -428,10 +425,6 @@ def generate_vision_code(
         name (str): The name of the artifact to save the code to.
         chat (str): The chat message from the user.
         media (List[str]): The media files to use.
-        plan (Optional[Dict[str, Union[str, List[str]]]): The plan to use to generate
-            the code.
-        plan_thoughts (Optional[str]): The thoughts to use to generate the code.
-        plan_context_artifact (Optional[str]): The artifact name of the stored plan context.
         test_multi_plan (bool): Do not change this parameter.
         custom_tool_names (Optional[List[str]]): Do not change this parameter.
 
@@ -447,7 +440,6 @@ def detect_dogs(image_path: str):
             dogs = owl_v2("dog", image)
             return dogs
     """
-
     if ZMQ_PORT is not None:
         agent = va.agent.VisionAgentCoder(
             report_progress_callback=lambda inp: report_progress_callback(
@@ -458,26 +450,11 @@ def detect_dogs(image_path: str):
         agent = va.agent.VisionAgentCoder()
 
     fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
-    if plan is None or plan_thoughts is None or plan_context_artifact is None:
-        response = agent.generate_code(
-            fixed_chat,
-            test_multi_plan=test_multi_plan,
-            custom_tool_names=custom_tool_names,
-        )
-    else:
-        plan_context = json.loads(artifacts[plan_context_artifact])
-        plan_context = va.agent.PlanContext(
-            plans={"plan1": plan},
-            best_plan="plan1",
-            plan_thoughts=plan_thoughts,
-            tool_output=plan_context["tool_output"],
-            tool_doc=plan_context["tool_doc"],
-            test_results=None,
-        )
-        response = agent.generate_code_from_plan(
-            fixed_chat,
-            plan_context,
-        )
+    response = agent.generate_code(
+        fixed_chat,
+        test_multi_plan=test_multi_plan,
+        custom_tool_names=custom_tool_names,
+    )
     redisplay_results(response["test_result"])
     code = response["code"]
     artifacts[name] = code