Revert the planning-step changes for now

landing-ai · Oct 3, 2024 · e61eeee
1 parent 097360d
commit e61eeee
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 33 deletions.
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
@@ -204,7 +204,7 @@ def __call__(
  input: Union[str, List[Message]],
  media: Optional[Union[str, Path]] = None,
  artifacts: Optional[Artifacts] = None,
- ) -> List[Message]:
+ ) -> str:
  """Chat with VisionAgent and get the conversation response.
 
  Parameters:
@@ -221,10 +221,28 @@ def __call__(
  input = [{"role": "user", "content": input}]
  if media is not None:
  input[0]["media"] = [media]
- results, _ = self.chat_with_code(input, artifacts)
- return results
+ results, _ = self.chat_and_artifacts(input, artifacts)
+ return results[-1]["content"] # type: ignore
+
+ def chat(
+ self,
+ chat: List[Message],
+ ) -> List[Message]:
+ """Chat with VisionAgent, it will use code to execute actions to accomplish
+ its tasks.
+
+ Parameters:
+ chat (List[Message]): A conversation in the format of:
+ [{"role": "user", "content": "describe your task here..."}]
+ or if it contains media files, it should be in the format of:
+ [{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
+
+ Returns:
+ List[Message]: The conversation response.
+ """
+ return self.chat_and_artifacts(chat)[0]
 
- def chat_with_code(
+ def chat_and_artifacts(
  self,
  chat: List[Message],
  artifacts: Optional[Artifacts] = None,

diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
@@ -400,7 +400,7 @@ def generate_vision_plan(
 
  output_str += f"\nbest plan: {response.best_plan}\n"
  output_str += "thoughts: " + response.plan_thoughts.strip() + "\n"
- output_str += f"[End Plan Context]"
+ output_str += "[End Plan Context]"
  print(output_str)
  return output_str
 
@@ -410,9 +410,6 @@ def generate_vision_code(
  name: str,
  chat: str,
  media: List[str],
- plan: Optional[Dict[str, Union[str, List[str]]]] = None,
- plan_thoughts: Optional[str] = None,
- plan_context_artifact: Optional[str] = None,
  test_multi_plan: bool = True,
  custom_tool_names: Optional[List[str]] = None,
 ) -> str:
@@ -423,10 +420,6 @@ def generate_vision_code(
  name (str): The name of the artifact to save the code to.
  chat (str): The chat message from the user.
  media (List[str]): The media files to use.
- plan (Optional[Dict[str, Union[str, List[str]]]): The plan to use to generate
- the code.
- plan_thoughts (Optional[str]): The thoughts to use to generate the code.
- plan_context_artifact (Optional[str]): The artifact name of the stored plan context.
  test_multi_plan (bool): Do not change this parameter.
  custom_tool_names (Optional[List[str]]): Do not change this parameter.
 
@@ -442,7 +435,6 @@ def detect_dogs(image_path: str):
  dogs = owl_v2("dog", image)
  return dogs
  """
-
  if ZMQ_PORT is not None:
  agent = va.agent.VisionAgentCoder(
  report_progress_callback=lambda inp: report_progress_callback(
@@ -453,26 +445,11 @@ def detect_dogs(image_path: str):
  agent = va.agent.VisionAgentCoder()
 
  fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
- if plan is None or plan_thoughts is None or plan_context_artifact is None:
- response = agent.generate_code(
- fixed_chat,
- test_multi_plan=test_multi_plan,
- custom_tool_names=custom_tool_names,
- )
- else:
- plan_context = json.loads(artifacts[plan_context_artifact])
- plan_context = va.agent.PlanContext(
- plans={"plan1": plan},
- best_plan="plan1",
- plan_thoughts=plan_thoughts,
- tool_output=plan_context["tool_output"],
- tool_doc=plan_context["tool_doc"],
- test_results=None,
- )
- response = agent.generate_code_from_plan(
- fixed_chat,
- plan_context,
- )
+ response = agent.generate_code(
+ fixed_chat,
+ test_multi_plan=test_multi_plan,
+ custom_tool_names=custom_tool_names,
+ )
  redisplay_results(response["test_result"])
  code = response["code"]
  artifacts[name] = code