landing-ai · dillonalaird · Jul 16, 2024 · Jul 12, 2024 · Jul 13, 2024 · Jul 15, 2024
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
@@ -176,6 +176,7 @@ def pick_plan(
  model: LMM,
  code_interpreter: CodeInterpreter,
  verbosity: int = 0,
+ max_retries: int = 3,
 ) -> Tuple[str, str]:
  chat = copy.deepcopy(chat)
  if chat[-1]["role"] != "user":
@@ -192,13 +193,13 @@ def pick_plan(
  if len(tool_output.logs.stdout) > 0:
  tool_output_str = tool_output.logs.stdout[0]
 
- if verbosity >= 1:
+ if verbosity == 2:
  _print_code("Initial code and tests:", code)
  _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
 
  # retry if the tool output is empty or code fails
- count = 1
- while (not tool_output.success or tool_output_str == "") and count < 3:
+ count = 0
+ while (not tool_output.success or tool_output_str == "") and count < max_retries:
  prompt = TEST_PLANS.format(
  docstring=tool_info,
  plans=plan_str,
@@ -214,12 +215,15 @@ def pick_plan(
  if len(tool_output.logs.stdout) > 0:
  tool_output_str = tool_output.logs.stdout[0]
 
- if verbosity == 1:
+ if verbosity == 2:
  _print_code("Code and test after attempted fix:", code)
  _LOGGER.info(f"Code execution result after attempte {count}")
 
  count += 1
 
+ if verbosity >= 1:
+ _print_code("Final code:", code)
+
  user_req = chat[-1]["content"]
  context = USER_REQ.format(user_request=user_req)
  # because the tool picker model gets the image as well, we have to be careful with
@@ -408,7 +412,7 @@ def debug_code(
  FIX_BUG.format(
  code=code,
  tests=test,
- result="\n".join(result.text().splitlines()[-50:]),
+ result="\n".join(result.text().splitlines()[-100:]),
  feedback=format_memory(working_memory + new_working_memory),
  )
  )
@@ -673,92 +677,85 @@ def chat_with_workflow(
  working_memory: List[Dict[str, str]] = []
  results = {"code": "", "test": "", "plan": []}
  plan = []
- success = False
- retries = 0
-
- while not success and retries < self.max_retries:
- self.log_progress(
- {
- "type": "plans",
- "status": "started",
- }
- )
- plans = write_plans(
- int_chat,
- T.TOOL_DESCRIPTIONS,
- format_memory(working_memory),
- self.planner,
- )
 
- if self.verbosity >= 1:
- for p in plans:
- _LOGGER.info(
- f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
- )
-
- tool_infos = retrieve_tools(
- plans,
- self.tool_recommender,
- self.log_progress,
- self.verbosity,
- )
- best_plan, tool_output_str = pick_plan(
- int_chat,
- plans,
- tool_infos["all"],
- self.coder,
- code_interpreter,
- verbosity=self.verbosity,
- )
+ self.log_progress(
+ {
+ "type": "plans",
+ "status": "started",
+ }
+ )
+ plans = write_plans(
+ int_chat,
+ T.TOOL_DESCRIPTIONS,
+ format_memory(working_memory),
+ self.planner,
+ )
 
- if best_plan in plans and best_plan in tool_infos:
- plan_i = plans[best_plan]
- tool_info = tool_infos[best_plan]
- else:
- if self.verbosity >= 1:
- _LOGGER.warning(
- f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
- )
- k = list(plans.keys())[0]
- plan_i = plans[k]
- tool_info = tool_infos[k]
-
- self.log_progress(
- {
- "type": "plans",
- "status": "completed",
- "payload": plan_i,
- }
- )
- if self.verbosity >= 1:
+ if self.verbosity >= 1:
+ for p in plans:
  _LOGGER.info(
- f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+ f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
  )
 
- results = write_and_test_code(
- chat=[
- {"role": c["role"], "content": c["content"]} for c in int_chat
- ],
- plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
- tool_info=tool_info,
- tool_output=tool_output_str,
- tool_utils=T.UTILITIES_DOCSTRING,
- working_memory=working_memory,
- coder=self.coder,
- tester=self.tester,
- debugger=self.debugger,
- code_interpreter=code_interpreter,
- log_progress=self.log_progress,
- verbosity=self.verbosity,
- media=media_list,
+ tool_infos = retrieve_tools(
+ plans,
+ self.tool_recommender,
+ self.log_progress,
+ self.verbosity,
+ )
+ best_plan, tool_output_str = pick_plan(
+ int_chat,
+ plans,
+ tool_infos["all"],
+ self.coder,
+ code_interpreter,
+ verbosity=self.verbosity,
+ )
+
+ if best_plan in plans and best_plan in tool_infos:
+ plan_i = plans[best_plan]
+ tool_info = tool_infos[best_plan]
+ else:
+ if self.verbosity >= 1:
+ _LOGGER.warning(
+ f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
+ )
+ k = list(plans.keys())[0]
+ plan_i = plans[k]
+ tool_info = tool_infos[k]
+
+ self.log_progress(
+ {
+ "type": "plans",
+ "status": "completed",
+ "payload": plan_i,
+ }
+ )
+ if self.verbosity >= 1:
+ _LOGGER.info(
+ f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
  )
- success = cast(bool, results["success"])
- code = cast(str, results["code"])
- test = cast(str, results["test"])
- working_memory.extend(results["working_memory"]) # type: ignore
- plan.append({"code": code, "test": test, "plan": plan_i})
 
- retries += 1
+ results = write_and_test_code(
+ chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
+ plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
+ tool_info=tool_info,
+ tool_output=tool_output_str,
+ tool_utils=T.UTILITIES_DOCSTRING,
+ working_memory=working_memory,
+ coder=self.coder,
+ tester=self.tester,
+ debugger=self.debugger,
+ code_interpreter=code_interpreter,
+ log_progress=self.log_progress,
+ verbosity=self.verbosity,
+ media=media_list,
+ )
+ success = cast(bool, results["success"])
+ code = cast(str, results["code"])
+ test = cast(str, results["test"])
+ working_memory.extend(results["working_memory"]) # type: ignore
+ plan.append({"code": code, "test": test, "plan": plan_i})
 
  execution_result = cast(Execution, results["test_result"])
  self.log_progress(