From 801b82926c532fdfefee1bbe9925b2766c21f2eb Mon Sep 17 00:00:00 2001
From: wuyiqunLu <132986242+wuyiqunLu@users.noreply.github.com>
Date: Thu, 23 May 2024 00:56:49 -0500
Subject: [PATCH] feat: add progress logging for v2 and v3 (#93)

* feat: add progress logging for v3

* fix lint

* fix lint

* fix format

* fix format

* fix format

* run black formatter

* fix lint

* fix lint

* fix lint

* separate code and test

* add reflection
---
 vision_agent/agent/agent.py           |  4 +-
 vision_agent/agent/agent_coder.py     |  6 +--
 vision_agent/agent/vision_agent.py    | 38 ++++++++------
 vision_agent/agent/vision_agent_v2.py | 72 +++++++++++++++++++++------
 vision_agent/agent/vision_agent_v3.py | 71 ++++++++++++++++++++++++--
 5 files changed, 153 insertions(+), 38 deletions(-)
diff --git a/vision_agent/agent/agent.py b/vision_agent/agent/agent.py
index 93b3223d..b2fccb01 100644
--- a/vision_agent/agent/agent.py
+++ b/vision_agent/agent/agent.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Any
 
 
 class Agent(ABC):
@@ -13,7 +13,7 @@ def __call__(
         pass
 
     @abstractmethod
-    def log_progress(self, description: str) -> None:
+    def log_progress(self, data: Dict[str, Any]) -> None:
         """Log the progress of the agent.
         This is a hook that is intended for reporting the progress of the agent.
         """
diff --git a/vision_agent/agent/agent_coder.py b/vision_agent/agent/agent_coder.py
index fca9ea64..aad3df66 100644
--- a/vision_agent/agent/agent_coder.py
+++ b/vision_agent/agent/agent_coder.py
@@ -3,7 +3,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Any
 
 from rich.console import Console
 from rich.syntax import Syntax
@@ -206,5 +206,5 @@ def chat(
 
         return f"{IMPORT_HELPER}\n{code}"
 
-    def log_progress(self, description: str) -> None:
-        _LOGGER.info(description)
+    def log_progress(self, data: Dict[str, Any]) -> None:
+        _LOGGER.info(data)
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 42fcbee6..2db933d9 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -451,7 +451,7 @@ def __init__(
         reflect_model: Optional[Union[LLM, LMM]] = None,
         max_retries: int = 2,
         verbose: bool = False,
-        report_progress_callback: Optional[Callable[[str], None]] = None,
+        report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
     ):
         """VisionAgent constructor.
 
@@ -518,23 +518,23 @@ def __call__(
             self_reflection=self_reflection,
         )
 
-    def log_progress(self, description: str) -> None:
-        _LOGGER.info(description)
+    def log_progress(self, data: Dict[str, Any]) -> None:
+        _LOGGER.info(data)
         if self.report_progress_callback:
-            self.report_progress_callback(description)
+            self.report_progress_callback(data)
 
     def _report_visualization_via_callback(
         self, images: Sequence[Union[str, Path]]
     ) -> None:
         """This is intended for streaming the visualization images via the callback to the client side."""
         if self.report_progress_callback:
-            self.report_progress_callback("<VIZ>")
+            self.report_progress_callback({"log": "<VIZ>"})
             if images:
                 for img in images:
                     self.report_progress_callback(
-                        f"<IMG>base:64{convert_to_b64(img)}</IMG>"
+                        {"log": f"<IMG>base:64{convert_to_b64(img)}</IMG>"}
                     )
-            self.report_progress_callback("</VIZ>")
+            self.report_progress_callback({"log": "</VIZ>"})
 
     def chat_with_workflow(
         self,
@@ -618,8 +618,8 @@ def chat_with_workflow(
                 tool_results["answer"] = answer
                 all_tool_results.append(tool_results)
 
-                self.log_progress(f"\tCall Result: {call_results}")
-                self.log_progress(f"\tAnswer: {answer}")
+                self.log_progress({"log": f"\tCall Result: {call_results}"})
+                self.log_progress({"log": f"\tAnswer: {answer}"})
                 answers.append({"task": task_str, "answer": answer})
                 task_depend[task["id"]]["answer"] = answer  # type: ignore
                 task_depend[task["id"]]["call_result"] = call_results  # type: ignore
@@ -644,18 +644,22 @@ def chat_with_workflow(
                     final_answer,
                     reflection_images,
                 )
-                self.log_progress(f"Reflection: {reflection}")
+                self.log_progress({"log": f"Reflection: {reflection}"})
                 parsed_reflection = parse_reflect(reflection)
                 if parsed_reflection["Finish"]:
                     break
                 else:
                     reflections += "\n" + parsed_reflection["Reflection"]
             else:
-                self.log_progress("Self Reflection skipped based on user request.")
+                self.log_progress(
+                    {"log": "Self Reflection skipped based on user request."}
+                )
                 break
         # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
         self.log_progress(
-            f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
+            {
+                "log": f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
+            }
         )
 
         if visualize_output:
@@ -718,8 +722,10 @@ def retrieval(
         }
 
         self.log_progress(
-            f"""Going to run the following tool(s) in sequence:
+            {
+                "log": f"""Going to run the following tool(s) in sequence:
 {tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
+            }
         )
 
         def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
@@ -764,7 +770,9 @@ def create_tasks(
         else:
             task_list = []
         self.log_progress(
-            f"""Planned tasks:
-{tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
+            {
+                "log": "Planned tasks:",
+                "plan": task_list,
+            }
         )
         return task_list
diff --git a/vision_agent/agent/vision_agent_v2.py b/vision_agent/agent/vision_agent_v2.py
index 340ba42e..d7bf1372 100644
--- a/vision_agent/agent/vision_agent_v2.py
+++ b/vision_agent/agent/vision_agent_v2.py
@@ -165,7 +165,7 @@ def write_and_exec_code(
     tool_info: str,
     exec: Execute,
     retrieved_ltm: str,
-    log_progress: Callable[..., str],
+    log_progress: Callable[[Dict[str, Any]], None],
     max_retry: int = 3,
     verbosity: int = 0,
 ) -> Tuple[bool, str, str, Dict[str, List[str]]]:
@@ -179,7 +179,23 @@ def write_and_exec_code(
     success, result = exec.run_isolation(code)
     if verbosity == 2:
         _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
-        log_progress(f"\tCode success: {success}\n\tResult: {str(result)}", code)
+        log_progress(
+            {
+                "log": f"Code success: {success}",
+            }
+        )
+        log_progress(
+            {
+                "log": "Code:",
+                "code": code,
+            }
+        )
+        log_progress(
+            {
+                "log": "Result:",
+                "result": str(result),
+            }
+        )
         _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
     working_memory: Dict[str, List[str]] = {}
     while not success and counter < max_retry:
@@ -206,7 +222,18 @@ def write_and_exec_code(
             _CONSOLE.print(
                 Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
             )
-            log_progress(f"\tDebugging reflection: {reflection}\n\tResult: {result}")
+            log_progress(
+                {
+                    "log": "Debugging reflection:",
+                    "reflection": reflection,
+                }
+            )
+            log_progress(
+                {
+                    "log": "Result:",
+                    "result": result,
+                }
+            )
             _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
 
         if success:
@@ -227,7 +254,7 @@ def run_plan(
     exec: Execute,
     code: str,
     tool_recommender: Sim,
-    log_progress: Callable[..., str],
+    log_progress: Callable[[Dict[str, Any]], None],
     long_term_memory: Optional[Sim] = None,
     verbosity: int = 0,
 ) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
@@ -239,8 +266,7 @@ def run_plan(
 
     for task in active_plan:
         log_progress(
-            f"""Going to run the following task(s) in sequence:
-{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
+            {"log": "Going to run the following task(s) in sequence:", "task": task}
         )
         _LOGGER.info(
             f"""
@@ -250,7 +276,7 @@ def run_plan(
         tool_info = "\n".join([e["doc"] for e in tools])
 
         if verbosity == 2:
-            log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
+            log_progress({"log": f"Tools retrieved: {[e['desc'] for e in tools]}"})
             _LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
 
         if long_term_memory is not None:
@@ -282,7 +308,17 @@ def run_plan(
                 Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
             )
 
-        log_progress(f"\tCode success: {success}\n\tResult: {str(result)}")
+        log_progress(
+            {
+                "log": f"Code success: {success}",
+            }
+        )
+        log_progress(
+            {
+                "log": "Result:",
+                "result": str(result),
+            }
+        )
         _LOGGER.info(f"\tCode success: {success} result: {str(result)}")
 
         task["success"] = success
@@ -320,7 +356,7 @@ def __init__(
         tool_recommender: Optional[Sim] = None,
         long_term_memory: Optional[Sim] = None,
         verbosity: int = 0,
-        report_progress_callback: Optional[Callable[..., Any]] = None,
+        report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
     ) -> None:
         self.planner = OpenAILLM(temperature=0.0, json_mode=True)
         self.coder = OpenAILLM(temperature=0.0)
@@ -376,8 +412,10 @@ def chat_with_workflow(
 
         user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
         self.log_progress(
-            f"""Plan:
-{tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
+            {
+                "log": "Plans:",
+                "plan": plan,
+            }
         )
         _LOGGER.info(
             f"""Plan:
@@ -412,8 +450,12 @@ def chat_with_workflow(
 
             retries += 1
 
-        self.log_progress("The Vision Agent V2 has concluded this chat.")
-        self.log_progress(f"<ANSWER>Plan success: {success}</ANSWER>")
+        self.log_progress(
+            {
+                "log": f"The Vision Agent V2 has concluded this chat.\nSuccess: {success}",
+                "finished": True,
+            }
+        )
 
         return {
             "code": working_code,
@@ -423,7 +465,7 @@ def chat_with_workflow(
             "plan": plan,
         }
 
-    def log_progress(self, description: str, code: Optional[str] = "") -> None:
+    def log_progress(self, data: Dict[str, Any]) -> None:
         if self.report_progress_callback is not None:
-            self.report_progress_callback(description, code)
+            self.report_progress_callback(data)
         pass
diff --git a/vision_agent/agent/vision_agent_v3.py b/vision_agent/agent/vision_agent_v3.py
index d8de28c6..f774a8f4 100644
--- a/vision_agent/agent/vision_agent_v3.py
+++ b/vision_agent/agent/vision_agent_v3.py
@@ -3,7 +3,7 @@
 import logging
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast, Callable
 
 from rich.console import Console
 from rich.syntax import Syntax
@@ -114,6 +114,7 @@ def write_and_test_code(
     coder: LLM,
     tester: LLM,
     debugger: LLM,
+    log_progress: Callable[[Dict[str, Any]], None],
     verbosity: int = 0,
     max_retries: int = 3,
 ) -> Dict[str, Any]:
@@ -131,9 +132,27 @@ def write_and_test_code(
     success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
     if verbosity == 2:
         _LOGGER.info("First code and tests:")
+        log_progress(
+            {
+                "log": "Code:",
+                "code": code,
+            }
+        )
+        log_progress(
+            {
+                "log": "Test:",
+                "code": test,
+            }
+        )
         _CONSOLE.print(
             Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
         )
+        log_progress(
+            {
+                "log": "Result:",
+                "result": result,
+            }
+        )
         _LOGGER.info(f"First result: {result}")
 
     count = 0
@@ -156,6 +175,12 @@ def write_and_test_code(
 
         success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
         if verbosity == 2:
+            log_progress(
+                {
+                    "log": f"Debug attempt {count + 1}, reflection:",
+                    "result": fixed_code_and_test["reflections"],
+                }
+            )
             _LOGGER.info(
                 f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
             )
@@ -164,6 +189,12 @@ def write_and_test_code(
                     f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
                 )
             )
+            log_progress(
+                {
+                    "log": "Debug result:",
+                    "result": result,
+                }
+            )
             _LOGGER.info(f"Debug result: {result}")
         count += 1
 
@@ -182,7 +213,10 @@ def write_and_test_code(
 
 
 def retrieve_tools(
-    plan: List[Dict[str, str]], tool_recommender: Sim, verbosity: int = 0
+    plan: List[Dict[str, str]],
+    tool_recommender: Sim,
+    log_progress: Callable[[Dict[str, Any]], None],
+    verbosity: int = 0,
 ) -> str:
     tool_info = []
     tool_desc = []
@@ -191,6 +225,12 @@ def retrieve_tools(
         tool_info.extend([e["doc"] for e in tools])
         tool_desc.extend([e["desc"] for e in tools])
     if verbosity == 2:
+        log_progress(
+            {
+                "log": "Retrieved tools:",
+                "tools": tool_desc,
+            }
+        )
         _LOGGER.info(f"Tools: {tool_desc}")
     tool_info_set = set(tool_info)
     return "\n\n".join(tool_info_set)
@@ -206,6 +246,7 @@ def __init__(
         debugger: Optional[LLM] = None,
         tool_recommender: Optional[Sim] = None,
         verbosity: int = 0,
+        report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
     ) -> None:
         self.planner = (
             OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
@@ -223,6 +264,7 @@ def __init__(
         )
         self.verbosity = verbosity
         self.max_retries = 3
+        self.report_progress_callback = report_progress_callback
 
     def __call__(
         self,
@@ -261,6 +303,12 @@ def chat_with_workflow(
             )
             plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
             if self.verbosity == 1 or self.verbosity == 2:
+                self.log_progress(
+                    {
+                        "log": "Going to run the following plan(s) in sequence:\n",
+                        "plan": plan_i,
+                    }
+                )
                 _LOGGER.info(
                     f"""
 {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -269,6 +317,7 @@ def chat_with_workflow(
             tool_info = retrieve_tools(
                 plan_i,
                 self.tool_recommender,
+                self.log_progress,
                 self.verbosity,
             )
             results = write_and_test_code(
@@ -279,6 +328,7 @@ def chat_with_workflow(
                 self.coder,
                 self.tester,
                 self.debugger,
+                self.log_progress,
                 verbosity=self.verbosity,
             )
             success = cast(bool, results["success"])
@@ -289,11 +339,24 @@ def chat_with_workflow(
 
             reflection = reflect(chat, plan_i_str, code, self.planner)
             if self.verbosity > 0:
+                self.log_progress(
+                    {
+                        "log": "Reflection:",
+                        "reflection": reflection,
+                    }
+                )
                 _LOGGER.info(f"Reflection: {reflection}")
             feedback = cast(str, reflection["feedback"])
             success = cast(bool, reflection["success"])
             working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
 
+        self.log_progress(
+            {
+                "log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}",
+                "finished": True,
+            }
+        )
+
         return {
             "code": code,
             "test": test,
@@ -301,5 +364,7 @@ def chat_with_workflow(
             "working_memory": working_memory,
         }
 
-    def log_progress(self, description: str) -> None:
+    def log_progress(self, data: Dict[str, Any]) -> None:
+        if self.report_progress_callback is not None:
+            self.report_progress_callback(data)
         pass