From eb33d08aa955bfba78f351ee653b501d9dbafc23 Mon Sep 17 00:00:00 2001
From: Dillon Laird <dillonalaird@gmail.com>
Date: Tue, 28 May 2024 21:40:04 -0700
Subject: [PATCH] added documentation for chat with workflow

---
 vision_agent/agent/easytool_v2.py  |  2 +-
 vision_agent/agent/vision_agent.py | 53 +++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/vision_agent/agent/easytool_v2.py b/vision_agent/agent/easytool_v2.py
index 035dc391..1ef382e7 100644
--- a/vision_agent/agent/easytool_v2.py
+++ b/vision_agent/agent/easytool_v2.py
@@ -544,7 +544,7 @@ def chat_with_workflow(
         visualize_output: Optional[bool] = False,
         self_reflection: Optional[bool] = True,
     ) -> Tuple[str, List[Dict]]:
-        """Chat with the vision agent and return the final answer and all tool results.
+        """Chat with EasyToolV2 and return the final answer and all tool results.
 
         Parameters:
             chat: A conversation in the format of
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 3d13bedd..ac4a2453 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -245,6 +245,18 @@ def retrieve_tools(
 
 
 class VisionAgent(Agent):
+    """Vision Agent is an agentic framework that can output code based on a user
+    request. It can plan tasks, retrieve relevant tools, write code, write tests and
+    reflect on failed test cases to debug code. It is inspired by AgentCoder
+    https://arxiv.org/abs/2312.13010 and Data Interpreter
+    https://arxiv.org/abs/2402.18679
+
+    Example
+    -------
+        >>> from vision_agent import VisionAgent
+        >>> agent = VisionAgent()
+        >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
+    """
     def __init__(
         self,
         planner: Optional[LLM] = None,
@@ -255,6 +267,22 @@ def __init__(
         verbosity: int = 0,
         report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
     ) -> None:
+        """Initialize the Vision Agent.
+
+        Parameters:
+            planner (Optional[LLM]): The planner model to use. Defaults to OpenAILLM.
+            coder (Optional[LLM]): The coder model to use. Defaults to OpenAILLM.
+            tester (Optional[LLM]): The tester model to use. Defaults to OpenAILLM.
+            debugger (Optional[LLM]): The debugger model to use.
+            tool_recommender (Optional[Sim]): The tool recommender model to use.
+            verbosity (int): The verbosity level of the agent. Defaults to 0. 2 is the
+                highest verbosity level which will output all intermediate debugging
+                code.
+            report_progress_callback: a callback to report the progress of the agent.
+                This is useful for streaming logs in a web application where multiple
+                VisionAgent instances are running in parallel. This callback ensures
+                that the progress logs are not mixed up.
+        """
         self.planner = (
             OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
         )
@@ -278,6 +306,17 @@ def __call__(
         input: Union[List[Dict[str, str]], str],
         media: Optional[Union[str, Path]] = None,
     ) -> str:
+        """Chat with Vision Agent and return the code it generates.
+
+        Parameters:
+            input (Union[List[Dict[str, str]], str]): A conversation in the format of
+                [{"role": "user", "content": "describe your task here..."}], or a
+                plain string, which is treated as a single user message.
+            media (Optional[Union[str, Path]]): The media file to be used in the task.
+
+        Returns:
+            str: The code output by the Vision Agent.
+        """
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
         results = self.chat_with_workflow(input, media)
@@ -290,6 +329,18 @@ def chat_with_workflow(
         media: Optional[Union[str, Path]] = None,
         self_reflection: bool = False,
     ) -> Dict[str, Any]:
+        """Chat with Vision Agent and return intermediate information regarding the task.
+
+        Parameters:
+            chat (List[Dict[str, str]]): A conversation in the format of
+                [{"role": "user", "content": "describe your task here..."}].
+            media (Optional[Union[str, Path]]): The media file to be used in the task.
+            self_reflection (bool): Whether to reflect on the task and debug the code.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the code, test, test result, plan,
+                and working memory of the agent.
+        """
         if len(chat) == 0:
             raise ValueError("Chat cannot be empty.")
 
@@ -373,7 +424,7 @@ def chat_with_workflow(
 
         self.log_progress(
             {
-                "log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}",
+                "log": f"Vision Agent has concluded this chat.\nSuccess: {success}",
                 "finished": True,
             }
         )