From eb33d08aa955bfba78f351ee653b501d9dbafc23 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Tue, 28 May 2024 21:40:04 -0700 Subject: [PATCH] added documentation for chat with workflow --- vision_agent/agent/easytool_v2.py | 2 +- vision_agent/agent/vision_agent.py | 53 +++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/vision_agent/agent/easytool_v2.py b/vision_agent/agent/easytool_v2.py index 035dc391..1ef382e7 100644 --- a/vision_agent/agent/easytool_v2.py +++ b/vision_agent/agent/easytool_v2.py @@ -544,7 +544,7 @@ def chat_with_workflow( visualize_output: Optional[bool] = False, self_reflection: Optional[bool] = True, ) -> Tuple[str, List[Dict]]: - """Chat with the vision agent and return the final answer and all tool results. + """Chat with EasyToolV2 and return the final answer and all tool results. Parameters: chat: A conversation in the format of diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 3d13bedd..ac4a2453 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -245,6 +245,18 @@ def retrieve_tools( class VisionAgent(Agent): + """Vision Agent is an agentic framework that can output code based on a user + request. It can plan tasks, retrieve relevant tools, write code, write tests and + reflect on failed test cases to debug code. It is inspired by AgentCoder + https://arxiv.org/abs/2312.13010 and Data Interpreter + https://arxiv.org/abs/2402.18679 + + Example + ------- + >>> from vision_agent import VisionAgent + >>> agent = VisionAgent() + >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg") + """ def __init__( self, planner: Optional[LLM] = None, @@ -255,6 +267,22 @@ def __init__( verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, ) -> None: + """Initialize the Vision Agent. 
+ + Parameters: + planner (Optional[LLM]): The planner model to use. Defaults to OpenAILLM. + coder (Optional[LLM]): The coder model to use. Defaults to OpenAILLM. + tester (Optional[LLM]): The tester model to use. Defaults to OpenAILLM. + debugger (Optional[LLM]): The debugger model to use. + tool_recommender (Optional[Sim]): The tool recommender model to use. + verbosity (int): The verbosity level of the agent. Defaults to 0. 2 is the + highest verbosity level which will output all intermediate debugging + code. + report_progress_callback: a callback to report the progress of the agent. + This is useful for streaming logs in a web application where multiple + VisionAgent instances are running in parallel. This callback ensures + that the progress is not mixed up. + """ self.planner = ( OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner ) @@ -278,6 +306,17 @@ def __call__( input: Union[List[Dict[str, str]], str], media: Optional[Union[str, Path]] = None, ) -> str: + """Chat with Vision Agent and return the code it outputs. + + Parameters: + input (Union[List[Dict[str, str]], str]): A conversation in the format of + [{"role": "user", "content": "describe your task here..."}] or a string + of just the contents. + media (Optional[Union[str, Path]]): The media file to be used in the task. + + Returns: + str: The code output by the Vision Agent. + """ if isinstance(input, str): input = [{"role": "user", "content": input}] results = self.chat_with_workflow(input, media) @@ -290,6 +329,18 @@ def chat_with_workflow( media: Optional[Union[str, Path]] = None, self_reflection: bool = False, ) -> Dict[str, Any]: + """Chat with Vision Agent and return intermediate information regarding the task. + + Parameters: + chat (List[Dict[str, str]]): A conversation in the format of + [{"role": "user", "content": "describe your task here..."}]. 
+ media (Optional[Union[str, Path]]): The media file to be used in the task. + self_reflection (bool): Whether to reflect on the task and debug the code. + + Returns: + Dict[str, Any]: A dictionary containing the code, test, test result, plan, + and working memory of the agent. + """ if len(chat) == 0: raise ValueError("Chat cannot be empty.") @@ -373,7 +424,7 @@ def chat_with_workflow( self.log_progress( { - "log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}", + "log": f"Vision Agent has concluded this chat.\nSuccess: {success}", "finished": True, } )