From 8e3c52563a4f991c0b8ecbeaf97408db94b60de6 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Wed, 24 Apr 2024 20:54:39 -0700 Subject: [PATCH] updated docs --- vision_agent/agent/vision_agent.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 414f36d6..62dae768 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -473,9 +473,14 @@ def __call__( """Invoke the vision agent. Parameters: - input: a prompt that describe the task or a conversation in the format of + chat: A conversation in the format of [{"role": "user", "content": "describe your task here..."}]. - image: the input image referenced in the prompt parameter. + image: The input image referenced in the chat parameter. + reference_data: A dictionary containing the reference image, mask or bounding + box in the format of: + {"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]} + where the bounding box coordinates are normalized. + visualize_output: Whether to visualize the output. Returns: The result of the vision agent in text. @@ -515,12 +520,14 @@ def chat_with_workflow( """Chat with the vision agent and return the final answer and all tool results. Parameters: - chat: a conversation in the format of + chat: A conversation in the format of [{"role": "user", "content": "describe your task here..."}]. - image: the input image referenced in the chat parameter. - reference_data: a dictionary containing the reference image and mask. in the - format of {"image": "image.jpg", "mask": "mask.jpg} - visualize_output: whether to visualize the output. + image: The input image referenced in the chat parameter. + reference_data: A dictionary containing the reference image, mask or bounding + box in the format of: + {"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]} + where the bounding box coordinates are normalized. + visualize_output: Whether to visualize the output. Returns: A tuple where the first item is the final answer and the second item is a