From 7f193b66d0aad8bcbedbcbd0cb856fe570483570 Mon Sep 17 00:00:00 2001 From: Dillon Laird Date: Wed, 5 Jun 2024 22:09:18 -0700 Subject: [PATCH] Minor fixes (#119) * minor fixes * fix rgb issue * fix name typo * add rgb prompt --- vision_agent/agent/vision_agent_prompts.py | 2 +- vision_agent/tools/tools.py | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/vision_agent/agent/vision_agent_prompts.py b/vision_agent/agent/vision_agent_prompts.py index d2c6fe4b..f3847b75 100644 --- a/vision_agent/agent/vision_agent_prompts.py +++ b/vision_agent/agent/vision_agent_prompts.py @@ -71,7 +71,7 @@ 1. **Understand and Clarify**: Make sure you understand the task. 2. **Algorithm/Method Selection**: Decide on the most efficient way. 3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode. -4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`. +4. **Code Generation**: Translate your pseudocode into executable Python code. Ensure you use correct arguments, remember coordinates are always returned normalized from `vision_agent.tools`. All images from `vision_agent.tools` are in RGB format, red is (255, 0, 0) and blue is (0, 0, 255). 5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools import *`. Use a debug flag in the function parameters to toggle logging on and off. """ diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index 040a48a4..01aae061 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -179,7 +179,7 @@ def extract_frames( ) -> List[Tuple[np.ndarray, float]]: """'extract_frames' extracts frames from a video, returns a list of tuples (frame, timestamp), where timestamp is the relative time in seconds where the frame was - captured. The frame is a local image file path. + captured. The frame is a numpy array. Parameters: video_uri (Union[str, Path]): The path to the video file. @@ -530,27 +530,22 @@ def load_image(image_path: str) -> np.ndarray: return np.array(image) -def save_image(image: np.ndarray) -> str: - """'save_image' is a utility function that saves an image as a temporary file. +def save_image(image: np.ndarray, file_path: str) -> None: + """'save_image' is a utility function that saves an image to a file path. Parameters: image (np.ndarray): The image to save. - - Returns: - str: The path to the saved image. + file_path (str): The path to save the image file. Example ------- >>> save_image(image) - "/tmp/tmpabc123.png" """ from IPython.display import display pil_image = Image.fromarray(image.astype(np.uint8)) display(pil_image) - with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f: - pil_image.save(f, "PNG") - return f.name + pil_image.save(file_path) def save_video(