Skip to content

Commit

Permalink
Fix product issues (#257)
Browse files Browse the repository at this point in the history
* strip installs from code

* make vision agent less verbose

* make artifact name more clear

* don't load artifacts locally

* added more error handling for saving files

* added prompt to keep code closer to user request

* revert back to old prompt

* revert back to old prompt

* formatting fix

* formatting fix

* fix test case

* fix format issue

* merge overlay count into overlay bbox
  • Loading branch information
dillonalaird authored Oct 4, 2024
1 parent c2e18ca commit 0eaf6ce
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 91 deletions.
22 changes: 21 additions & 1 deletion tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from vision_agent.agent.agent_utils import extract_code, extract_json
from vision_agent.agent.agent_utils import (
extract_code,
extract_json,
remove_installs_from_code,
)


def test_basic_json_extract():
Expand Down Expand Up @@ -43,3 +47,19 @@ def test_basic_json_extract():
a_code = extract_code(a)
assert "def test_basic_json_extract():" in a_code
assert "assert extract_json(a) == {" in a_code


def test_remove_installs_from_code():
    """Check that a line-leading ``!pip install`` is stripped while other text survives.

    The sample contains one install command on its own line and one occurrence
    of the same text inside a string literal; only the former should be removed.
    """
    sample = """import os
imoprt sys
!pip install pandas
def test():
print("!pip install dummy")
"""
    cleaned = remove_installs_from_code(sample)
    # The standalone install command must be gone.
    assert "!pip install pandas" not in cleaned
    # Text that merely mentions an install inside a string must be kept.
    assert "!pip install dummy" in cleaned
    # Ordinary imports are untouched.
    assert "import os" in cleaned
76 changes: 60 additions & 16 deletions tests/unit/tools/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,69 @@
# Generated by CodiumAI
import os
import tempfile
from pathlib import Path

import numpy as np

from vision_agent.tools.tools import save_video
from vision_agent.tools.tools import save_image, save_video


class TestSaveVideo:
def test_saves_frames_without_output_path(self):
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]
output_path = save_video(frames)
assert Path(output_path).exists()
def test_saves_frames_without_output_path():
    """Call ``save_video`` with no explicit path and check the returned path exists.

    The file reported by ``save_video`` is deleted afterwards so the test does
    not leave artifacts behind.
    """
    frames = []
    for _ in range(10):
        # Random 480x640 RGB frame of uint8 pixel values.
        frames.append(np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8))
    saved_path = save_video(frames)
    assert Path(saved_path).exists()
    os.remove(saved_path)


def test_saves_frames_with_output_path(self, tmp_path):
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]
video_output_path = str(tmp_path / "output.mp4")
output_path = save_video(frames, video_output_path)
def test_saves_frames_with_output_path():
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]

assert output_path == video_output_path
with tempfile.TemporaryDirectory() as tmp_dir:
video_output_path = Path(tmp_dir) / "output.mp4"
output_path = save_video(frames, str(video_output_path))

assert output_path == str(video_output_path)
assert Path(output_path).exists()


def test_save_null_image():
    """``save_image`` must reject ``None`` with the documented ``ValueError`` message."""
    image = None
    try:
        save_image(image, "tmp.jpg")
    except ValueError as e:
        assert str(e) == "The image is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_image did not raise ValueError for a None image")


def test_save_empty_image():
    """``save_image`` must reject a zero-sized array with the documented ``ValueError``."""
    image = np.zeros((0, 0, 3), dtype=np.uint8)
    try:
        save_image(image, "tmp.jpg")
    except ValueError as e:
        assert str(e) == "The image is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_image did not raise ValueError for an empty image")


def test_save_null_video():
    """``save_video`` must reject ``None`` frames with the documented ``ValueError``."""
    frames = None
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "Frames must be a list of NumPy arrays"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for None frames")


def test_save_empty_list():
    """``save_video`` must reject an empty frame list with the documented ``ValueError``."""
    frames = []
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "Frames must be a list of NumPy arrays"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for an empty list")


def test_save_invalid_frame():
    """``save_video`` must reject a zero-sized frame with the documented ``ValueError``."""
    frames = [np.zeros((0, 0, 3), dtype=np.uint8)]
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "A frame is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for an invalid frame")
6 changes: 6 additions & 0 deletions vision_agent/agent/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,9 @@ def extract_code(code: str) -> str:
if code.startswith("python\n"):
code = code[len("python\n") :]
return code


def remove_installs_from_code(code: str) -> str:
    """Strip notebook-style ``!pip install`` lines from a block of code.

    Only lines that begin with ``!pip install`` at column 0 are removed, along
    with their trailing newline. Occurrences elsewhere on a line (for example
    inside a string literal passed to ``print``) are left untouched, and the
    surrounding lines keep their own line breaks.

    Args:
        code: Source text that may contain ``!pip install ...`` lines.

    Returns:
        The source text with every such line removed.
    """
    # MULTILINE makes ``^`` match at each line start, so an install command on
    # the very first line is caught too. ``[^\n]*`` keeps the match on a single
    # line (no DOTALL), and the final group consumes that line's newline or
    # accepts end-of-string when the command is the last line without one.
    pattern = r"^!pip install[^\n]*(?:\n|\Z)"
    return re.sub(pattern, "", code, flags=re.MULTILINE)
2 changes: 0 additions & 2 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,6 @@ def chat_with_code(
code_interpreter.download_file(
str(remote_artifacts_path.name), str(self.local_artifacts_path)
)
artifacts.load(self.local_artifacts_path)
artifacts.save()
return orig_chat, artifacts

def streaming_message(self, message: Dict[str, Any]) -> None:
Expand Down
10 changes: 7 additions & 3 deletions vision_agent/agent/vision_agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@

import vision_agent.tools as T
from vision_agent.agent import Agent
from vision_agent.agent.agent_utils import extract_code, extract_json
from vision_agent.agent.agent_utils import (
extract_code,
extract_json,
remove_installs_from_code,
)
from vision_agent.agent.vision_agent_coder_prompts import (
CODE,
FIX_BUG,
Expand Down Expand Up @@ -836,8 +840,8 @@ def chat_with_workflow(
media=media_list,
)
success = cast(bool, results["success"])
code = cast(str, results["code"])
test = cast(str, results["test"])
code = remove_installs_from_code(cast(str, results["code"]))
test = remove_installs_from_code(cast(str, results["test"]))
working_memory.extend(results["working_memory"]) # type: ignore
plan.append({"code": code, "test": test, "plan": plan_i})

Expand Down
13 changes: 7 additions & 6 deletions vision_agent/agent/vision_agent_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question, set `let_user_respond` to `true`.
4. **Output in JSON**: Respond in the following format in JSON:
4. **Response**: Keep your responses short and concise. Provide the user only with the information they need to continue the conversation.
5. **Output in JSON**: Respond in the following format in JSON:
```json
{{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
Expand Down Expand Up @@ -62,7 +63,7 @@
[{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}, {'score': 0.23, 'label': 'dog', 'box': [0.2, 0.3, 0.4, 0.5]}]
AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect dogs and shown the output, do the results look good to you?", "let_user_respond": true}
AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "The code detectd two dogs, do the results look good to you?", "let_user_respond": true}
"""

EXAMPLES_CODE1_EXTRA = """
Expand Down Expand Up @@ -91,7 +92,7 @@
----- stdout -----
[{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}]
AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect one dog and shown you the output, do the results look good to you?", "let_user_respond": true}
AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "The code detected one dog, do these results look good to you?", "let_user_respond": true}
"""

EXAMPLES_CODE2 = """
Expand Down Expand Up @@ -157,16 +158,16 @@
----- stdout -----
2
AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "The code to detect workers with helmets is saved in code.py and the visualization under 'workers_viz.png'.", "let_user_respond": true}
USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"
AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "I will fine tune florence2 with the labels you provided <execute_python>object_detection_fine_tuning([{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}
AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "<execute_python>object_detection_fine_tuning([{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}
OBSERVATION:
[Fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will not update the code to use the fine tuned model. <execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "<execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
OBSERVATION:
[Artifact code.py edits]
Expand Down
1 change: 0 additions & 1 deletion vision_agent/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
loca_zero_shot_counting,
ocr,
overlay_bounding_boxes,
overlay_counting_results,
overlay_heat_map,
overlay_segmentation_masks,
owl_v2_image,
Expand Down
4 changes: 3 additions & 1 deletion vision_agent/tools/meta_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
)
output_str = "[Artifacts loaded]\n"
for k in self.artifacts.keys():
output_str += f"Artifact {k} loaded to {str(loaded_path / k)}\n"
output_str += (
f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
)
output_str += "[End of artifacts]\n"
print(output_str)
return output_str
Expand Down
2 changes: 1 addition & 1 deletion vision_agent/tools/tool_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import inspect
import logging
import os
from base64 import b64encode
from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple

Expand Down
Loading

0 comments on commit 0eaf6ce

Please sign in to comment.