Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix product issues #257

Merged
merged 13 commits into from
Oct 4, 2024
22 changes: 21 additions & 1 deletion tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from vision_agent.agent.agent_utils import extract_code, extract_json
from vision_agent.agent.agent_utils import (
extract_code,
extract_json,
remove_installs_from_code,
)


def test_basic_json_extract():
Expand Down Expand Up @@ -43,3 +47,19 @@ def test_basic_json_extract():
a_code = extract_code(a)
assert "def test_basic_json_extract():" in a_code
assert "assert extract_json(a) == {" in a_code


def test_remove_installs_from_code():
    """Check which ``!pip install`` occurrences survive remove_installs_from_code.

    The standalone ``!pip install pandas`` line must be gone from the result,
    while the ``!pip install dummy`` text embedded in the ``print`` call and the
    ordinary ``import os`` line must still be present.
    """
    source = """import os
imoprt sys

!pip install pandas


def test():
print("!pip install dummy")
"""
    cleaned = remove_installs_from_code(source)
    # The install line that stands on its own is stripped ...
    assert "!pip install pandas" not in cleaned
    # ... but the same text inside a call argument is left untouched.
    assert "!pip install dummy" in cleaned
    assert "import os" in cleaned
76 changes: 60 additions & 16 deletions tests/unit/tools/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,69 @@
# Generated by CodiumAI
import os
import tempfile
from pathlib import Path

import numpy as np

from vision_agent.tools.tools import save_video
from vision_agent.tools.tools import save_image, save_video


class TestSaveVideo:
def test_saves_frames_without_output_path(self):
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]
output_path = save_video(frames)
assert Path(output_path).exists()
def test_saves_frames_without_output_path():
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]
output_path = save_video(frames)
assert Path(output_path).exists()
os.remove(output_path)


def test_saves_frames_with_output_path(self, tmp_path):
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]
video_output_path = str(tmp_path / "output.mp4")
output_path = save_video(frames, video_output_path)
def test_saves_frames_with_output_path():
frames = [
np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
]

assert output_path == video_output_path
with tempfile.TemporaryDirectory() as tmp_dir:
video_output_path = Path(tmp_dir) / "output.mp4"
output_path = save_video(frames, str(video_output_path))

assert output_path == str(video_output_path)
assert Path(output_path).exists()


def test_save_null_image():
    """Passing ``None`` as the image must make save_image raise ValueError.

    The test fails if no exception is raised, and also if the message differs
    from the expected text.
    """
    image = None
    try:
        save_image(image, "tmp.jpg")
    except ValueError as e:
        assert str(e) == "The image is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_image did not raise ValueError for a None image")


def test_save_empty_image():
    """A zero-sized ``(0, 0, 3)`` array must make save_image raise ValueError.

    The test fails if no exception is raised, and also if the message differs
    from the expected text.
    """
    image = np.zeros((0, 0, 3), dtype=np.uint8)
    try:
        save_image(image, "tmp.jpg")
    except ValueError as e:
        assert str(e) == "The image is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_image did not raise ValueError for an empty image")


def test_save_null_video():
    """Passing ``None`` as the frames must make save_video raise ValueError.

    The test fails if no exception is raised, and also if the message differs
    from the expected text.
    """
    frames = None
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "Frames must be a list of NumPy arrays"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for None frames")


def test_save_empty_list():
    """An empty frame list must make save_video raise ValueError.

    The test fails if no exception is raised, and also if the message differs
    from the expected text.
    """
    frames = []
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "Frames must be a list of NumPy arrays"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for an empty list")


def test_save_invalid_frame():
    """A zero-sized ``(0, 0, 3)`` frame must make save_video raise ValueError.

    The test fails if no exception is raised, and also if the message differs
    from the expected text.
    """
    frames = [np.zeros((0, 0, 3), dtype=np.uint8)]
    try:
        save_video(frames, "tmp.mp4")
    except ValueError as e:
        assert str(e) == "A frame is not a valid NumPy array with shape (H, W, C)"
    else:
        # Without this branch the test would pass silently when nothing is raised.
        raise AssertionError("save_video did not raise ValueError for an invalid frame")
6 changes: 6 additions & 0 deletions vision_agent/agent/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,9 @@ def extract_code(code: str) -> str:
if code.startswith("python\n"):
code = code[len("python\n") :]
return code


def remove_installs_from_code(code: str) -> str:
    """Strip notebook-style ``!pip install`` lines from ``code``.

    Only lines that *begin* with ``!pip install`` (column 0) are removed, so the
    same text appearing inside an indented statement or a string argument such
    as ``print("!pip install dummy")`` is left alone.

    Args:
        code: Source text that may contain ``!pip install ...`` lines.

    Returns:
        ``code`` with every such line (and its own line terminator) removed.
        All other lines, including surrounding blank lines, are preserved.
    """
    # Match one whole line at a time: MULTILINE anchors ``^`` at each line
    # start, ``[^\n]*`` keeps the match from running onto following lines, and
    # the optional ``\n`` lets an install on the very last line match too.
    # Neighbouring lines are never touched, so code before and after the
    # install can no longer be merged together or deleted.
    pattern = r"^!pip install[^\n]*\n?"
    return re.sub(pattern, "", code, flags=re.MULTILINE)
2 changes: 0 additions & 2 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,6 @@ def chat_with_code(
code_interpreter.download_file(
str(remote_artifacts_path.name), str(self.local_artifacts_path)
)
artifacts.load(self.local_artifacts_path)
artifacts.save()
Comment on lines -410 to -411
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if this is intended. but I'm not too familiar with the code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was causing front end some issues. I did some local tests and I don't think removing it causes any issues

return orig_chat, artifacts

def streaming_message(self, message: Dict[str, Any]) -> None:
Expand Down
10 changes: 7 additions & 3 deletions vision_agent/agent/vision_agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@

import vision_agent.tools as T
from vision_agent.agent import Agent
from vision_agent.agent.agent_utils import extract_code, extract_json
from vision_agent.agent.agent_utils import (
extract_code,
extract_json,
remove_installs_from_code,
)
from vision_agent.agent.vision_agent_coder_prompts import (
CODE,
FIX_BUG,
Expand Down Expand Up @@ -836,8 +840,8 @@ def chat_with_workflow(
media=media_list,
)
success = cast(bool, results["success"])
code = cast(str, results["code"])
test = cast(str, results["test"])
code = remove_installs_from_code(cast(str, results["code"]))
test = remove_installs_from_code(cast(str, results["test"]))
working_memory.extend(results["working_memory"]) # type: ignore
plan.append({"code": code, "test": test, "plan": plan_i})

Expand Down
13 changes: 7 additions & 6 deletions vision_agent/agent/vision_agent_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question, set `let_user_respond` to `true`.
4. **Output in JSON**: Respond in the following format in JSON:
4. **Response**: Keep your responses short and concise. Provide the user only with the information they need to continue the conversation.
5. **Output in JSON**: Respond in the following format in JSON:

```json
{{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
Expand Down Expand Up @@ -62,7 +63,7 @@
[{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}, {'score': 0.23, 'label': 'dog', 'box': [0.2, 0.3, 0.4, 0.5]}]


AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect dogs and shown the output, do the results look good to you?", "let_user_respond": true}
AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "The code detected two dogs, do the results look good to you?", "let_user_respond": true}
"""

EXAMPLES_CODE1_EXTRA = """
Expand Down Expand Up @@ -91,7 +92,7 @@
----- stdout -----
[{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}]

AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect one dog and shown you the output, do the results look good to you?", "let_user_respond": true}
AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "The code detected one dog, do these results look good to you?", "let_user_respond": true}
"""

EXAMPLES_CODE2 = """
Expand Down Expand Up @@ -157,16 +158,16 @@
----- stdout -----
2

AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "The code to detect workers with helmets is saved in code.py and the visualization under 'workers_viz.png'.", "let_user_respond": true}

USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"

AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "I will fine tune florence2 with the labels you provided <execute_python>object_detection_fine_tuning([{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}
AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "<execute_python>object_detection_fine_tuning([{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}

OBSERVATION:
[Fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]

AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will not update the code to use the fine tuned model. <execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "<execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}

OBSERVATION:
[Artifact code.py edits]
Expand Down
1 change: 0 additions & 1 deletion vision_agent/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
loca_zero_shot_counting,
ocr,
overlay_bounding_boxes,
overlay_counting_results,
overlay_heat_map,
overlay_segmentation_masks,
owl_v2_image,
Expand Down
4 changes: 3 additions & 1 deletion vision_agent/tools/meta_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
)
output_str = "[Artifacts loaded]\n"
for k in self.artifacts.keys():
output_str += f"Artifact {k} loaded to {str(loaded_path / k)}\n"
output_str += (
f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
)
output_str += "[End of artifacts]\n"
print(output_str)
return output_str
Expand Down
2 changes: 1 addition & 1 deletion vision_agent/tools/tool_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import inspect
import logging
import os
from base64 import b64encode
from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple

Expand Down
Loading
Loading