Skip to content

Commit

Permalink
Fix Bugs (#265)
Browse files Browse the repository at this point in the history
* separated out planner, renamed chat methods

* fixed circular imports

* added type for plan context

* add planner as separate call to vision agent

* export plan context

* fixed circular imports

* fixed wrong key

* better json parsing

* more test cases for json parsing

* have planner visualize results

* add more guard rails to remove double chat

* revert changes with planning step for now

* revert to original prompts

* fix type issue

* fix format issue

* skip examples for flake8

* fix names and readme

* fixed type error

* fix countgd integ test

* synced code with new code interpreter arg

* separated out planner, renamed chat methods

* add planner as separate call to vision agent

* revert changes with planning step for now

* strip extra function calls from generated code

* fix code rewrite issue with ()

* fix issue if plan format is incorrect

* increase count threshold and size

* switch to using tags to fix issue of mixing up code and tests

* skip tests for flake8

* fix type issues

* fix test case

* remove extra planning import

* fixed type issues

* fixed type issues

* fix test case

* fix format issue
  • Loading branch information
dillonalaird authored Oct 11, 2024
1 parent 5775fdd commit d9445e3
Show file tree
Hide file tree
Showing 14 changed files with 402 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Linting
run: |
# stop the build if there are Python syntax errors or undefined names
poetry run flake8 . --exclude .venv,examples --count --show-source --statistics
poetry run flake8 . --exclude .venv,examples,tests --count --show-source --statistics
- name: Check Format
run: |
poetry run black --check --diff --color .
Expand Down
60 changes: 58 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pytube = "15.0.0"
anthropic = "^0.31.0"
pydantic = "2.7.4"
av = "^11.0.0"
redbaron = "^0.9.2"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
Expand Down
4 changes: 2 additions & 2 deletions tests/integ/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
blip_image_caption,
clip,
closest_mask_distance,
countgd_counting,
countgd_example_based_counting,
depth_anything_v2,
detr_segmentation,
dpt_hybrid_midas,
Expand All @@ -32,8 +34,6 @@
template_match,
vit_image_classification,
vit_nsfw_classification,
countgd_counting,
countgd_example_based_counting,
)

FINE_TUNE_ID = "65ebba4a-88b7-419f-9046-0750e30250da"
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test_meta_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from vision_agent.tools.meta_tools import (
Artifacts,
check_and_load_image,
use_extra_vision_agent_args,
use_object_detection_fine_tuning,
)

Expand Down Expand Up @@ -71,3 +72,37 @@ def test_use_object_detection_fine_tuning_twice():
assert 'owl_v2_image("two", image2, "456")' in output
assert 'florence2_sam2_image("three", image3, "456")' in output
assert artifacts["code"] == expected_code2


def test_use_object_detection_fine_tuning_real_case():
    """A call whose string argument contains parentheses still gets the fine-tune id."""
    rewritten_call = 'florence2_phrase_grounding("(strange arg)", image1, "123")'

    artifacts = Artifacts("test")
    artifacts["code"] = "florence2_phrase_grounding('(strange arg)', image1)"

    output = use_object_detection_fine_tuning(artifacts, "code", "123")

    # The rewritten call must be reported back and stored in the artifact.
    assert rewritten_call in output
    assert artifacts["code"] == rewritten_call


def test_use_extra_vision_agent_args_real_case():
    """`test_multi_plan=True` is appended to calls whose file path contains "(1)"."""
    # (input call, expected rewritten call)
    cases = [
        (
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True)",
        ),
        (
            "edit_vision_code(artifacts, 'code.py', ['write code 1', 'write code 2'], ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "edit_vision_code(artifacts, 'code.py', ['write code 1', 'write code 2'], ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True)",
        ),
    ]

    for source_call, expected_call in cases:
        rewritten = use_extra_vision_agent_args(source_call)
        assert rewritten == expected_call


def test_use_extra_vision_args_with_custom_tools():
    """Both `test_multi_plan` and `custom_tool_names` are appended, in that order."""
    tool_names = ["tool1", "tool2"]
    # (input call, expected rewritten call)
    cases = [
        (
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True, custom_tool_names=['tool1', 'tool2'])",
        ),
        (
            "edit_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "edit_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True, custom_tool_names=['tool1', 'tool2'])",
        ),
    ]

    for source_call, expected_call in cases:
        rewritten = use_extra_vision_agent_args(
            source_call, custom_tool_names=list(tool_names)
        )
        assert rewritten == expected_call
14 changes: 5 additions & 9 deletions tests/unit/test_va.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,23 @@ def test_parse_execution_no_test_multi_plan_edit():
code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
assert (
parse_execution(code, False)
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])"
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False)"
)


def test_parse_execution_custom_tool_names_generate():
code = "<execute_python>generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'])</execute_python>"
assert (
parse_execution(
code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
)
parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
== "generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
)


def test_prase_execution_custom_tool_names_edit():
def test_parse_execution_custom_tool_names_edit():
code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
assert (
parse_execution(
code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
)
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], custom_tool_names=['owl_v2_image'])"
parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
)


Expand Down
143 changes: 143 additions & 0 deletions tests/unit/test_vac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from vision_agent.agent.vision_agent_coder import strip_function_calls


def test_strip_non_function_real_case():
    """Check `strip_function_calls` on a realistic generated script.

    The input script ends with a module-level assignment that calls
    `check_helmets(...)`; the expected output is the same script without that
    final line. The bare `register_heif_opener()` call is passed in
    `exclusions` and is still present in the expected output.
    """
    # NOTE(review): the fixture text below appears with its indentation
    # flattened in this view -- confirm against the repository copy before
    # relying on the exact whitespace inside these string literals.

    # Input: imports, an excluded bare call, one function definition, and a
    # trailing top-level call to that function.
    code = """import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool
from vision_agent.tools import load_image, owl_v2_image, overlay_bounding_boxes, save_image, save_json
def check_helmets(image_path):
# Load the image
image = load_image(image_path)
# Detect people and helmets
detections = owl_v2_image("person, helmet", image, box_threshold=0.2)
# Separate people and helmets
people = [d for d in detections if d['label'] == 'person']
helmets = [d for d in detections if d['label'] == 'helmet']
people_with_helmets = 0
people_without_helmets = 0
height, width = image.shape[:2]
for person in people:
person_x = (person['bbox'][0] + person['bbox'][2]) / 2
person_y = person['bbox'][1] # Top of the bounding box
helmet_found = False
for helmet in helmets:
helmet_x = (helmet['bbox'][0] + helmet['bbox'][2]) / 2
helmet_y = (helmet['bbox'][1] + helmet['bbox'][3]) / 2
# Check if the helmet is within 20 pixels of the person's head
if (abs((helmet_x - person_x) * width) < 20 and
-5 < ((helmet_y - person_y) * height) < 20):
helmet_found = True
break
if helmet_found:
people_with_helmets += 1
person['label'] = 'person with helmet'
else:
people_without_helmets += 1
person['label'] = 'person without helmet'
# Create the count dictionary
count_dict = {
"people_with_helmets": people_with_helmets,
"people_without_helmets": people_without_helmets
}
# Visualize the results
visualized_image = overlay_bounding_boxes(image, detections)
# Save the visualized image
save_image(visualized_image, "/home/user/visualized_result.png")
# Save the count dictionary as JSON
save_json(count_dict, "/home/user/helmet_counts.json")
return count_dict
# The function can be called with the image path
result = check_helmets("/home/user/edQPXGK_workers.png")"""
    # Expected: identical to `code` except the final `result = ...` line is gone.
    expected_code = """import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool
from vision_agent.tools import load_image, owl_v2_image, overlay_bounding_boxes, save_image, save_json
def check_helmets(image_path):
# Load the image
image = load_image(image_path)
# Detect people and helmets
detections = owl_v2_image("person, helmet", image, box_threshold=0.2)
# Separate people and helmets
people = [d for d in detections if d['label'] == 'person']
helmets = [d for d in detections if d['label'] == 'helmet']
people_with_helmets = 0
people_without_helmets = 0
height, width = image.shape[:2]
for person in people:
person_x = (person['bbox'][0] + person['bbox'][2]) / 2
person_y = person['bbox'][1] # Top of the bounding box
helmet_found = False
for helmet in helmets:
helmet_x = (helmet['bbox'][0] + helmet['bbox'][2]) / 2
helmet_y = (helmet['bbox'][1] + helmet['bbox'][3]) / 2
# Check if the helmet is within 20 pixels of the person's head
if (abs((helmet_x - person_x) * width) < 20 and
-5 < ((helmet_y - person_y) * height) < 20):
helmet_found = True
break
if helmet_found:
people_with_helmets += 1
person['label'] = 'person with helmet'
else:
people_without_helmets += 1
person['label'] = 'person without helmet'
# Create the count dictionary
count_dict = {
"people_with_helmets": people_with_helmets,
"people_without_helmets": people_without_helmets
}
# Visualize the results
visualized_image = overlay_bounding_boxes(image, detections)
# Save the visualized image
save_image(visualized_image, "/home/user/visualized_result.png")
# Save the count dictionary as JSON
save_json(count_dict, "/home/user/helmet_counts.json")
return count_dict
# The function can be called with the image path"""
    # `register_heif_opener` is excluded so its bare call is expected to survive.
    code_out = strip_function_calls(code, exclusions=["register_heif_opener"])
    assert code_out == expected_code
22 changes: 22 additions & 0 deletions vision_agent/agent/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_CONSOLE = Console()
_MAX_TABULATE_COL_WIDTH = 80


def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -91,6 +92,27 @@ def extract_code(code: str) -> str:
return code


def extract_tag(
    content: str,
    tag: str,
) -> Optional[str]:
    """Return the text enclosed by every ``<tag>...</tag>`` pair in ``content``.

    The string is scanned left to right. Each opening tag is paired with the
    first closing tag that appears *after* it, and scanning resumes after that
    closing tag. Scanning stops at the first opening tag that has no closing
    tag after it.

    Parameters:
        content: Text that may contain one or more tagged sections.
        tag: Tag name without angle brackets, e.g. ``"execute_python"``.

    Returns:
        The enclosed sections joined with ``"\\n"``, or ``None`` when no
        complete ``<tag>...</tag>`` pair is found.
    """
    open_tag = f"<{tag}>"
    close_tag = f"</{tag}>"

    sections = []
    cursor = 0
    while True:
        start = content.find(open_tag, cursor)
        if start == -1:
            break
        start += len(open_tag)

        # Look for the closing tag only after the opening tag. Searching from
        # the start of the remaining text would match a stray closing tag that
        # precedes the opening tag and re-extract the same section.
        end = content.find(close_tag, start)
        if end == -1:
            break

        sections.append(content[start:end])
        cursor = end + len(close_tag)

    if not sections:
        return None
    return "\n".join(sections)


def remove_installs_from_code(code: str) -> str:
pattern = r"\n!pip install.*?(\n|\Z)\n"
code = re.sub(pattern, "", code, flags=re.DOTALL)
Expand Down
Loading

0 comments on commit d9445e3

Please sign in to comment.