Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Bugs #265

Merged
merged 37 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ba4fe87
separated out planner, renamed chat methods
dillonalaird Sep 28, 2024
fe64f02
fixed circular imports
dillonalaird Sep 30, 2024
841945b
added type for plan context
dillonalaird Sep 30, 2024
43b2a2c
add planner as separate call to vision agent
dillonalaird Oct 2, 2024
b0b56a6
export plan context
dillonalaird Oct 2, 2024
e0075c7
fixed circular imports
dillonalaird Oct 2, 2024
4786133
fixed wrong key
dillonalaird Oct 2, 2024
69691ae
better json parsing
dillonalaird Oct 2, 2024
14332f6
more test cases for json parsing
dillonalaird Oct 2, 2024
87ac467
have planner visualize results
dillonalaird Oct 2, 2024
b2cd1e5
add more guard rails to remove double chat
dillonalaird Oct 2, 2024
f317fc3
revert changes with planning step for now
dillonalaird Oct 3, 2024
6805bfa
revert to original prompts
dillonalaird Oct 4, 2024
0f6b1bb
fix type issue
dillonalaird Oct 4, 2024
cf20778
fix format issue
dillonalaird Oct 4, 2024
e7a9c5b
skip examples for flake8
dillonalaird Oct 10, 2024
8448255
fix names and readme
dillonalaird Oct 10, 2024
dedae07
fixed type error
dillonalaird Oct 10, 2024
969c420
fix countgd integ test
dillonalaird Oct 10, 2024
ad6edf1
synced code with new code interpreter arg
dillonalaird Oct 11, 2024
ba63a60
separated out planner, renamed chat methods
dillonalaird Sep 28, 2024
7ec4838
add planner as separate call to vision agent
dillonalaird Oct 2, 2024
29d934e
revert changes with planning step for now
dillonalaird Oct 3, 2024
4d184bf
strip extra function calls from generated code
dillonalaird Oct 10, 2024
a740a28
fix code rewrite issue with ()
dillonalaird Oct 10, 2024
4e3cfaa
fix issue if plan format is incorrect
dillonalaird Oct 11, 2024
c454756
increase count threshold and size
dillonalaird Oct 11, 2024
265b227
switch to using tags to fix issue of mixing up code and tests
dillonalaird Oct 11, 2024
8a84f4d
skip tests for flake8
dillonalaird Oct 11, 2024
7b2e87f
fix type issues
dillonalaird Oct 11, 2024
0cf9b4d
Merge branch 'main' into fix-issues
dillonalaird Oct 11, 2024
bfd91c5
fix test case
dillonalaird Oct 11, 2024
1c55d90
remove extra planning import
dillonalaird Oct 11, 2024
eb7f7d2
fixed type issues
dillonalaird Oct 11, 2024
645ca02
fixed type issues
dillonalaird Oct 11, 2024
7f1aba9
fix test case
dillonalaird Oct 11, 2024
b09dd9b
fix format issue
dillonalaird Oct 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci_cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- name: Linting
run: |
# stop the build if there are Python syntax errors or undefined names
poetry run flake8 . --exclude .venv,examples --count --show-source --statistics
poetry run flake8 . --exclude .venv,examples,tests --count --show-source --statistics
- name: Check Format
run: |
poetry run black --check --diff --color .
Expand Down
60 changes: 58 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pytube = "15.0.0"
anthropic = "^0.31.0"
pydantic = "2.7.4"
av = "^11.0.0"
redbaron = "^0.9.2"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
Expand Down
4 changes: 2 additions & 2 deletions tests/integ/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
blip_image_caption,
clip,
closest_mask_distance,
countgd_counting,
countgd_example_based_counting,
depth_anything_v2,
detr_segmentation,
dpt_hybrid_midas,
Expand All @@ -32,8 +34,6 @@
template_match,
vit_image_classification,
vit_nsfw_classification,
countgd_counting,
countgd_example_based_counting,
)

FINE_TUNE_ID = "65ebba4a-88b7-419f-9046-0750e30250da"
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/test_meta_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from vision_agent.tools.meta_tools import (
Artifacts,
check_and_load_image,
use_extra_vision_agent_args,
use_object_detection_fine_tuning,
)

Expand Down Expand Up @@ -71,3 +72,37 @@ def test_use_object_detection_fine_tuning_twice():
assert 'owl_v2_image("two", image2, "456")' in output
assert 'florence2_sam2_image("three", image3, "456")' in output
assert artifacts["code"] == expected_code2


def test_use_object_detection_fine_tuning_real_case():
    """Check the fine-tune rewrite on a call whose prompt contains parentheses.

    The stored code calls ``florence2_phrase_grounding`` with the prompt
    ``'(strange arg)'``. After ``use_object_detection_fine_tuning`` runs with
    the id ``"123"``, both the returned output and the stored artifact must
    contain the call with the id appended as a third argument.
    """
    rewritten_call = 'florence2_phrase_grounding("(strange arg)", image1, "123")'

    store = Artifacts("test")
    store["code"] = "florence2_phrase_grounding('(strange arg)', image1)"

    result = use_object_detection_fine_tuning(store, "code", "123")

    assert rewritten_call in result
    assert store["code"] == rewritten_call


def test_use_extra_vision_agent_args_real_case():
    """Check that ``test_multi_plan=True`` is appended to real-world calls.

    Both cases pass a media path containing ``%20`` and parentheses; the
    expected result is the original call with ``test_multi_plan=True`` added
    as the last keyword argument.
    """
    cases = [
        (
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True)",
        ),
        (
            "edit_vision_code(artifacts, 'code.py', ['write code 1', 'write code 2'], ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "edit_vision_code(artifacts, 'code.py', ['write code 1', 'write code 2'], ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True)",
        ),
    ]

    for source_call, rewritten_call in cases:
        assert use_extra_vision_agent_args(source_call) == rewritten_call


def test_use_extra_vision_args_with_custom_tools():
    """Check that custom tool names are appended after ``test_multi_plan``.

    With ``custom_tool_names=["tool1", "tool2"]`` the expected rewrite adds
    ``test_multi_plan=True, custom_tool_names=['tool1', 'tool2']`` to the end
    of both the generate and the edit call.
    """
    cases = [
        (
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True, custom_tool_names=['tool1', 'tool2'])",
        ),
        (
            "edit_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
            "edit_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'], test_multi_plan=True, custom_tool_names=['tool1', 'tool2'])",
        ),
    ]

    for source_call, rewritten_call in cases:
        actual = use_extra_vision_agent_args(
            source_call, custom_tool_names=["tool1", "tool2"]
        )
        assert actual == rewritten_call
14 changes: 5 additions & 9 deletions tests/unit/test_va.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,23 @@ def test_parse_execution_no_test_multi_plan_edit():
code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
assert (
parse_execution(code, False)
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])"
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False)"
)


def test_parse_execution_custom_tool_names_generate():
code = "<execute_python>generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'])</execute_python>"
assert (
parse_execution(
code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
)
parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
== "generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
)


def test_prase_execution_custom_tool_names_edit():
def test_parse_execution_custom_tool_names_edit():
code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
assert (
parse_execution(
code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
)
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], custom_tool_names=['owl_v2_image'])"
parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
== "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
)


Expand Down
143 changes: 143 additions & 0 deletions tests/unit/test_vac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from vision_agent.agent.vision_agent_coder import strip_function_calls


# Regression test for strip_function_calls using a realistic generated script.
# The input script ends with a module-level call (`result = check_helmets(...)`);
# the expected output is the same script with that final line removed, while the
# `register_heif_opener()` call is still present in the expected text.
# NOTE(review): in this view the multi-line string literals show no indentation;
# confirm the literal contents against the checked-in file before relying on them.
def test_strip_non_function_real_case():
code = """import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool


from vision_agent.tools import load_image, owl_v2_image, overlay_bounding_boxes, save_image, save_json

def check_helmets(image_path):
# Load the image
image = load_image(image_path)

# Detect people and helmets
detections = owl_v2_image("person, helmet", image, box_threshold=0.2)

# Separate people and helmets
people = [d for d in detections if d['label'] == 'person']
helmets = [d for d in detections if d['label'] == 'helmet']

people_with_helmets = 0
people_without_helmets = 0

height, width = image.shape[:2]

for person in people:
person_x = (person['bbox'][0] + person['bbox'][2]) / 2
person_y = person['bbox'][1] # Top of the bounding box

helmet_found = False
for helmet in helmets:
helmet_x = (helmet['bbox'][0] + helmet['bbox'][2]) / 2
helmet_y = (helmet['bbox'][1] + helmet['bbox'][3]) / 2

# Check if the helmet is within 20 pixels of the person's head
if (abs((helmet_x - person_x) * width) < 20 and
-5 < ((helmet_y - person_y) * height) < 20):
helmet_found = True
break

if helmet_found:
people_with_helmets += 1
person['label'] = 'person with helmet'
else:
people_without_helmets += 1
person['label'] = 'person without helmet'

# Create the count dictionary
count_dict = {
"people_with_helmets": people_with_helmets,
"people_without_helmets": people_without_helmets
}

# Visualize the results
visualized_image = overlay_bounding_boxes(image, detections)

# Save the visualized image
save_image(visualized_image, "/home/user/visualized_result.png")

# Save the count dictionary as JSON
save_json(count_dict, "/home/user/helmet_counts.json")

return count_dict

# The function can be called with the image path
result = check_helmets("/home/user/edQPXGK_workers.png")"""
expected_code = """import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool


from vision_agent.tools import load_image, owl_v2_image, overlay_bounding_boxes, save_image, save_json

def check_helmets(image_path):
# Load the image
image = load_image(image_path)

# Detect people and helmets
detections = owl_v2_image("person, helmet", image, box_threshold=0.2)

# Separate people and helmets
people = [d for d in detections if d['label'] == 'person']
helmets = [d for d in detections if d['label'] == 'helmet']

people_with_helmets = 0
people_without_helmets = 0

height, width = image.shape[:2]

for person in people:
person_x = (person['bbox'][0] + person['bbox'][2]) / 2
person_y = person['bbox'][1] # Top of the bounding box

helmet_found = False
for helmet in helmets:
helmet_x = (helmet['bbox'][0] + helmet['bbox'][2]) / 2
helmet_y = (helmet['bbox'][1] + helmet['bbox'][3]) / 2

# Check if the helmet is within 20 pixels of the person's head
if (abs((helmet_x - person_x) * width) < 20 and
-5 < ((helmet_y - person_y) * height) < 20):
helmet_found = True
break

if helmet_found:
people_with_helmets += 1
person['label'] = 'person with helmet'
else:
people_without_helmets += 1
person['label'] = 'person without helmet'

# Create the count dictionary
count_dict = {
"people_with_helmets": people_with_helmets,
"people_without_helmets": people_without_helmets
}

# Visualize the results
visualized_image = overlay_bounding_boxes(image, detections)

# Save the visualized image
save_image(visualized_image, "/home/user/visualized_result.png")

# Save the count dictionary as JSON
save_json(count_dict, "/home/user/helmet_counts.json")

return count_dict

# The function can be called with the image path"""
# register_heif_opener is passed as an exclusion; its call appears unchanged in expected_code.
code_out = strip_function_calls(code, exclusions=["register_heif_opener"])
assert code_out == expected_code
22 changes: 22 additions & 0 deletions vision_agent/agent/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_CONSOLE = Console()
_MAX_TABULATE_COL_WIDTH = 80


def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
Expand Down Expand Up @@ -91,6 +92,27 @@ def extract_code(code: str) -> str:
return code


def extract_tag(
    content: str,
    tag: str,
) -> Optional[str]:
    """Return the text enclosed by each complete ``<tag>...</tag>`` pair.

    The content is scanned left to right. Every opening tag is paired with the
    first closing tag that follows it, and the enclosed pieces are joined with
    newlines in order of appearance.

    Parameters:
        content: The text to search.
        tag: The tag name without angle brackets, e.g. ``"execute_python"``.

    Returns:
        The newline-joined inner contents of all complete pairs, or ``None``
        when no complete pair exists. An opening tag with no closing tag after
        it ends the scan; pairs found before it are still returned.
    """
    open_tag = f"<{tag}>"
    close_tag = f"</{tag}>"
    pieces = []
    cursor = 0

    while True:
        start = content.find(open_tag, cursor)
        if start == -1:
            break
        start += len(open_tag)

        # Search for the closing tag only after this opening tag. Searching
        # from the start of the remaining text lets a stray closing tag that
        # precedes the opening tag stall the cursor, so the same inner content
        # would be extracted twice.
        end = content.find(close_tag, start)
        if end == -1:
            break

        pieces.append(content[start:end])
        cursor = end + len(close_tag)

    if not pieces:
        return None
    return "\n".join(pieces)


def remove_installs_from_code(code: str) -> str:
pattern = r"\n!pip install.*?(\n|\Z)\n"
code = re.sub(pattern, "", code, flags=re.DOTALL)
Expand Down
Loading
Loading