Fix Bugs (#265)

* separated out planner, renamed chat methods * fixed circular imports * added type for plan context * add planner as separate call to vision agent * export plan context * fixed circular imports * fixed wrong key * better json parsing * more test cases for json parsing * have planner visualize results * add more guard rails to remove double chat * revert changes with planning step for now * revert to original prompts * fix type issue * fix format issue * skip examples for flake8 * fix names and readme * fixed type error * fix countgd integ test * synced code with new code interpreter arg * separated out planner, renamed chat methods * add planner as separate call to vision agent * revert changes with planning step for now * strip extra function calls from generated code * fix code rewrite issue with () * fix issue if plan format is incorrect * increase count threshold and size * switch to using tags to fix issue of mixing up code and tests * skip tests for flake8 * fix type issues * fix test case * remove extra planning import * fixed type issues * fixed type issues * fix test case * fix format issue
landing-ai · Oct 11, 2024 · d9445e3 · d9445e3
1 parent 5775fdd
commit d9445e3
Show file tree

Hide file tree

Showing 14 changed files with 402 additions and 98 deletions.
diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml
@@ -43,7 +43,7 @@ jobs:
  - name: Linting
  run: |
  # stop the build if there are Python syntax errors or undefined names
- poetry run flake8 . --exclude .venv,examples --count --show-source --statistics
+ poetry run flake8 . --exclude .venv,examples,tests --count --show-source --statistics
  - name: Check Format
  run: |
  poetry run black --check --diff --color .

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,7 @@ pytube = "15.0.0"
 anthropic = "^0.31.0"
 pydantic = "2.7.4"
 av = "^11.0.0"
+redbaron = "^0.9.2"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"

diff --git a/tests/integ/test_tools.py b/tests/integ/test_tools.py
@@ -6,6 +6,8 @@
  blip_image_caption,
  clip,
  closest_mask_distance,
+ countgd_counting,
+ countgd_example_based_counting,
  depth_anything_v2,
  detr_segmentation,
  dpt_hybrid_midas,
@@ -32,8 +34,6 @@
  template_match,
  vit_image_classification,
  vit_nsfw_classification,
- countgd_counting,
- countgd_example_based_counting,
 )
 
 FINE_TUNE_ID = "65ebba4a-88b7-419f-9046-0750e30250da"

diff --git a/tests/unit/test_meta_tools.py b/tests/unit/test_meta_tools.py
@@ -1,6 +1,7 @@
 from vision_agent.tools.meta_tools import (
  Artifacts,
  check_and_load_image,
+ use_extra_vision_agent_args,
  use_object_detection_fine_tuning,
 )
 
@@ -71,3 +72,37 @@ def test_use_object_detection_fine_tuning_twice():
  assert 'owl_v2_image("two", image2, "456")' in output
  assert 'florence2_sam2_image("three", image3, "456")' in output
  assert artifacts["code"] == expected_code2
+
+
def test_use_object_detection_fine_tuning_real_case():
    """A prompt argument containing parentheses gets the fine-tune id appended."""
    rewritten_call = 'florence2_phrase_grounding("(strange arg)", image1, "123")'

    store = Artifacts("test")
    store["code"] = "florence2_phrase_grounding('(strange arg)', image1)"

    result = use_object_detection_fine_tuning(store, "code", "123")

    # The rewritten call must show up both in the returned text and in the
    # artifact that was edited in place.
    assert rewritten_call in result
    assert store["code"] == rewritten_call
+
+
def test_use_extra_vision_agent_args_real_case():
    """``test_multi_plan=True`` is appended even when a path argument holds ``(1)``."""
    original_calls = (
        "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
        "edit_vision_code(artifacts, 'code.py', ['write code 1', 'write code 2'], ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
    )

    for call in original_calls:
        # Expected text: the same call with the keyword spliced in before the
        # closing parenthesis.
        wanted = call[:-1] + ", test_multi_plan=True)"
        assert use_extra_vision_agent_args(call) == wanted
+
+
def test_use_extra_vision_args_with_custom_tools():
    """Both ``test_multi_plan`` and ``custom_tool_names`` are appended to the call."""
    tool_names = ["tool1", "tool2"]
    appended = ", test_multi_plan=True, custom_tool_names=['tool1', 'tool2'])"
    original_calls = (
        "generate_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
        "edit_vision_code(artifacts, 'code.py', 'write code', ['/home/user/n0xn5X6_IMG_2861%20(1).mov'])",
    )

    for call in original_calls:
        # Expected text: the original call with the extra keywords placed
        # before the closing parenthesis.
        wanted = call[:-1] + appended
        rewritten = use_extra_vision_agent_args(call, custom_tool_names=tool_names)
        assert rewritten == wanted
diff --git a/tests/unit/test_va.py b/tests/unit/test_va.py
@@ -23,27 +23,23 @@ def test_parse_execution_no_test_multi_plan_edit():
  code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
  assert (
  parse_execution(code, False)
- == "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])"
+ == "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False)"
  )
 
 
 def test_parse_execution_custom_tool_names_generate():
  code = "<execute_python>generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'])</execute_python>"
  assert (
- parse_execution(
- code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
- )
+ parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
  == "generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
  )
 
 
-def test_prase_execution_custom_tool_names_edit():
+def test_parse_execution_custom_tool_names_edit():
  code = "<execute_python>edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])</execute_python>"
  assert (
- parse_execution(
- code, test_multi_plan=False, customed_tool_names=["owl_v2_image"]
- )
- == "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], custom_tool_names=['owl_v2_image'])"
+ parse_execution(code, test_multi_plan=False, custom_tool_names=["owl_v2_image"])
+ == "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'], test_multi_plan=False, custom_tool_names=['owl_v2_image'])"
  )
 
 

diff --git a/tests/unit/test_vac.py b/tests/unit/test_vac.py
@@ -0,0 +1,143 @@
+from vision_agent.agent.vision_agent_coder import strip_function_calls
+
+
def test_strip_non_function_real_case():
    """A trailing top-level call is removed while an excluded call is kept.

    The input program is the expected program followed by one extra
    ``result = check_helmets(...)`` line. ``register_heif_opener`` is passed
    as an exclusion, and its call is still present in the expected output.
    """
    expected_code = """import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool


from vision_agent.tools import load_image, owl_v2_image, overlay_bounding_boxes, save_image, save_json

def check_helmets(image_path):
    # Load the image
    image = load_image(image_path)
    
    # Detect people and helmets
    detections = owl_v2_image("person, helmet", image, box_threshold=0.2)
    
    # Separate people and helmets
    people = [d for d in detections if d['label'] == 'person']
    helmets = [d for d in detections if d['label'] == 'helmet']
    
    people_with_helmets = 0
    people_without_helmets = 0
    
    height, width = image.shape[:2]
    
    for person in people:
        person_x = (person['bbox'][0] + person['bbox'][2]) / 2
        person_y = person['bbox'][1]  # Top of the bounding box
        
        helmet_found = False
        for helmet in helmets:
            helmet_x = (helmet['bbox'][0] + helmet['bbox'][2]) / 2
            helmet_y = (helmet['bbox'][1] + helmet['bbox'][3]) / 2
            
            # Check if the helmet is within 20 pixels of the person's head
            if (abs((helmet_x - person_x) * width) < 20 and
                -5 < ((helmet_y - person_y) * height) < 20):
                helmet_found = True
                break
        
        if helmet_found:
            people_with_helmets += 1
            person['label'] = 'person with helmet'
        else:
            people_without_helmets += 1
            person['label'] = 'person without helmet'
    
    # Create the count dictionary
    count_dict = {
        "people_with_helmets": people_with_helmets,
        "people_without_helmets": people_without_helmets
    }
    
    # Visualize the results
    visualized_image = overlay_bounding_boxes(image, detections)
    
    # Save the visualized image
    save_image(visualized_image, "/home/user/visualized_result.png")
    
    # Save the count dictionary as JSON
    save_json(count_dict, "/home/user/helmet_counts.json")
    
    return count_dict

# The function can be called with the image path"""

    # The input differs from the expected output only by this final line.
    trailing_call = 'result = check_helmets("/home/user/edQPXGK_workers.png")'
    code = f"{expected_code}\n{trailing_call}"

    code_out = strip_function_calls(code, exclusions=["register_heif_opener"])
    assert code_out == expected_code
diff --git a/vision_agent/agent/agent_utils.py b/vision_agent/agent/agent_utils.py
@@ -13,6 +13,7 @@
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _CONSOLE = Console()
+_MAX_TABULATE_COL_WIDTH = 80
 
 
 def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
@@ -91,6 +92,27 @@ def extract_code(code: str) -> str:
  return code
 
 
def extract_tag(
    content: str,
    tag: str,
) -> Optional[str]:
    """Return the text enclosed by every ``<tag>...</tag>`` pair in ``content``.

    Sections are collected left to right and joined with a newline. Scanning
    stops at the first opening tag that has no closing tag after it; sections
    found before that point are still returned.

    Parameters:
        content: Text to search.
        tag: Tag name without angle brackets, e.g. ``"execute_python"``.

    Returns:
        The newline-joined inner contents, or ``None`` when no complete
        ``<tag>...</tag>`` pair exists.
    """
    open_tag = f"<{tag}>"
    close_tag = f"</{tag}>"

    sections = []
    cursor = 0
    while True:
        start = content.find(open_tag, cursor)
        if start == -1:
            break
        start += len(open_tag)

        # Look for the closing tag only *after* the opening tag. Searching
        # from the beginning of the remaining text would match a stray
        # closing tag that precedes the opening one and cause the same
        # section to be extracted twice.
        end = content.find(close_tag, start)
        if end == -1:
            break

        sections.append(content[start:end])
        cursor = end + len(close_tag)

    if not sections:
        return None
    return "\n".join(sections)
+
+
 def remove_installs_from_code(code: str) -> str:
  pattern = r"\n!pip install.*?(\n|\Z)\n"
  code = re.sub(pattern, "", code, flags=re.DOTALL)