-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for several issues with VisionAgent (#251)
* fix issues around vision agent coder
* fix flake8
* fixed issue where it can't see media from view_media_artifact
* fixed user exec obs
* fixed side cases with agent
* fixed bug with edit vision code
* fixed bug with chat app
* added more test cases for string replacement funcs
* fix linting error
- Loading branch information
1 parent
d2074d7
commit d14a76f
Showing
7 changed files
with
267 additions
and
119 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from vision_agent.tools.meta_tools import ( | ||
Artifacts, | ||
check_and_load_image, | ||
use_object_detection_fine_tuning, | ||
) | ||
|
||
|
||
def test_check_and_load_image_none():
    """Code containing no ``view_media_artifact`` call yields an empty list."""
    source = "print('Hello, World!')"
    found = check_and_load_image(source)
    assert found == []
|
||
|
||
def test_check_and_load_image_one():
    """A single ``view_media_artifact`` call yields exactly that media name."""
    source = "view_media_artifact(artifacts, 'image.jpg')"
    found = check_and_load_image(source)
    assert found == ["image.jpg"]
|
||
|
||
def test_check_and_load_image_two():
    """Two ``view_media_artifact`` calls yield both names in source order."""
    calls = [
        "view_media_artifact(artifacts, 'image1.jpg')",
        "view_media_artifact(artifacts, 'image2.jpg')",
    ]
    found = check_and_load_image("\n".join(calls))
    assert found == ["image1.jpg", "image2.jpg"]
|
||
|
||
def test_use_object_detection_fine_tuning_none():
    """An artifact without detection calls is reported and left untouched."""
    original = "print('Hello, World!')"
    artifacts = Artifacts("test")
    artifacts["code"] = original

    result = use_object_detection_fine_tuning(artifacts, "code", "123")

    expected_message = (
        "[No function calls to replace with fine tuning id in artifact code]"
    )
    assert result == expected_message
    # The stored artifact must be identical to what was put in.
    assert artifacts["code"] == original
|
||
|
||
def test_use_object_detection_fine_tuning():
    """Each detection call in the artifact gains the fine-tune id argument.

    The test stores three single-quoted detection calls under the ``"code"``
    artifact, applies fine-tune id ``"123"``, and expects both the returned
    output and the stored artifact to contain the double-quoted calls with
    ``"123"`` appended as a third argument.
    """
    # Build the multi-line fixtures from explicit per-line lists so no stray
    # characters can end up inside the string literals.
    original_lines = [
        "florence2_phrase_grounding('one', image1)",
        "owl_v2_image('two', image2)",
        "florence2_sam2_image('three', image3)",
    ]
    rewritten_lines = [
        'florence2_phrase_grounding("one", image1, "123")',
        'owl_v2_image("two", image2, "123")',
        'florence2_sam2_image("three", image3, "123")',
    ]

    artifacts = Artifacts("test")
    artifacts["code"] = "\n".join(original_lines)

    output = use_object_detection_fine_tuning(artifacts, "code", "123")

    # The returned text is only checked by containment, line by line.
    for line in rewritten_lines:
        assert line in output
    # The stored artifact must match the rewritten code exactly.
    assert artifacts["code"] == "\n".join(rewritten_lines)
|
||
|
||
def test_use_object_detection_fine_tuning_twice():
    """Applying a second fine-tune id replaces the one inserted earlier.

    The same artifact is rewritten first with id ``"123"`` and then with
    ``"456"``; after each pass the returned output must contain every
    rewritten call and the stored artifact must equal the rewritten code
    carrying only the most recent id.
    """
    # Build the fixture from an explicit per-line list so no stray characters
    # can end up inside the string literal.
    original_lines = [
        "florence2_phrase_grounding('one', image1)",
        "owl_v2_image('two', image2)",
        "florence2_sam2_image('three', image3)",
    ]

    artifacts = Artifacts("test")
    artifacts["code"] = "\n".join(original_lines)

    # Run the two passes back to back on the same artifact, in this order.
    for fine_tune_id in ("123", "456"):
        expected_lines = [
            f'florence2_phrase_grounding("one", image1, "{fine_tune_id}")',
            f'owl_v2_image("two", image2, "{fine_tune_id}")',
            f'florence2_sam2_image("three", image3, "{fine_tune_id}")',
        ]

        output = use_object_detection_fine_tuning(artifacts, "code", fine_tune_id)

        for line in expected_lines:
            assert line in output
        assert artifacts["code"] == "\n".join(expected_lines)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from vision_agent.agent.vision_agent import parse_execution | ||
|
||
|
||
def test_parse_execution_zero():
    """Text without an ``<execute_python>`` tag parses to ``None``."""
    response = "print('Hello, World!')"
    parsed = parse_execution(response)
    assert parsed is None
|
||
|
||
def test_parse_execution_one():
    """A single ``<execute_python>`` block parses to its inner code."""
    snippet = "print('Hello, World!')"
    response = f"<execute_python>{snippet}</execute_python>"
    assert parse_execution(response) == snippet
|
||
|
||
def test_parse_execution_no_test_multi_plan_generate():
    """``generate_vision_code`` gains ``test_multi_plan=False`` when disabled."""
    call = "generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'])"
    response = "<execute_python>" + call + "</execute_python>"
    expected = (
        "generate_vision_code(artifacts, 'code.py', 'Generate code', "
        "['image.png'], test_multi_plan=False)"
    )

    parsed = parse_execution(response, False)

    assert parsed == expected
|
||
|
||
def test_parse_execution_no_test_multi_plan_edit():
    """``edit_vision_code`` is returned unchanged when multi-plan is disabled."""
    call = "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])"
    response = "<execute_python>" + call + "</execute_python>"

    parsed = parse_execution(response, False)

    # No extra keyword argument is expected on the edit call.
    assert parsed == call
|
||
|
||
def test_parse_execution_custom_tool_names_generate():
    """``generate_vision_code`` gains both the multi-plan and tool-name kwargs."""
    call = "generate_vision_code(artifacts, 'code.py', 'Generate code', ['image.png'])"
    response = "<execute_python>" + call + "</execute_python>"
    expected = (
        "generate_vision_code(artifacts, 'code.py', 'Generate code', "
        "['image.png'], test_multi_plan=False, "
        "custom_tool_names=['owl_v2_image'])"
    )

    parsed = parse_execution(
        response,
        test_multi_plan=False,
        customed_tool_names=["owl_v2_image"],
    )

    assert parsed == expected
|
||
|
||
# NOTE(review): "prase" in the name looks like a typo for "parse"; the name is
# kept as-is so existing test IDs and -k selections keep working.
def test_prase_execution_custom_tool_names_edit():
    """``edit_vision_code`` gains only the ``custom_tool_names`` kwarg."""
    call = "edit_vision_code(artifacts, 'code.py', ['Generate code'], ['image.png'])"
    response = "<execute_python>" + call + "</execute_python>"
    expected = (
        "edit_vision_code(artifacts, 'code.py', ['Generate code'], "
        "['image.png'], custom_tool_names=['owl_v2_image'])"
    )

    parsed = parse_execution(
        response,
        test_multi_plan=False,
        customed_tool_names=["owl_v2_image"],
    )

    assert parsed == expected
|
||
|
||
def test_parse_execution_multiple_executes():
    """Several ``<execute_python>`` blocks are joined with newlines."""
    snippet = "print('Hello, World!')"
    tagged = f"<execute_python>{snippet}</execute_python>"

    parsed = parse_execution(tagged + tagged)

    assert parsed == "\n".join([snippet, snippet])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.