From 6d6895c87ba37994874d8a04d9407c5961938db6 Mon Sep 17 00:00:00 2001 From: Dayanne Fernandes Date: Thu, 3 Oct 2024 23:29:03 -0300 Subject: [PATCH] hide florence2_phrase_grounding_video --- tests/integ/test_tools.py | 50 +++++++++++++++++----------------- vision_agent/tools/__init__.py | 1 - 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/tests/integ/test_tools.py b/tests/integ/test_tools.py index 9958894d..796fcdce 100644 --- a/tests/integ/test_tools.py +++ b/tests/integ/test_tools.py @@ -12,7 +12,7 @@ florence2_image_caption, florence2_ocr, florence2_phrase_grounding_image, - florence2_phrase_grounding_video, + # florence2_phrase_grounding_video, florence2_roberta_vqa, florence2_sam2_image, florence2_sam2_video_tracking, @@ -117,30 +117,30 @@ def test_florence2_phrase_grounding_image_fine_tune_id(): assert [res["label"] for res in result] == ["coin"] * len(result) -def test_florence2_phrase_grounding_video(): - frames = [ - np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10) - ] - result = florence2_phrase_grounding_video( - prompt="coin", - frames=frames, - ) - assert len(result) == 10 - assert 2 <= len([res["label"] for res in result[0]]) <= 26 - - -def test_florence2_phrase_grounding_video_fine_tune_id(): - frames = [ - np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10) - ] - # this calls a fine-tuned florence2 model which is going to be worse at this task - result = florence2_phrase_grounding_video( - prompt="coin", - frames=frames, - fine_tune_id=FINE_TUNE_ID, - ) - assert len(result) == 10 - assert 16 <= len([res["label"] for res in result[0]]) <= 26 +# def test_florence2_phrase_grounding_video(): +# frames = [ +# np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10) +# ] +# result = florence2_phrase_grounding_video( +# prompt="coin", +# frames=frames, +# ) +# assert len(result) == 10 +# assert 2 <= len([res["label"] for res in result[0]]) <= 26 + + +# def test_florence2_phrase_grounding_video_fine_tune_id(): +# frames = [ +# np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10) +# ] +# # this calls a fine-tuned florence2 model which is going to be worse at this task +# result = florence2_phrase_grounding_video( +# prompt="coin", +# frames=frames, +# fine_tune_id=FINE_TUNE_ID, +# ) +# assert len(result) == 10 +# assert 16 <= len([res["label"] for res in result[0]]) <= 26 def test_template_match(): diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py index ebf98c08..2ed88789 100644 --- a/vision_agent/tools/__init__.py +++ b/vision_agent/tools/__init__.py @@ -25,7 +25,6 @@ florence2_image_caption, florence2_ocr, florence2_phrase_grounding_image, - florence2_phrase_grounding_video, florence2_roberta_vqa, florence2_sam2_image, florence2_sam2_video_tracking,