From 6d6895c87ba37994874d8a04d9407c5961938db6 Mon Sep 17 00:00:00 2001
From: Dayanne Fernandes <dayannefernandesc@gmail.com>
Date: Thu, 3 Oct 2024 23:29:03 -0300
Subject: [PATCH] Hide florence2_phrase_grounding_video: drop it from vision_agent.tools imports and comment out its integration tests

---
 tests/integ/test_tools.py      | 50 +++++++++++++++++-----------------
 vision_agent/tools/__init__.py |  1 -
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/tests/integ/test_tools.py b/tests/integ/test_tools.py
index 9958894d..796fcdce 100644
--- a/tests/integ/test_tools.py
+++ b/tests/integ/test_tools.py
@@ -12,7 +12,7 @@
     florence2_image_caption,
     florence2_ocr,
     florence2_phrase_grounding_image,
-    florence2_phrase_grounding_video,
+    # florence2_phrase_grounding_video,
     florence2_roberta_vqa,
     florence2_sam2_image,
     florence2_sam2_video_tracking,
@@ -117,30 +117,30 @@ def test_florence2_phrase_grounding_image_fine_tune_id():
     assert [res["label"] for res in result] == ["coin"] * len(result)
 
 
-def test_florence2_phrase_grounding_video():
-    frames = [
-        np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10)
-    ]
-    result = florence2_phrase_grounding_video(
-        prompt="coin",
-        frames=frames,
-    )
-    assert len(result) == 10
-    assert 2 <= len([res["label"] for res in result[0]]) <= 26
-
-
-def test_florence2_phrase_grounding_video_fine_tune_id():
-    frames = [
-        np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10)
-    ]
-    # this calls a fine-tuned florence2 model which is going to be worse at this task
-    result = florence2_phrase_grounding_video(
-        prompt="coin",
-        frames=frames,
-        fine_tune_id=FINE_TUNE_ID,
-    )
-    assert len(result) == 10
-    assert 16 <= len([res["label"] for res in result[0]]) <= 26
+# def test_florence2_phrase_grounding_video():
+#     frames = [
+#         np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10)
+#     ]
+#     result = florence2_phrase_grounding_video(
+#         prompt="coin",
+#         frames=frames,
+#     )
+#     assert len(result) == 10
+#     assert 2 <= len([res["label"] for res in result[0]]) <= 26
+
+
+# def test_florence2_phrase_grounding_video_fine_tune_id():
+#     frames = [
+#         np.array(Image.fromarray(ski.data.coins()).convert("RGB")) for _ in range(10)
+#     ]
+#     # this calls a fine-tuned florence2 model which is going to be worse at this task
+#     result = florence2_phrase_grounding_video(
+#         prompt="coin",
+#         frames=frames,
+#         fine_tune_id=FINE_TUNE_ID,
+#     )
+#     assert len(result) == 10
+#     assert 16 <= len([res["label"] for res in result[0]]) <= 26
 
 
 def test_template_match():
diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index ebf98c08..2ed88789 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -25,7 +25,6 @@
     florence2_image_caption,
     florence2_ocr,
     florence2_phrase_grounding_image,
-    florence2_phrase_grounding_video,
     florence2_roberta_vqa,
     florence2_sam2_image,
     florence2_sam2_video_tracking,