From 57ba26824c6cc416dbbd8df9dfeaef43ca7c6a51 Mon Sep 17 00:00:00 2001
From: Yazhou Cao <yazhou.cao@landing.ai>
Date: Tue, 26 Mar 2024 16:38:32 -0700
Subject: [PATCH] minor updates

---
 docs/api/tools.md           |  4 +++-
 poetry.lock                 |  2 +-
 pyproject.toml              |  2 --
 tests/tools/test_video.py   |  1 +
 vision_agent/tools/tools.py | 16 ++++++++++------
 vision_agent/tools/video.py | 13 +++----------
 6 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/docs/api/tools.md b/docs/api/tools.md
index fa3fba93..71935d46 100644
--- a/docs/api/tools.md
+++ b/docs/api/tools.md
@@ -2,4 +2,6 @@
 
 ::: vision_agent.tools.prompts
 
-::: vision_agent.tools.tools
\ No newline at end of file
+::: vision_agent.tools.tools
+
+::: vision_agent.tools.video
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index d6ba3c83..e50ff090 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2610,4 +2610,4 @@ watchmedo = ["PyYAML (>=3.10)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "c22b1c0eb7fbae1f326837eacfe7af3dd0ee754d7a074c9ae1b465e05d65e98e"
+content-hash = "93a4e362ddaf2a1e65a6457c212896853b1c4409e0456f9209f33b795b5ec748"
diff --git a/pyproject.toml b/pyproject.toml
index e18a0d7c..7b7686d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,8 +28,6 @@ torch = "2.1.*"  # 2.2 causes sentence-transformers to seg fault
 sentence-transformers = "2.*"
 openai = "1.*"
 typing_extensions = "4.*"
-
-[tool.poetry.group.video.dependencies]
 moviepy = "1.*"
 opencv-python-headless = "4.*"
 
diff --git a/tests/tools/test_video.py b/tests/tools/test_video.py
index 8952c238..a19b529f 100644
--- a/tests/tools/test_video.py
+++ b/tests/tools/test_video.py
@@ -2,6 +2,7 @@
 
 
 def test_extract_frames_from_video():
+    # TODO: consider generating a video on the fly instead
     video_path = "tests/data/video/test.mp4"
     res = extract_frames_from_video(video_path)
     assert len(res) == 1
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index 787f1914..99afcee3 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -11,6 +11,7 @@
 from PIL.Image import Image as ImageType
 
 from vision_agent.image_utils import convert_to_b64, get_image_size
+from vision_agent.tools.video import extract_frames_from_video
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -523,12 +524,15 @@ class ExtractFrames(Tool):
     }
 
     def __call__(self, video_uri: str) -> list[tuple[str, float]]:
-        try:
-            from vision_agent.tools.video import extract_frames_from_video
-        except Exception as e:
-            raise ImportError(
-                "vision_agent is not installed correctly (cause: missing dependencies), please run 'pip install vision-agent[video]' instead."
-            ) from e
+        """Extract frames from a video, pairing each frame with its timestamp in seconds.
+
+
+        Parameters:
+            video_uri: the path to the video file or a URL pointing to the video data
+
+        Returns:
+            a list of tuples containing the extracted frame and the timestamp in seconds. E.g. [(path_to_frame1, 0.0), (path_to_frame2, 0.5), ...]. The timestamp is the time in seconds from the start of the video. E.g. 12.125 means 12.125 seconds from the start of the video. The frames are sorted by the timestamp in ascending order.
+        """
         frames = extract_frames_from_video(video_uri)
         result = []
         _LOGGER.info(
diff --git a/vision_agent/tools/video.py b/vision_agent/tools/video.py
index 1957915d..a86efc49 100644
--- a/vision_agent/tools/video.py
+++ b/vision_agent/tools/video.py
@@ -23,9 +23,9 @@ def extract_frames_from_video(
     ----------
     video_uri: str, the path to the video file or a video file url
     fps: int, the frame rate per second to extract the frames
-    motion_detection_threshold: float, the threshold to detect the motion between frames.
-        A value between 0-1, the percentage change that is considered a different frame.
-        A lower value means more frames will be extracted.
+    motion_detection_threshold: float, the threshold used to detect motion between frames.
+        A value between 0 and 1: the fraction of change above which two frames are considered different.
+        A lower value means more frames will be extracted.
 
     Returns
     -------
@@ -181,10 +181,3 @@ def _similar_frame(
     )
     _LOGGER.debug(f"Image diff: {change_percentage}")
     return change_percentage < threshold
-
-
-# res = extract_frames(video)
-if __name__ == "__main__":
-    video_path = "/Users/asia/Downloads/frames/baby_cam1.MP4"
-    res = extract_frames_from_video(video_path)
-    print("done, extracted num frames: ", len(res))