Skip to content

Commit

Permalink
Add frame extraction tool for video processing
Browse files Browse the repository at this point in the history
  • Loading branch information
AsiaCao committed Mar 25, 2024
1 parent 7fe61ab commit 31791be
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 1 deletion.
124 changes: 123 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ sentence-transformers = "2.*"
openai = "1.*"
typing_extensions = "4.*"

[tool.poetry.group.video.dependencies]
moviepy = "1.*"
opencv-python-headless = "4.*"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
pytest = "7.*"
Expand Down Expand Up @@ -84,4 +88,5 @@ module = [
"faiss.*",
"openai.*",
"sentence_transformers.*",
"moviepy.*",
]
Binary file added tests/data/video/test.mp4
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/tools/test_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from vision_agent.tools.video import extract_frames_from_video


def test_extract_frames_from_video():
    """Check that the bundled sample video produces exactly one extracted item."""
    # The path is relative, so this expects pytest to run from the repo root.
    extracted = extract_frames_from_video("tests/data/video/test.mp4")
    assert len(extracted) == 1
28 changes: 28 additions & 0 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,33 @@ def __call__(self, input: List[int]) -> float:
return round(input[0] / input[1], 2)


class ExtractFrames(Tool):
    """Tool wrapper that delegates video frame extraction to `vision_agent.tools.video`.

    The class attributes below (`name`, `description`, `usage`) are the metadata
    this tool exposes; the heavy video module is only imported when the tool is
    actually called.
    """

    name = "extract_frames_"
    description = (
        "'extract_frames_' extract image frames from the input video, "
        "return a list of tuple (frame, timestamp), where the timestamp is "
        "the relative time in seconds of the frame occurred in the video."
    )
    usage = {
        "required_parameters": [
            {"name": "video_uri", "type": "str"},
        ],
        "examples": [
            {
                "scenario": "Can you extract the frames from this video? Video: www.foobar.com/video?name=test.mp4",
                "parameters": {
                    "video_uri": "www.foobar.com/video?name=test.mp4",
                },
            },
            {
                "scenario": "Can you extract the images from this video file? Video path: tests/data/test.mp4",
                "parameters": {
                    "video_uri": "tests/data/test.mp4",
                },
            },
        ],
    }

    def __call__(self, video_uri: str) -> list[tuple[np.ndarray, float]]:
        """Extract frames from the video identified by `video_uri`.

        Args:
            video_uri: Location of the video; passed through unchanged to
                `extract_frames_from_video`.

        Returns:
            Whatever `extract_frames_from_video` returns, annotated here as a
            list of `(frame, timestamp)` tuples.

        Raises:
            ImportError: If importing `vision_agent.tools.video` fails for any
                reason; the original exception is chained as the cause.
        """
        try:
            # Imported lazily so the optional video dependencies are only
            # required when this tool is actually used.
            from vision_agent.tools.video import (
                extract_frames_from_video as _extract_frames,
            )
        except Exception as import_failure:
            install_hint = (
                "vision_agent is not installed correctly (cause: missing dependencies), "
                "please run 'pip install vision-agent[video]' instead."
            )
            raise ImportError(install_hint) from import_failure
        else:
            # Outside the try body, so extraction errors are never rewrapped
            # as ImportError.
            return _extract_frames(video_uri)


TOOLS = {
i: {"name": c.name, "description": c.description, "usage": c.usage, "class": c}
for i, c in enumerate(
Expand All @@ -472,6 +499,7 @@ def __call__(self, input: List[int]) -> float:
Subtract,
Multiply,
Divide,
ExtractFrames,
]
)
if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage"))
Expand Down
Loading

0 comments on commit 31791be

Please sign in to comment.