Skip to content

Commit

Permalink
fix: use pyav to save video (#228)
Browse files Browse the repository at this point in the history
* add .mp4 to the generated video file

* use pyav

* install pyav

* resize

* fix lint

* fix type

* fix

* minor
  • Loading branch information
yzld2002 authored Sep 6, 2024
1 parent 777079c commit 42c2238
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 17 deletions.
76 changes: 65 additions & 11 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pytube = "15.0.0"
anthropic = "^0.31.0"
pydantic = "2.7.4"
eva-decord = "^0.6.1"
av = "^11.0.0"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
Expand Down
4 changes: 3 additions & 1 deletion vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,9 @@ def save_video(
raise ValueError(f"fps must be greater than 0 got {fps}")

if output_video_path is None:
output_video_path = tempfile.NamedTemporaryFile(delete=False).name
output_video_path = tempfile.NamedTemporaryFile(
delete=False, suffix=".mp4"
).name

output_video_path = video_writer(frames, fps, output_video_path)
_save_video_to_result(output_video_path)
Expand Down
29 changes: 24 additions & 5 deletions vision_agent/utils/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List, Optional, Tuple

import cv2
import av # type: ignore
import numpy as np
from decord import VideoReader # type: ignore

Expand Down Expand Up @@ -43,18 +44,36 @@ def play_video(video_base64: str) -> None:
cv2.destroyAllWindows()


def _resize_frame(frame: np.ndarray) -> np.ndarray:
    """Return ``frame`` resized so that its width and height are both even.

    An odd dimension is reduced by one pixel. The frame is always passed
    through ``cv2.resize``, including when both dimensions are already even.
    """
    rows, cols = frame.shape[:2]
    # cv2.resize takes the target size as (width, height).
    even_size = ((cols // 2) * 2, (rows // 2) * 2)
    return cv2.resize(frame, even_size)


def video_writer(
    frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
) -> str:
    """Encode a sequence of RGB(A) frames into an H.264 video file using PyAV.

    Args:
        frames: Frames to encode, in playback order. Each frame is sliced as
            ``frame[:, :, :3]`` and handed to PyAV as ``rgb24``, so any
            channel beyond the third (e.g. alpha) is dropped.
        fps: Frame rate given to the output stream.
        filename: Destination path. When ``None``, a temporary file with an
            ``.mp4`` suffix is created; it is not deleted automatically.

    Returns:
        The path of the written video file.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Validate before touching the filesystem so no stray output file is
    # created for an empty input. ``len`` also works if an array is passed.
    if len(frames) == 0:
        raise ValueError("frames must contain at least one frame")

    if filename is None:
        # Close the temporary handle right away; only the reserved path is
        # needed, and PyAV reopens the file itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            filename = tmp.name

    height, width = frames[0].shape[:2]

    # The context manager closes the container even when encoding raises.
    with av.open(filename, mode="w") as container:
        stream = container.add_stream("h264", rate=fps)
        # Round the stream dimensions down to even numbers, matching what
        # _resize_frame does to every frame below.
        stream.height = height - (height % 2)
        stream.width = width - (width % 2)
        stream.pix_fmt = "yuv420p"

        for frame in frames:
            # Remove the alpha channel (convert RGBA to RGB)
            frame_rgb = frame[:, :, :3]
            # Resize the frame to make dimensions divisible by 2
            frame_rgb = _resize_frame(frame_rgb)
            av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
            for packet in stream.encode(av_frame):
                container.mux(packet)

        # Calling encode() with no frame flushes packets still buffered in
        # the encoder.
        for packet in stream.encode():
            container.mux(packet)

    return filename


Expand Down

0 comments on commit 42c2238

Please sign in to comment.