From 31791beef757f2652f9752f005b0c20fc58fa57f Mon Sep 17 00:00:00 2001 From: Yazhou Cao Date: Mon, 25 Mar 2024 15:07:32 -0700 Subject: [PATCH] Add frame extraction tool for video processing --- poetry.lock | 124 ++++++++++++++++++++++- pyproject.toml | 5 + tests/data/video/test.mp4 | Bin 0 -> 17317 bytes tests/tools/test_video.py | 7 ++ vision_agent/tools/tools.py | 28 ++++++ vision_agent/tools/video.py | 190 ++++++++++++++++++++++++++++++++++++ 6 files changed, 353 insertions(+), 1 deletion(-) create mode 100644 tests/data/video/test.mp4 create mode 100644 tests/tools/test_video.py create mode 100644 vision_agent/tools/video.py diff --git a/poetry.lock b/poetry.lock index 3f479663..d7c66b5d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -257,6 +257,17 @@ files = [ [package.extras] dev = ["black", "flake8", "flake8-pyi", "matplotlib", "mypy (==0.770)", "numpy", "pandas", "pytest"] +[[package]] +name = "decorator" +version = "4.4.2" +description = "Decorators for Humans" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +files = [ + {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, + {file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, +] + [[package]] name = "distro" version = "1.9.0" @@ -518,6 +529,56 @@ files = [ {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] +[[package]] +name = "imageio" +version = "2.34.0" +description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." +optional = false +python-versions = ">=3.8" +files = [ + {file = "imageio-2.34.0-py3-none-any.whl", hash = "sha256:08082bf47ccb54843d9c73fe9fc8f3a88c72452ab676b58aca74f36167e8ccba"}, + {file = "imageio-2.34.0.tar.gz", hash = "sha256:ae9732e10acf807a22c389aef193f42215718e16bd06eed0c5bb57e1034a4d53"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=8.3.2" + +[package.extras] +all-plugins = ["astropy", "av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +build = ["wheel"] +dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"] +docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"] +ffmpeg = ["imageio-ffmpeg", "psutil"] +fits = ["astropy"] +full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "sphinx (<6)", "tifffile", "wheel"] +gdal = ["gdal"] +itk = ["itk"] +linting = ["black", "flake8"] +pillow-heif = ["pillow-heif"] +pyav = ["av"] +test = ["fsspec[github]", "pytest", "pytest-cov"] +tifffile = ["tifffile"] + +[[package]] +name = "imageio-ffmpeg" +version = "0.4.9" +description = "FFMPEG wrapper for Python" +optional = false +python-versions = ">=3.5" +files = [ + {file = "imageio-ffmpeg-0.4.9.tar.gz", hash = "sha256:39bcd1660118ef360fa4047456501071364661aa9d9021d3d26c58f1ee2081f5"}, + {file = "imageio_ffmpeg-0.4.9-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:24095e882a126a0d217197b86265f821b4bb3cf9004104f67c1384a2b4b49168"}, + {file = "imageio_ffmpeg-0.4.9-py3-none-manylinux2010_x86_64.whl", hash = "sha256:2996c64af3e5489227096580269317719ea1a8121d207f2e28d6c24ebc4a253e"}, + {file = "imageio_ffmpeg-0.4.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7eead662d2f46d748c0ab446b68f423eb63d2b54d0a8ef96f80607245540866d"}, + {file = "imageio_ffmpeg-0.4.9-py3-none-win32.whl", hash = "sha256:b6de1e18911687c538d5585d8287ab1a23624ca9dc2044fcc4607de667bcf11e"}, + {file = "imageio_ffmpeg-0.4.9-py3-none-win_amd64.whl", hash = "sha256:7e900c695c6541b1cb17feb1baacd4009b30a53a45b81c23d53a67ab13ffb766"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "iniconfig" version = "2.0.0" @@ -803,6 +864,30 @@ files = [ griffe = ">=0.37" mkdocstrings = ">=0.20" +[[package]] +name = "moviepy" +version = "1.0.3" +description = "Video editing with Python" +optional = false +python-versions = "*" +files = [ + {file = "moviepy-1.0.3.tar.gz", hash = "sha256:2884e35d1788077db3ff89e763c5ba7bfddbd7ae9108c9bc809e7ba58fa433f5"}, +] + +[package.dependencies] +decorator = ">=4.0.2,<5.0" +imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""} +imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""} +numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""} +proglog = "<=1.0.0" +requests = ">=2.8.1,<3.0" +tqdm = ">=4.11.2,<5.0" + +[package.extras] +doc = ["Sphinx (>=1.5.2,<2.0)", "numpydoc (>=0.6.0,<1.0)", "pygame (>=1.9.3,<2.0)", "sphinx_rtd_theme (>=0.1.10b0,<1.0)"] +optional = ["matplotlib (>=2.0.0,<3.0)", "opencv-python (>=3.0,<4.0)", "scikit-image (>=0.13.0,<1.0)", "scikit-learn", "scipy (>=0.19.0,<1.5)", "youtube_dl"] +test = ["coverage (<5.0)", "coveralls (>=1.1,<2.0)", "pytest (>=3.0.0,<4.0)", "pytest-cov (>=2.5.1,<3.0)", "requests (>=2.8.1,<3.0)"] + [[package]] name = "mpmath" version = "1.3.0" @@ -1106,6 +1191,29 @@ typing-extensions = ">=4.7,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "opencv-python-headless" +version = "4.9.0.80" +description = "Wrapper package for OpenCV python bindings." +optional = false +python-versions = ">=3.6" +files = [ + {file = "opencv-python-headless-4.9.0.80.tar.gz", hash = "sha256:71a4cd8cf7c37122901d8e81295db7fb188730e33a0e40039a4e59c1030b0958"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:2ea8a2edc4db87841991b2fbab55fc07b97ecb602e0f47d5d485bd75cee17c1a"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e0ee54e27be493e8f7850847edae3128e18b540dac1d7b2e4001b8944e11e1c6"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57ce2865e8fec431c6f97a81e9faaf23fa5be61011d0a75ccf47a3c0d65fa73d"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:976656362d68d9f40a5c66f83901430538002465f7db59142784f3893918f3df"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:11e3849d83e6651d4e7699aadda9ec7ed7c38957cbbcb99db074f2a2d2de9670"}, + {file = "opencv_python_headless-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:a8056c2cb37cd65dfcdf4153ca16f7362afcf3a50d600d6bb69c660fc61ee29c"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, +] + [[package]] name = "packaging" version = "24.0" @@ -1325,6 +1433,20 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "proglog" +version = "0.1.10" +description = "Log and progress bar manager for console, notebooks, web..." +optional = false +python-versions = "*" +files = [ + {file = "proglog-0.1.10-py3-none-any.whl", hash = "sha256:19d5da037e8c813da480b741e3fa71fb1ac0a5b02bf21c41577c7f327485ec50"}, + {file = "proglog-0.1.10.tar.gz", hash = "sha256:658c28c9c82e4caeb2f25f488fff9ceace22f8d69b15d0c1c86d64275e4ddab4"}, +] + +[package.dependencies] +tqdm = "*" + [[package]] name = "pycodestyle" version = "2.9.1" @@ -2488,4 +2610,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "dbb1f3241c006408ab8056349c63d7f947450c01fd518a758af66e2e5c000916" +content-hash = "c22b1c0eb7fbae1f326837eacfe7af3dd0ee754d7a074c9ae1b465e05d65e98e" diff --git a/pyproject.toml b/pyproject.toml index 5af13d3a..2ea59ef3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,10 @@ sentence-transformers = "2.*" openai = "1.*" typing_extensions = "4.*" +[tool.poetry.group.video.dependencies] +moviepy = "1.*" +opencv-python-headless = "4.*" + [tool.poetry.group.dev.dependencies] autoflake = "1.*" pytest = "7.*" @@ -84,4 +88,5 @@ module = [ "faiss.*", "openai.*", "sentence_transformers.*", + "moviepy.*", ] diff --git a/tests/data/video/test.mp4 b/tests/data/video/test.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..596eea389407ac08915148bbd2212567a1382b4a GIT binary patch literal 17317 zcmeIaV|3-));G9gvtqkq+jfN&RczZfDy+C-I~CiuZB)g!(L3kd``+{1en)qY{?s3O zjr`Z-Z_YL6+sRO$YgED$^-xapiLZ%jR63D zJZmF;C!kLSB*goB>6+MK``)r-V-oEW(K6}Txf?S(D-j)$p{;{45fhMgU}5B9BQnr8 zHfCjH1R5mifes8`6vQOxSc!yGMSz}0#)d$Hh^?Kwm9dEv5i=tr3mr2fGc!PqioZe(n0rEf!T>tM?8k%ivO$=V9&V{7MRZfoPnO=PHVpl`^>MC4#>!pA~n zWNct%YiP;G#LdXfNThG0Z{_Z2%*W`)!p-Q$#Kb~mZOmt8>_+71Yyjjqi0mBQfl%PC z>tMvkM9&BW0XHIRb2non-H(V&K!mP?zKy9d9}_!~p_zlNwZ1M8%0%SkU~FY&?g->K z-8hX5oq&v?y)_>r@Cf=w9=0~de9TO=OiV;3`i@Szc8-?jb{`&p2e7x(wKXwuGn zVwgBdIKwG2Yq*4LtAS*eJ7x72yBs)gTA>9Fa*%iLI15Ocy$|vpe|C8W9f16NMBINBNbSLF@c`0qSj9Fp_dSX7Og(pm zAl;Fd(rIs}rBki(Y?(IwL6VgpoYDcqKc%a>#4B~NY1DI$*^MwixZw$^dlE~k###zP zm8OGwwFxQ3@cq-C=#MQU-CI0`)#@OQVe7ItII_e*7}P*DF#V(9^7pN#n+=^qYPdB1 z0sQlRA*2H&IOrEs?Z=vdDB-OP(~68WeRnL05wr7CBLQ;?>JtvTB<~cxKJd8Y@6o6A z#ha&5EEhthmfmU&%syXP5+&P_VyashV6>VI7>Kewra-;>$5lfpefOU15Cf`u{#YfX z_79R}xuV@kodiZAJB9GY1-{;axx-bIHubGj&N~FQR!^rR-&riy5o3%qmFYA&uk7|6Lkqj7F|iiHX2Au80n@)~5K| zW`$1oK9>0&vN`CX@pg^EdgSJQk8p+_VoM`AeFKD4+Ufz8ztV*8s0{;HaCT z)_()aEfc3WD&I4;A2WuBq!r(-svUHJO~BCu6(Q0O>x%T?e8J;BC_wc}+@kH}r_9a> zpYG9T)j3MrmJU$F)-$k{wXJ)~h5_a-S{I3R7Wum73GuV!tSFLXJWh=WwntIPc%*N8 zO%zo`W@31@R|8PldIf(x3)Ie>#wW8gDudZcU%$$)F+9HpXS^yxbSPp_IpxdfF$mm{ z=D!kk-b&P!Q|(5eRTzpLmYRcW22LoLym8pYKesALfg>LSDYrsU*2AlEI`Qs2}HD z6aC7cO5Sm!PVi(>u?p>bf^*mr$mm((>UP*e0dn2pUSlGc_|Ahk9NO-1#mgg~LS0$B z7Zs7kDWslkx)psDTUw!+R28PGr{$bN(LD>S_WbAtoRNegDdB6-K9jvo3EH5L2quqn z#ZiKnF>Q5mntUff&{6YbG-u{E#5(#6dgX;zr6JOX!Jm>rYiQ*D&ddp|#2H>#IS(DF z{SpuLsm^>Mx0Q~oVZjVCp^RG8b``Bs_NI?_s7t$9cED#rH1@}w(OaF~z3BJ)qQchJ zR8Or=S!sr7;^HF38NnG2WWtx{{l=juuCZ>g!wx)^aAW&0p6a>c-H1JEK}Z;C9v`MX zRt4~gl-aF7IE`2~Zwlj73RjO2yI7%0U1cj-YcT!Plo+nPX`8DCz{Qm3A(2#qbR6O*c??DwNMH4Hopd25`+9g4aOiLpT68RpCTgQ&2`_AVmC ztk9+Mk1=k0otVZuoW(`qM&pKnVtCk0b+xk6JWq-^f8afQH|Sa+9qzIUUIb@BXeK7=o_ zxcQa;+N2+9#B&)Pj;gD2B+bn;oox;mXy8{4;aT(vt~8>f=%$!fRkp(1tecGa+Qre4 z*23$ztw4{J+-!Z^QUW9j-!p2SejeI&;SrlNx@kH!M?lWdasE)$SSvuwBhN^kIfMHBW*g zcNXKMaM4M**Fd88CRQP|X*MHl_qrAo`F$S*c&@U9NZpCR<1|h}$MX>+kzX}XhoHLp zc6b7AZrKz%T-U4l#pMN|r#-66cK-a4m+hMA*tdt7Q)L*=Q=>m6_becrwWcAIw1SpvM$_cUh!; zSfBtL*eyxjMX+vs-Ov8$!n3Lyc3FwwwzZ^vEuA96t@n5gk=%nO!K8>%>8Of~Km&S)F z*){IH1j>4l@-$z+TAte%y2%)@nRapBFvF@Esc(Z3qHO#jCg})Mf5+on=o3Ml>O_{h{`~c-9LQ=fyUVcQZ9pu>5q`y08PH_7T3H=u+UrR zd?vkPCI|Ni4ar2erKALN?92gfLXd+4 zuZebH)wrdLatArJ2-ByZ5T{c~Atd-K-4_bAbaC1C=9=_M8B*J;&FUoG_&R6&7I8t_9gev+b8d^#6^`g3F*87LTC}w zk+!C(b%F^yQK`8%hXiqgT=H2SsYCH*pN^*-Z_9*SRdtOFK8P^2+}Ac`h~H0`%|4T#<7*t)_Z5}lSaP;b<9GG~aZc8+9j6VqOrtl@ z-=ci zOFF@PO>X-R{T?QgflL)#OV*sWvp@rfcR9Ab=7nI`H23-;TYLX6pj1pcNa1qM~yX%UWs&^^JbICR#Wd{9=Q~B=<7UP);L% zHy&G%ahNrbZwK#ebER0GTH3&Nd@qm-vQG|{Ul2b?uosY|?o57byV&)5#Gyet;0k^i z8~(gBhA#JsQn0%5H)KC=<>bqv-K#YH90$MiI0!1O1!GI}x4Wn`5-SV{E_|IYH2e|uiwB{;EsRI! z;-8wEmM`24Pi10I+#r@c zeH(TJ+uUe5$jvtm`x&|79VbOFVr8x{11FKU=TNAZh(C4ka6G)S40pt(bNl^)J+&?l%!<_RCDiVJMm@T;9WSY4c}BsB z(=#tdRw$!YgjK&-OyZpLrzQ>70zQB1sghxov^Ar98-%DF&RY(NSJGgv2&EyidZGF} z9-RfD)1X0+BM>V#Qo`lYZ9ljtBX(qRxFSe#plw_OXm!CEZQ{P}NS#|seU()roQ9SS z$*dpH5rDOTh9GNsTPjRlCufvYjwBC>FP9p zo{?WYZZTA%UJSvA19OCTti|K?7PTgXGG9BTN1sf`Il;xFO5d^5(UZ{~U!K&a5=aE= zg6Vn_w_n7M9rC^~Ksh4W5$h$!f>UCHI(KRGyO(44d;|@^l>}B?r4EzGb(@FQ0FJ}N zyYy{-*yi`pn#!5^fNTr9sv(^X3QsC&e^l?+Wb7@7K?V!m@9n1+S-)|w;>mrM6Pv)* z`{p=1wYP2k#@hY;dk5pudt8^$pB6|Vz)~Mlk%iGg87-^4G;5q<10b8=SMub)7+DC> zb+!6$Qf2JJK@Z7`gIdx>i8>=TWe39=Ur;YimWlAM``<5n;MBcCL^zM4zO|2aaltG8 z$@97!cv{=gbm~fype~D3Ivq)h6Fe>{uJz+`?`@p7cR_Z_KrQ@Q?JTY_N3)?MCiD4X zSlcJSKku{^$y$9ab*YFR*R5s@0mJHR)2H2~JJ8vVm6dYq*18`aI=*|eE6UK07I#LS zEjY__J|YN@23a0}Gc@J2_gQAvL8;`+iSa)xk*(nqXucgJ>OR^2#h&GE_%^WRmV73) zq0p)RZ}mrt8Pu=_u2BnU>93+AKk>lS!lsgiTXRMnU;bD#tCF06_;2rRURp52>5I1gT$^=l)kGgn%(T>zjr4Q3lVI}Tau8pgH4Sr{W z--zo3{bW^K1r%*oYDFvTqXnuRjWc^4H9O(cWg)t|@#+C$#WUQa(b$*OZ zuHrdJPd~spTZ3s}KiG2sjE8w>&-zq(Z^Wf4i}U-JOWO6MBj3e)*>L)xfT&Rp1&<<#JCh-EUObD(RamS%FD(|whq_6bbx?r%PaVN4W6 zvdZzt+i#HNeUWD?Y{Ono{OWw08jkRXYK=2m@eBqU-(~m;`uD8srLgFF%Xd|SpP944o_(n}BE_?UJu4PPQ8pqQ{+;-G0qsQ|QB&i6&ctc=90TUOW!c zdzOa&#H)2*qesm;aepx?8uDRWm*@FH1M(A-|6oC|buaF&z_nvV?A7yJl}-nuz1p_h z{ZB)FVu3RYKb&JlrtdCZjz=gG-n@0KbGia;>CaLEwa?iMO0rbK`?MEYReZ`C3uXhK ze4CpdFycgS<>`l!i>afU%4x238gZNEu_I3zDOrJnXkd87D5vRS8-KR=>8Mb06f$jRE}tM*py01QCR(kGwz+Mj5N7ivL4({^h8T%pA(g#3 z9l0-}B>Nj+@JIHGIB-(#SgxD)vw~CToQ?5W7ELyZP-MA#n(z zo1S4;(Bb9;7OgUBU>+Gy!-CyA-fBkf6z3^OBsveFZK=w5~SJIxp4HCa-MRLWii*pa~3;1vy2q?sU$3VS|a3 zv0ABf;L_*PPK=^ul!xNFp^zr$Q#%()y^=&e=gk-C_78-+i#P-puv?ap?beEb8H=Nv z2d6T)=R}EL7^~?SaqPvqI~$4lFz@IiyyFSQv;l|(tMEO$C-^rwU4|s|W`Wk6EIcT> zu)?dWhmF_r{$4jn4z0U}@PeHgdQ_~jzgjuJbb}XPG*^N1{~=WZ7%oF+H(pcx?(p=T z2tr#p#s}eDL88HR#WuOjJacn0Q19kTdcD3dyiIV=WcZwzs(6S}pLc+8k5z16=Y+Bc z{*HR(!8#!|sjEiyuB(lYZVTN|n;SJxq9WeKDvt@lz_759XCB%5&Vd)~wX^m{^W=GV zMC2NvkO~TE<`SjEHY#jH^p4s817SIo^6?MeH9Di##k%J0Imymx3jnTD)lTIJ6eD%MMVy|e6hlcFeD>Umf0GNRO-HUBji)AB)?^T7_BJq%AiU_uw27Pq#$J+ z%@HBYFIl74T zAErhxzB<($!l*6WB=N=zZ5>TcE|bO~ZVPSPJ%{9L=-}cM7Qnu=;}$BEKNt!n%U7IL z?ufcy)VHNbD?(CcZ5bolob!Z*v(FM9_bvNStNo=Quw@lLhtxG%(H8U47vk<^`f3Qs zo-`7WSHuyJTV2}AOB{rB*;45m#g9M3mAM|$=`ch)>lR8%-v;d0R{WYq6Ij3A1(Zk2 zzFUM;R=u>Z<#^AY{UrN>W;!5Ymy&(z-ox?oXQ<@R;re}paPzP~vQ?ahRI1+tdJuz+ zUmx<)Zo)m)E&5Zxj*I-J$6R>_*4~)kY!R!j_%ti)@kp=eVQu!{j4cq zvWWF+V1i5B@A}GmZw;SKYd!Hsyynw zo+>A zE@4*&PSqPRTTEee3|>9_fvahRD_N}A@Ku#R?APgI5qHP74Q(V8-SK{fYm1J1RC_|- zVS~HG5MmRwte*ty6VFxOrK%BbbGb>!zW0xE9z)*9OUC3*ntBj0D$vVC5N?Mj6t?&! zX3hbA>TEE3n2rK7ymL(fULL^8JU(<-udt(!(`;GlS7$&-aj=|m#UQ>)WVQCP0Wx|XZRPJrgzG97j$bXezN!Sp2%q;>Pv=@sQny?J9{M(r(p zVwmtVe$ioGvEn#KSmaX#(wt$XSv{zGLcQ42)r2iR!m44|fr$$&tzbRIZ~vNP)Cy1z zn7G0q5Vt+y@A!fIEH27u^ zX67&r5uQ)*8lIBh?9$q^WWOi(J#OvhK?>AwIn~!iuJP0kM>Cs*-(lBNC1ykF6zkQM zoFygBR~u6TuA%a@Mla{>V9aSGaq*YZ$NdUdaOARuIlJnKiV->8z6_ONMmVL+4YmoEr9a4?%pMkj z{6c}3D6018n4F-)+r`;?b%pYd0bz)p@p#sVGJr{;aFjKh7Qg5L1$~Fxs>$m570+4z z#p3y4UAUB?(X`k#PC}*F2+X!?hb3)y?+-b?U_Nrw7bI9nEt_(GiBrTF73q#3p>sUK zF)lYDv<>i#LRI6)$tCoG*Ohr4Ikk(r*f$C-Cco98R`46J?%vm(Z-l{1HBn|V*~;QD zlM(h_S^m#B!r^C4Mdvw_JWVXHf*cCA$!#j$x&B&#ND#C}UZULOHM6~;0_Iz=#5kc< zm|qxK=hSK+wc&Ky>4?b$Aw_XW`ax5@^R+#UkD{Q6L(l5nTP8WDLKpoY+Bm;pfZ>cq z6UY$FVh;lh6rmJbk?%5#8#XXRPtOgAvwZj(t=qft@2yl~G3|wsbgsAuYKNlkOTd)P z&7iA$$7<2`UE1b_E_OxX*45VYh!{9JqlT^!sC+2;cefQXb^`9#z~M^sF1sK*-W)F1 zaI21*r>pSR--)n|6!%w3g3ZxH_n*bA@UIVJP73V?e5P2yqCj;Q+fyk<}wVR;=V$w)JKSD;j!GkiT3P2Y+O z0WtQ1_5#P=BYh0oY|2*qWi}1ue1?7Kg|^gJtp7em)Leq zaxa_*{s||m`psO1(8G{XNoeT2J5R294y{|uF}qdz9?r)`<(>)*NnO7SybPnaH(2tJ zk@x%LrRPLOGm2NsFFTe@erX>1)9h~}eJRt3WFiQMWdeHxl~s?r%mN2jxIb4dkHFe7 zIDZq-{2Hb*`T1_w*|);fI(h+XCa;}SsY5?)i;OnF-_v)J)53n=2Ii`a(&d1aR#1?V z6W^Dn5l`&0CY9^Nyx_by9~?Q`v@@ak^L@5a*KyJ;Do=j)|0RDUWe?G0{AAIZyZdFQy>2W0z$1 z-;-)coSaK5rD$$1w?DsVu84z0o4_8IFiVD5py&u8){;N4@5^T>zNqkwcu7&+kg;)P z2XRQt2o2gw{$Z4 z-s?|-kJL%;{)OB`cIWTJo;>dEmS2#?_9910*aAAQt0#Go#2{EM6^Vu>4x7{5v2Q~{ zMZOcQI+(20Xa^NXqI{}*XaI7qSEF{*?UdEiD{b`xR85?w4DZmwLlfAQ4t_A1Oq_Gm zYAL55O~Zb-W5VpMh?e_|0cR-@Yjb5n@;VzM8jr4q2rc}ZC1uF)dpIR^420%;QxX-5 za*O@G@Mz-FgC6W_RT7QwPQWAlN4_Nm`V0C=McE-;XJGFQm2kppq z2F1h6(KGx4Mn)_YRI5o{Rv0#wdAi*Ico=ov;qC--IE3pv3#o@ zl|2o?ZJ{@l_vwt)dJH5DVx}dr8Djeqp}%`CM1mVGb>n&ve^6NMovZVHfmNR>NvT@q z2ehzAX`sZo9M#bTJ)gUW@8z})?Tz&JD)6PyJF@n-I0R@X7|;j76E(gO!_Vs4IvdUK z!aCV5aC>TbgG}Gw|IXyG<@3Wuncf9e#PfUj%!ps%(+%oAjXt0bQ2OSx!s;ZRSNJZ041|t1HZIR=`s+p$;fpDKk2Xr}{Mcx^* zFbjila>9>c9qG%?$!QRpus2HL_glq3n=@8}^|MLsq3-f(Jqq8Rx0fd6E=2QG-6+27 zB+2>5dD@ejsBOrXq_@1`C!XAM-#ryrpu>f5lNvz#31RL!VeI~p(JzU1QpVFyW?9=Z|0C?F#nMszMIg10j=OY4wozh5Bdrd(j9o$q@?Z3qCv~pr~XyPtM*4#)BpANq*Da2c_+g9ul~G50uP!!p^CGM@+~b`Xh9r-CMCin_|=ZgB0$ zDeSw}O#Nl6k2Tg*53G3K`;P*L<8|jROuaV^c_F&ydbD=Z`Q|6r**D_??KAbfo{=2H z2YISr2j0O{DA&|mSidwULE}&Pj&z6b+Lk9JfZ|W>=cqTcH|2hT&ubs4i3PY~<9vpf$e)rY3A<-s$S2kb@5b>^)0p=_^|A!#q!Z5AUkDjcVNh3&b! z9cks8%V#+dKztk=kK!ZPU#ytzn&|9yO@*b>pQhU^%ITR%S}qDJ^4n=>a>s8CRP3XXqwzlZ6+r>x*b74tB!o^S?9h~Vbc>0F`}xJeVi^skRN9B)jk&Ei zb`czD)_}8xa;gqBAZcn2dz5L1m${=fkzH)6loGru!=Tx-oT$Z=Uc4| znTHiKg?I!}Sm_=XO*B`746?kQF?`ScrEuq-v*+!`PiJ?$0DEND-9}Of>;#n=S!~c} zow={(y`Lfu7b(Xz2;1We}UE@(xw9uM$63B6o#X;CBkSzuO4 zjMU1#yv>(E8s?yenmP@mz@%!S{%E3&yZzS7b7|1SKM8l?fP0LlGW`-UM1)%2(`8TZ zWt#0I=4BfKP?mjt;tSY*kj;~e9aw!GG)tfM!XVNfT!_SWtIDHZrPFjkGQ{<|+jPrd zr%zBdp3iFreGIU_;H}kmH&~0%;?danQmx{xLvH2R(#jAENET(c z`C0Hoe0Tn}l2JxRCe$t8Sr+9y*UWABWNERapceTuDz(*l@}4WDtmpGv$J2qX*)01(p%jdl@ zy16Z>2%TNae$tbyFSC(S&2@CQ4WxOu&_wQ@hMG8x7|biw27e;z5}7RBZW+UTNaD6+ zb{0*WQ&I0f!PzSquO}+avT(*9h?8yQCeNU&AoUT|3plXm%V%||SM??bKvQ7HuW?El zzfM*xre-7zy5x^zCTsR6f45iR76@sqYh`HcUaVfhT>Kg@a*>TB*IR`#U_XsOnWtxs zamDm)dyNiu=UsyZMp>%zo~6MHF2o+tjB-`=z4^|u)FfB6#M1XktOmmTdYDtzJi&w0 z2i^`H)Kc;M`$o$N^O6YCPD?5z(XfCwLblv=?1L4nO`2!Spp{oUizzuvj%n4q^~`F! z?I-w=8m2mUNng2AkBAX}H_9^ffPLOiIf#+21>d}P==kljPyCkW(KFbQ7y#mIV|CwU z&|}{P_eE#D?G@s>1N9!fuM`?;=Nv)Az{U?wIIBo@+#MENx4+n-kN+%(OpoEY3ZMU6 zQDFJmKg^ld7#J1<0l z)j_dE58wQz)ptm%fRhBic02T}>ZK^Ve2G-r3g;WMgQwlYqb84&SXW(g2Xd&QN@$n@x3LtC z9x{#Cp*fGzBVF}6L~SHFI8UH#eiz0VO3~RbKa)<<78ZmYrQvUi6}LlsK{2l#gJTaP z3$_vNve*)3Eyk)z2tN2~ZqhS>Ut@Y_+C6kpr44^u1v1TYRD8%Sz_ZG`Vq+k4~*=xrGwI=prZKcqrKP1lD`jG`QpX6O;elox-QCG{V z;_@3V8Ry}KLM&fCeBc+gsZs0f8OofG{K;$SsV<$37vbQzlM^Hf^2Kqlb;r1Bks>KF z9jbkPFTD`qZ%QgKgUyDDoYs{+h}mD=XBF0rUsD*{*sD}E7PK-C;)g}-ift9!9|++Y ztV$$*mB~ND9D@|-t2%aQHbmz>gjaRGj;r0|#nVokW3sA!={ti`o!N#9U-}e#{pWJ-4+-Qs=z%Nbw`R?~X`^Vc9{gNnfmZIVwj7UBKDNWU4>$LD=eBfK|S0Yxl>Nb!6kU)7QPx61e?gp2!ipwcE^XTSs?PDuVf;&lV?us84>FHD zumH9Xdz{HbD7WlUhe@X+V_3I#PU7c2SprzgYfx5B4g-91+)UAmpT-`@R4HHfwaE|5oVsw(;V_)t-zOKjcmsJk`$~$AU@ejLrWKMah~&ouujVX z+@AG$z3Qb5efJSi(>qJOzVW%QEU-gCWF6llzwC;f*v=kW_!l_|%7bKkgK2gk=|w!4 z6p15BK4qlFD5#Q$e4&*bt~W_M?*GN;@a>M|o{yWd%b!m@@9sP}@oBv8^FCHxIEYOv zrkfLT;xkkbFa4}zqo)0NBM&6jAL-~z);Tvj3#HMqntI@%8qtPqMmmgA0i1){ z@bi5p6`8R5##yC8LE=p+Hfp}tWETCD=*^cm-P3n~){Emzh_6GR_nEGp3Z^fvn)m6_ zr-)moN$;^;3W=uymHUv0Go z))^DL{hz#{mH-p_Q~(zTF))HbuRN*B3hQQfKgLyI4B6pGc_Nq|hue#ckw=HV(YliZ*anigUY_3(BAnHT>6n zAhk%b`Pq%0%9wWw54F+#9Au88Gh+ULB?Kp$>aq69TAQ&S-At+>Zwkd}>lUN__y~wM zy89J%*jXW#QP#4fW?$~0d_sBjZC2BX3-7MY{e6+eOr}*kR_-erH_=g-hq-DCTlDax zDLl6m6HbL2`SRJ_Caw>@<$z<-Ay2{hbc|F!=t)_?h{p`QYt zssR-J4rWGkDTx=$H%D_6dLxt7X@MatNnR$1#=>^7LqJ~b_i(%t(@SoT4DSrIVLS`< zAB6E~%={uR!OC+j?8G7R&X@VruNP(6w=<+sOl-CFLiD7ETgyDF?Dt^vs&k1n0deb( z zGY8lYCRdTIDc4OA&?yf=$U!4wp)7+@0-|7jDs3<>1bG6?H*U}k_3H;Y`z^>^+NK}n zoJk7WBb0==zPa1BihLcNAAB7wvJR4Kgoma<9jjb-&pTU7rXy>LgP~pK4Jp%#fM{5W@{!0VXS^WK?2M zF`l}aTh$EfFi{bcDNn{pmV-;qti8$9EIxDjTHTyA79Ge)FyJ`k`xb8NL;A<=G1zg# z7J9LLXX=JjI=fxa$&}zG!9)|Ge(k014<46y<>y9N<1+!l<9%?|oWHIhI62+Ptcp%R z&U)++CtZHjQG5|Fd=vkoR7!rl^}9LlE#T)wg$2wq`3t@nqQsYq1psKE!Upqc^Urk^ z=Nb8({jIX^=ml+vnEL7wKkeV+z-h~Fx{+*Xu zZpzh_JRMZ6KS}qxvDfmk`v^Rp&PP;!MRpMVhQ2|2f}Q3{*CVc+b-QlWBd{p>9DHQ+ zEIaMbpo5<~k7Wf}Gxi4yDcwv20I!p*fz2a$%e(r61GLiubxCt=dO2m*XZqJ_$Y>|~ ziw8?s*O9>(WRU(Gr(8_|u_wVePWVgQk0#krmIeG01Id{rivoRnp-bA0d9{#8c`NMD z3rqUB$H(vyveM1E9Bk=<7s2`Q2mPoZ{ZS6fDX0a@l#3?vThY9$)q+`6N{UDO7Jr-H zs?_USq6H0(?NHT=Xuh8Q$~JJ5eELhHAv5<*5-O2+Nq!T}5d~i(leMyBB`1-Lgttyn zZ4_n$Dp&YG=X-{C73Wzl;oF&1sWDB(%JVtpcq?H&KQG6t5>~NKm?JhGB4?$u{C+{dJX+lU8!Wy8!(eMtP;olg{}Oe)3QE=)<9jJh1{& z&C96O#pO&8a{~J>rthMP8CzfZkqK~IyW!6AzOIN7wniGJ=i)PUM~tK@-4Tk)NX?Vw z5m}9=Fw}cq9(NKF|5%l~#OJ#7S;zk@`mw>IEopRvFWV>$K5EH6h2 z1P#Alr#;#m9r*W{7$9^sS_V_e@Kkr3+hwnLld8%Nah^ospb-X{uEJgmQoD#({DsQP zCaQ{#bF7|022qei!qFnN>3pA=d>L4e^(*LTsUpfFZ^}bTm-87 zW}3^L`TcX!8|uTgEANBRwsDq_wXQgemv4BVxRM698I}TZz1)?<30^x!%SHLGfu8 z?5sT#+~jdUUZ1F{H!4AbLr)~N`g}dJY@fWE-3$i@$ABUjvej5&Ivp>R$f@!!J|kG? z$ImJ3m*`hEeIp7Ad&mTqx;@mUMUl7TT# z2J{>08?g_Bd~r+R2x5Yo6LJI|%Gcg!@#UESPLdK3+J<{E1Go{8LN|POy88o`?T>r8 zLaAg{z5r0fzxYgHw;%yb#Sb1NQ{PEA3(xLv-p~Tb+XHz-p;R-eVj;5>S)_LWC=yTu zE0h@`hC)mH{%>5t2kyfc7^CO|_Ynh#D+l7N{>FV|1L8)+(Eouu{71gyf8Z|ugZn#Q z7UDl~S#-Z`}X0`JMlLPH5$KklDX* zSqQ*<J(#>jKnSS-Y4S0qH*$*Z}|lH3a|xe0=?9{SN`C{x4bZ zKc4?P4io?Yj{sgLwgeiroGky!3HA@ie?5Hga+VQaocT zN8sJ}zZNjUM_llKof#a=+Q?iVXd$vT`tRBqfoeT~O5a~NIx{0Hhrc-B_HnEI-~0zv z{kY{Wsc&OsW&Gg-Rsp={YyxD^T&({J`p;vN8U1a;GXh?w{s05lhaKM8!HVc_9t?Qt z+rSFQS2#L3{(B%FKA9gE`+@Xh{WIp@8b4s!l0f$3`B?vFkH7W**|FZ=fdVTqstAY? z0PcMofN2V(<;%k0W`e*v0Kg3%tRL3c4+H>dcKm0Qr0ezJ;RBBbyyyLQOF>_=0f2{( zw3#4?XY!2{ZhUFT>wD;7ITmBF~Fn`4Y z(*NK7cR&8$_6Pc-pTK7R)lWR&Yz#jX!1b@$e9Y}Hehz5!_{;M52GsL}0a^I}dQgG# z$2XRL>;Jzj17C}Vw!pJ~tdDv>YBg{+a?%IVZ`Q_tQ6LK-{^#2r_z~ftZ)f-Mj{IMX n&iwDs6!H=$+mC?wz<;mt5#%E%Kvv(ygpG@ynS-8*k@5ckH212{ literal 0 HcmV?d00001 diff --git a/tests/tools/test_video.py b/tests/tools/test_video.py new file mode 100644 index 00000000..8952c238 --- /dev/null +++ b/tests/tools/test_video.py @@ -0,0 +1,7 @@ +from vision_agent.tools.video import extract_frames_from_video + + +def test_extract_frames_from_video(): + video_path = "tests/data/video/test.mp4" + res = extract_frames_from_video(video_path) + assert len(res) == 1 diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index a2b75851..42b3a810 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -457,6 +457,33 @@ def __call__(self, input: List[int]) -> float: return round(input[0] / input[1], 2) +class ExtractFrames(Tool): + name = "extract_frames_" + description = "'extract_frames_' extract image frames from the input video, return a list of tuple (frame, timestamp), where the timestamp is the relative time in seconds of the frame occurred in the video." + usage = { + "required_parameters": [{"name": "video_uri", "type": "str"}], + "examples": [ + { + "scenario": "Can you extract the frames from this video? Video: www.foobar.com/video?name=test.mp4", + "parameters": {"video_uri": "www.foobar.com/video?name=test.mp4"}, + }, + { + "scenario": "Can you extract the images from this video file? Video path: tests/data/test.mp4", + "parameters": {"video_uri": "tests/data/test.mp4"}, + }, + ], + } + + def __call__(self, video_uri: str) -> list[tuple[np.ndarray, float]]: + try: + from vision_agent.tools.video import extract_frames_from_video + except Exception as e: + raise ImportError( + "vision_agent is not installed correctly (cause: missing dependencies), please run 'pip install vision-agent[video]' instead." + ) from e + return extract_frames_from_video(video_uri) + + TOOLS = { i: {"name": c.name, "description": c.description, "usage": c.usage, "class": c} for i, c in enumerate( @@ -472,6 +499,7 @@ def __call__(self, input: List[int]) -> float: Subtract, Multiply, Divide, + ExtractFrames, ] ) if (hasattr(c, "name") and hasattr(c, "description") and hasattr(c, "usage")) diff --git a/vision_agent/tools/video.py b/vision_agent/tools/video.py new file mode 100644 index 00000000..1957915d --- /dev/null +++ b/vision_agent/tools/video.py @@ -0,0 +1,190 @@ +import logging +import math +import os +from concurrent.futures import ProcessPoolExecutor, as_completed +from typing import cast + +import cv2 +import numpy as np +from moviepy.video.io.VideoFileClip import VideoFileClip +from tqdm import tqdm + +_LOGGER = logging.getLogger(__name__) +# The maximum length of the clip to extract frames from, in seconds +_CLIP_LENGTH = 30.0 + + +def extract_frames_from_video( + video_uri: str, fps: int = 2, motion_detection_threshold: float = 0.06 +) -> list[tuple[np.ndarray, float]]: + """Extract frames from a video + + Parameters + ---------- + video_uri: str, the path to the video file or a video file url + fps: int, the frame rate per second to extract the frames + motion_detection_threshold: float, the threshold to detect the motion between frames. + A value between 0-1, the percentage change that is considered a different frame. + A lower value means more frames will be extracted. + + Returns + ------- + list[tuple[np.ndarray, int]], a list of tuples containing the extracted frame and the timestamp in seconds. E.g. [(frame1, 0.0), (frame2, 0.5), ...] + The timestamp is the time in seconds from the start of the video. E.g. 12.125 means 12.125 seconds from the start of the video. + The frames are sorted by the timestamp in ascending order. + """ + with VideoFileClip(video_uri) as video: + video_duration: float = video.duration + num_workers = os.cpu_count() + clip_length: float = min(video_duration, _CLIP_LENGTH) + start_times = list(range(0, math.ceil(video_duration), math.ceil(clip_length))) + assert start_times, f"No frames to extract from the input video: {video_uri}" + segment_args = [ + { + "video_uri": video_uri, + "start": start, + "end": ( + start + clip_length if i < len(start_times) - 1 else video_duration + ), + "fps": fps, + "motion_detection_threshold": motion_detection_threshold, + } + for i, start in enumerate(start_times) + ] + if ( + cast(float, segment_args[-1]["end"]) + - cast(float, segment_args[-1]["start"]) + < 1 + ): + # If the last segment is less than 1s, merge it with the previous segment + # This is to avoid the failure of the last segment extraction + assert ( + len(segment_args) > 1 + ), "Development bug - Expect at least 2 segments." + segment_args[-2]["end"] = video_duration + segment_args.pop(-1) + _LOGGER.info( + f"""Created {len(segment_args)} segments from the input video {video_uri} of length {video.duration}s, with clip size: {clip_length}s and {num_workers} workers. + Segments: {segment_args} + """ + ) + frames = [] + with tqdm(total=len(segment_args)) as pbar: + with ProcessPoolExecutor(max_workers=num_workers) as executor: + futures = [ + executor.submit(_extract_frames_by_clip, **kwargs) # type: ignore + for kwargs in segment_args + ] + for future in as_completed(futures): + result = future.result() + frames.extend(result) + pbar.update(1) + frames.sort(key=lambda x: x[1]) + _LOGGER.info(f"Extracted {len(frames)} frames from video {video_uri}") + return frames + + +def _extract_frames_by_clip( + video_uri: str, + start: int = 0, + end: float = -1, + fps: int = 2, + motion_detection_threshold: float = 0.06, +) -> list[tuple[np.ndarray, float]]: + """Extract frames from a video clip with start and end time in seconds. + + Parameters + ---------- + video_uri: str, the path to the video file or a video file url + start: int, the start time (in seconds) of the clip to extract + end: float, the end time (in seconds, up to millisecond level precision) of the clip to extract, if -1, extract the whole video + fps: int, the frame rate to extract the frames + motion_detection_threshold: float, the threshold to detect the motion between frames + """ + with VideoFileClip(video_uri) as video: + source_fps = video.fps + if end <= 0: + end = video.duration + _LOGGER.info( + f"Extracting frames from video {video_uri} ({video.duration}s) with start={start}s and end={end}s" + ) + clip = video.subclip(start, end) + processable_frames = int(clip.duration * fps) + _LOGGER.info( + f"Extracting frames from video clip of length {clip.duration}s with FPS={fps} and start_time={start}s. Total number of frames in clip: {processable_frames}" + ) + frames = [] + total_count, skipped_count = 0, 0 + prev_processed_frame = None + pbar = tqdm( + total=processable_frames, desc=f"Extracting frames from clip {start}-{end}" + ) + for i, frame in enumerate(clip.iter_frames(fps=fps, dtype="uint8")): + curr_processed_frame = _preprocess_frame(frame) + total_count += 1 + pbar.update(1) + # Skip the frame if it is similar to the previous one + if prev_processed_frame is not None and _similar_frame( + prev_processed_frame, + curr_processed_frame, + threshold=motion_detection_threshold, + ): + skipped_count += 1 + continue + prev_processed_frame = curr_processed_frame + ts = round(clip.reader.pos / source_fps, 3) + frames.append((frame, ts)) + + _LOGGER.info( + f"""Finished! + Frames extracted: {len(frames)} + Extracted frame timestamp: {[f[1] for f in frames]} + Total processed frames: {total_count} + Skipped frames: {skipped_count} + Scan FPS: {fps} + Clip start time: {start}s, {clip.pos} + Clip end time: {end}s + Clip duration: {clip.duration}s + Clip total frames: {clip.duration * source_fps} + Video duration: {video.duration}s + Video FPS: {video.fps} + Video total frames: {video.reader.nframes}""" + ) + return frames + + +def _preprocess_frame(frame: np.ndarray) -> np.ndarray: + # Convert to grayscale + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + frame = cv2.GaussianBlur(src=frame, ksize=(5, 5), sigmaX=0) + return frame + + +def _similar_frame( + prev_frame: np.ndarray, curr_frame: np.ndarray, threshold: float +) -> bool: + """Detect two frames are similar or not + + Parameters + ---------- + threshold : float, optional + Similarity threshold, a value between 0-1, the percentage change that is considered a different frame. + """ + # calculate difference and update previous frame TODO: don't assume the processed image is cached + diff_frame = cv2.absdiff(src1=prev_frame, src2=curr_frame) + # Only take different areas that are different enough (>20 / 255) + thresh_frame = cv2.threshold( + src=diff_frame, thresh=20, maxval=255, type=cv2.THRESH_BINARY + )[1] + change_percentage = cv2.countNonZero(thresh_frame) / ( + curr_frame.shape[0] * curr_frame.shape[1] + ) + _LOGGER.debug(f"Image diff: {change_percentage}") + return change_percentage < threshold + + +# res = extract_frames(video) +if __name__ == "__main__": + video_path = "/Users/asia/Downloads/frames/baby_cam1.MP4" + res = extract_frames_from_video(video_path) + print("done, extracted num frames: ", len(res))