diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py
index 27feff83..156f79ca 100644
--- a/vision_agent/lmm/lmm.py
+++ b/vision_agent/lmm/lmm.py
@@ -31,7 +31,9 @@ def encode_image_bytes(image: bytes) -> str:
 
 def encode_media(media: Union[str, Path]) -> str:
     if type(media) is str and media.startswith(("http", "https")):
-        if media.endswith(".mp4"):
+        # For an .mp4/.mov video URL, assume a sibling URL exists that ends in .png;
+        # vision-agent-ui uploads that png when the video is uploaded.
+        if media.endswith((".mp4", ".mov")):
             return media[:-4] + ".png"
         return media
     extension = "png"
@@ -396,7 +398,6 @@ def chat(
         tmp_kwargs = self.kwargs | kwargs
         data.update(tmp_kwargs)
         if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
-
             json_data = json.dumps(data)
 
             def f() -> Iterator[Optional[str]]:
@@ -430,7 +431,6 @@ def generate(
         media: Optional[List[Union[str, Path]]] = None,
         **kwargs: Any,
     ) -> Union[str, Iterator[Optional[str]]]:
-
         url = f"{self.url}/generate"
         data: Dict[str, Any] = {
             "model": self.model_name,
@@ -445,7 +445,6 @@ def generate(
         tmp_kwargs = self.kwargs | kwargs
         data.update(tmp_kwargs)
         if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
-
             json_data = json.dumps(data)
 
             def f() -> Iterator[Optional[str]]: