From 84bab5e57a3acbcfd603b181b6c1a7bdf7736d6d Mon Sep 17 00:00:00 2001 From: wuyiqunLu <132986242+wuyiqunLu@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:19:35 +0800 Subject: [PATCH 1/4] feat: change the display on artifacts (#238) * feat: change the display on artifacts * clean up media download code * fix lint * fix lint --- vision_agent/agent/vision_agent.py | 23 ++-------------- vision_agent/tools/meta_tools.py | 43 ++++++++++++++++++++++++------ vision_agent/utils/execute.py | 8 +++--- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 40195246..fa68e6b9 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -13,9 +13,8 @@ VA_CODE, ) from vision_agent.lmm import LMM, Message, OpenAILMM -from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image +from vision_agent.tools import META_TOOL_DOCSTRING from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args -from vision_agent.tools.tools import extract_frames, save_video from vision_agent.utils import CodeInterpreterFactory from vision_agent.utils.execute import CodeInterpreter, Execution @@ -223,25 +222,7 @@ def chat_with_code( for chat_i in int_chat: if "media" in chat_i: for media in chat_i["media"]: - if type(media) is str and media.startswith(("http", "https")): - # TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later - file_path = str( - Path(self.local_artifacts_path).parent - / Path(media).name - ) - if file_path.lower().endswith( - ".mp4" - ) or file_path.lower().endswith(".mov"): - video_frames = extract_frames(media) - save_video( - [frame for frame, _ in video_frames], file_path - ) - else: - ndarray = load_image(media) - save_image(ndarray, file_path) - media = file_path - else: - media = cast(str, media) + media = cast(str, media) artifacts.artifacts[Path(media).name] = 
open(media, "rb").read() media_remote_path = ( diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py index 477cbd30..bcdbefb6 100644 --- a/vision_agent/tools/meta_tools.py +++ b/vision_agent/tools/meta_tools.py @@ -1,4 +1,5 @@ import difflib +import json import os import pickle as pkl import re @@ -70,8 +71,8 @@ def redisplay_results(execution: Execution) -> None: display({MimeType.TEXT_LATEX: result.latex}, raw=True) if result.json is not None: display({MimeType.APPLICATION_JSON: result.json}, raw=True) - if result.artifact_name is not None: - display({MimeType.TEXT_ARTIFACT_NAME: result.artifact_name}, raw=True) + if result.artifact is not None: + display({MimeType.APPLICATION_ARTIFACT: result.artifact}, raw=True) if result.extra is not None: display(result.extra, raw=True) @@ -210,7 +211,14 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str: return_str = f"[Artifact {name} created]" print(return_str) - display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True) + display( + { + MimeType.APPLICATION_ARTIFACT: json.dumps( + {"name": name, "content": artifacts[name]} + ) + }, + raw=True, + ) return return_str @@ -294,7 +302,14 @@ def edit_code_artifact( artifacts[name] = "".join(edited_lines) - display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True) + display( + { + MimeType.APPLICATION_ARTIFACT: json.dumps( + {"name": name, "content": artifacts[name]} + ) + }, + raw=True, + ) return open_code_artifact(artifacts, name, cur_line) @@ -350,7 +365,10 @@ def detect_dogs(image_path: str): code_lines = code.splitlines(keepends=True) total_lines = len(code_lines) - display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True) + display( + {MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})}, + raw=True, + ) return view_lines(code_lines, 0, total_lines, name, total_lines) @@ -415,7 +433,10 @@ def detect_dogs(image_path: str): code_lines = code.splitlines(keepends=True) total_lines = len(code_lines) - 
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True) + display( + {MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})}, + raw=True, + ) return view_lines(code_lines, 0, total_lines, name, total_lines) @@ -429,7 +450,6 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str: with open(local_path, "rb") as f: media = f.read() artifacts[Path(local_path).name] = media - display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True) return f"[Media {Path(local_path).name} saved]" @@ -596,7 +616,14 @@ def replacer(match: re.Match) -> str: diff = get_diff_with_prompts(name, code, new_code) print(diff) - display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True) + display( + { + MimeType.APPLICATION_ARTIFACT: json.dumps( + {"name": name, "content": new_code} + ) + }, + raw=True, + ) return diff diff --git a/vision_agent/utils/execute.py b/vision_agent/utils/execute.py index cc01a89d..be29394a 100644 --- a/vision_agent/utils/execute.py +++ b/vision_agent/utils/execute.py @@ -56,7 +56,7 @@ class MimeType(str, Enum): TEXT_LATEX = "text/latex" APPLICATION_JSON = "application/json" APPLICATION_JAVASCRIPT = "application/javascript" - TEXT_ARTIFACT_NAME = "text/artifact/name" + APPLICATION_ARTIFACT = "application/artifact" class FileSerializer: @@ -129,7 +129,7 @@ def __init__(self, is_main_result: bool, data: Dict[str, Any]): self.latex = data.pop(MimeType.TEXT_LATEX, None) self.json = data.pop(MimeType.APPLICATION_JSON, None) self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None) - self.artifact_name = data.pop(MimeType.TEXT_ARTIFACT_NAME, None) + self.artifact = data.pop(MimeType.APPLICATION_ARTIFACT, None) self.extra = data # Only keeping the PNG representation if both PNG and JPEG are present if self.png and self.jpeg: @@ -207,8 +207,8 @@ def formats(self) -> Iterable[str]: formats.append("javascript") if self.mp4: formats.append("mp4") - if self.artifact_name: - formats.append("artifact_name") + if self.artifact: + 
formats.append("artifact") if self.extra: formats.extend(iter(self.extra)) return formats From cc8be2e218772d5ac70e777ab69e382a46538dce Mon Sep 17 00:00:00 2001 From: GitHub Actions Bot Date: Wed, 18 Sep 2024 05:22:13 +0000 Subject: [PATCH 2/4] [skip ci] chore(release): vision-agent 0.2.136 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7499eb7e..d827e131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "vision-agent" -version = "0.2.135" +version = "0.2.136" description = "Toolset for Vision Agent" authors = ["Landing AI "] readme = "README.md" From 32c6c0a422dc386b986c3485bb965ab0c259ad50 Mon Sep 17 00:00:00 2001 From: wuyiqunLu <132986242+wuyiqunLu@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:14:29 +0800 Subject: [PATCH 3/4] feat: new artifact structure (#240) * feat: new artifact structure * address comment * address comment --- vision_agent/tools/meta_tools.py | 33 ++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py index bcdbefb6..9012e9d4 100644 --- a/vision_agent/tools/meta_tools.py +++ b/vision_agent/tools/meta_tools.py @@ -214,7 +214,11 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str: display( { MimeType.APPLICATION_ARTIFACT: json.dumps( - {"name": name, "content": artifacts[name]} + { + "name": name, + "content": artifacts[name], + "action": "create", + } ) }, raw=True, @@ -305,7 +309,11 @@ def edit_code_artifact( display( { MimeType.APPLICATION_ARTIFACT: json.dumps( - {"name": name, "content": artifacts[name]} + { + "name": name, + "content": artifacts[name], + "action": "edit", + } ) }, raw=True, @@ -366,7 +374,16 @@ def detect_dogs(image_path: str): total_lines = len(code_lines) display( - {MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": 
code})}, + { + MimeType.APPLICATION_ARTIFACT: json.dumps( + { + "name": name, + "content": code, + "contentType": "vision_code", + "action": "create", + } + ) + }, raw=True, ) return view_lines(code_lines, 0, total_lines, name, total_lines) @@ -434,7 +451,15 @@ def detect_dogs(image_path: str): total_lines = len(code_lines) display( - {MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})}, + { + MimeType.APPLICATION_ARTIFACT: json.dumps( + { + "name": name, + "content": code, + "action": "edit", + } + ) + }, raw=True, ) return view_lines(code_lines, 0, total_lines, name, total_lines) From 0891ca20b2bd26bff11a28201c3badb0ad26a6ba Mon Sep 17 00:00:00 2001 From: GitHub Actions Bot Date: Thu, 19 Sep 2024 04:17:19 +0000 Subject: [PATCH 4/4] [skip ci] chore(release): vision-agent 0.2.137 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d827e131..78dedd8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "vision-agent" -version = "0.2.136" +version = "0.2.137" description = "Toolset for Vision Agent" authors = ["Landing AI "] readme = "README.md"