Merge branch 'main' into full-claude-35-support

landing-ai · Sep 19, 2024 · b2c0612 · b2c0612
2 parents 20490b2 + 0891ca2
commit b2c0612
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 32 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.135"
+version = "0.2.137"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <[email protected]>"]
 readme = "README.md"

diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
@@ -239,25 +239,7 @@ def chat_with_code(
  for chat_i in int_chat:
  if "media" in chat_i:
  for media in chat_i["media"]:
- if type(media) is str and media.startswith(("http", "https")):
- # TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
- file_path = str(
- Path(self.local_artifacts_path).parent
- / Path(media).name
- )
- if file_path.lower().endswith(
- ".mp4"
- ) or file_path.lower().endswith(".mov"):
- video_frames = extract_frames(media)
- save_video(
- [frame for frame, _ in video_frames], file_path
- )
- else:
- ndarray = load_image(media)
- save_image(ndarray, file_path)
- media = file_path
- else:
- media = cast(str, media)
+ media = cast(str, media)
  artifacts.artifacts[Path(media).name] = open(media, "rb").read()
 
  media_remote_path = (

diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
@@ -1,4 +1,5 @@
 import difflib
+import json
 import os
 import pickle as pkl
 import re
@@ -70,8 +71,8 @@ def redisplay_results(execution: Execution) -> None:
  display({MimeType.TEXT_LATEX: result.latex}, raw=True)
  if result.json is not None:
  display({MimeType.APPLICATION_JSON: result.json}, raw=True)
- if result.artifact_name is not None:
- display({MimeType.TEXT_ARTIFACT_NAME: result.artifact_name}, raw=True)
+ if result.artifact is not None:
+ display({MimeType.APPLICATION_ARTIFACT: result.artifact}, raw=True)
  if result.extra is not None:
  display(result.extra, raw=True)
 
@@ -210,7 +211,18 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
  return_str = f"[Artifact {name} created]"
  print(return_str)
 
- display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
+ display(
+ {
+ MimeType.APPLICATION_ARTIFACT: json.dumps(
+ {
+ "name": name,
+ "content": artifacts[name],
+ "action": "create",
+ }
+ )
+ },
+ raw=True,
+ )
  return return_str
 
 
@@ -294,7 +306,18 @@ def edit_code_artifact(
 
  artifacts[name] = "".join(edited_lines)
 
- display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
+ display(
+ {
+ MimeType.APPLICATION_ARTIFACT: json.dumps(
+ {
+ "name": name,
+ "content": artifacts[name],
+ "action": "edit",
+ }
+ )
+ },
+ raw=True,
+ )
  return open_code_artifact(artifacts, name, cur_line)
 
 
@@ -350,7 +373,19 @@ def detect_dogs(image_path: str):
  code_lines = code.splitlines(keepends=True)
  total_lines = len(code_lines)
 
- display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
+ display(
+ {
+ MimeType.APPLICATION_ARTIFACT: json.dumps(
+ {
+ "name": name,
+ "content": code,
+ "contentType": "vision_code",
+ "action": "create",
+ }
+ )
+ },
+ raw=True,
+ )
  return view_lines(code_lines, 0, total_lines, name, total_lines)
 
 
@@ -415,7 +450,18 @@ def detect_dogs(image_path: str):
  code_lines = code.splitlines(keepends=True)
  total_lines = len(code_lines)
 
- display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
+ display(
+ {
+ MimeType.APPLICATION_ARTIFACT: json.dumps(
+ {
+ "name": name,
+ "content": code,
+ "action": "edit",
+ }
+ )
+ },
+ raw=True,
+ )
  return view_lines(code_lines, 0, total_lines, name, total_lines)
 
 
@@ -429,7 +475,6 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
  with open(local_path, "rb") as f:
  media = f.read()
  artifacts[Path(local_path).name] = media
- display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True)
  return f"[Media {Path(local_path).name} saved]"
 
 
@@ -627,7 +672,14 @@ def use_object_detection_fine_tuning(
  diff = get_diff_with_prompts(name, code, new_code)
  print(diff)
 
- display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
+ display(
+ {
+ MimeType.APPLICATION_ARTIFACT: json.dumps(
+ {"name": name, "content": new_code}
+ )
+ },
+ raw=True,
+ )
  return diff
 
 

diff --git a/vision_agent/utils/execute.py b/vision_agent/utils/execute.py
@@ -56,7 +56,7 @@ class MimeType(str, Enum):
  TEXT_LATEX = "text/latex"
  APPLICATION_JSON = "application/json"
  APPLICATION_JAVASCRIPT = "application/javascript"
- TEXT_ARTIFACT_NAME = "text/artifact/name"
+ APPLICATION_ARTIFACT = "application/artifact"
 
 
 class FileSerializer:
@@ -129,7 +129,7 @@ def __init__(self, is_main_result: bool, data: Dict[str, Any]):
  self.latex = data.pop(MimeType.TEXT_LATEX, None)
  self.json = data.pop(MimeType.APPLICATION_JSON, None)
  self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
- self.artifact_name = data.pop(MimeType.TEXT_ARTIFACT_NAME, None)
+ self.artifact = data.pop(MimeType.APPLICATION_ARTIFACT, None)
  self.extra = data
  # Only keeping the PNG representation if both PNG and JPEG are present
  if self.png and self.jpeg:
@@ -207,8 +207,8 @@ def formats(self) -> Iterable[str]:
  formats.append("javascript")
  if self.mp4:
  formats.append("mp4")
- if self.artifact_name:
- formats.append("artifact_name")
+ if self.artifact:
+ formats.append("artifact")
  if self.extra:
  formats.extend(iter(self.extra))
  return formats