Add first version of programmer agent (#76)

* fix type errors
* fix type errors
* add default system prompt
* add notebook executor
* moved execute
* removed unnecessary key
* added more tools
* added similarity lookup
* removed old execute code
* added image viz
* added utils module
* fixed sim issue
* add AVA
* black and isort
* fix mypy
* changed file names, added comments
* fix mypy
* describe box output
* updated dependencies
* fixed api key client issue
* fixed test cases
* fixed imports
* renamed viz tools
* add warning for exceeding colors
landing-ai · May 8, 2024 · 141c3f8 · 141c3f8
1 parent d92f378
commit 141c3f8
Show file tree

Hide file tree

Showing 22 changed files with 1,564 additions and 340 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,9 @@ opencv-python-headless = "4.*"
 tabulate = "^0.9.0"
 pydantic-settings = "^2.2.1"
 scipy = "1.13.*"
+nbclient = "^0.10.0"
+nbformat = "^5.10.4"
+rich = "^13.7.1"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"

diff --git a/tests/test_type_defs.py b/tests/test_type_defs.py
@@ -1,6 +1,6 @@
 import os
 
-from vision_agent.type_defs import LandingaiAPIKey
+from vision_agent.utils.type_defs import LandingaiAPIKey
 
 
 def test_load_api_credential_from_env_var():

diff --git a/tests/tools/test_tools.py b/tests/tools/test_tools.py
@@ -6,7 +6,7 @@
 from PIL import Image
 
 from vision_agent.tools import TOOLS, Tool, register_tool
-from vision_agent.tools.tools import BboxIoU, BoxDistance, SegArea, SegIoU, MaskDistance
+from vision_agent.tools.tools import BboxIoU, BoxDistance, MaskDistance, SegArea, SegIoU
 
 
 def test_bbox_iou():

diff --git a/tests/tools/test_video.py b/tests/tools/test_video.py
@@ -1,4 +1,4 @@
-from vision_agent.tools.video import extract_frames_from_video
+from vision_agent.utils.video import extract_frames_from_video
 
 
 def test_extract_frames_from_video():

diff --git a/vision_agent/agent/__init__.py b/vision_agent/agent/__init__.py
@@ -3,3 +3,4 @@
 from .easytool import EasyTool
 from .reflexion import Reflexion
 from .vision_agent import VisionAgent
+from .vision_agent_v2 import VisionAgentV2
diff --git a/vision_agent/agent/agent_coder.py b/vision_agent/agent/agent_coder.py
@@ -6,15 +6,40 @@
 from typing import Dict, List, Optional, Union
 
 from vision_agent.agent import Agent
+from vision_agent.agent.agent_coder_prompts import (
+ DEBUG,
+ FIX_BUG,
+ PROGRAM,
+ TEST,
+ VISUAL_TEST,
+)
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
-from vision_agent.tools.tools_v2 import TOOLS_DOCSTRING, UTILITIES_DOCSTRING
-
-from .agent_coder_prompts import DEBUG, FIX_BUG, PROGRAM, TEST, VISUAL_TEST
-from .execution import IMPORT_HELPER, check_correctness
+from vision_agent.tools.tools_v2 import TOOL_DOCSTRING, UTILITIES_DOCSTRING
+from vision_agent.utils import Execute
 
+IMPORT_HELPER = """
+import math
+import re
+import sys
+import copy
+import datetime
+import itertools
+import collections
+import heapq
+import statistics
+import functools
+import hashlib
+import numpy
+import numpy as np
+import string
+from typing import *
+from collections import *
+from vision_agent.tools.tools_v2 import *
+"""
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
+_EXECUTE = Execute()
 
 
 def write_tests(question: str, code: str, model: LLM) -> str:
@@ -40,7 +65,7 @@ def parse_file_name(s: str) -> str:
 
 def write_program(question: str, feedback: str, model: LLM) -> str:
  prompt = PROGRAM.format(
- docstring=TOOLS_DOCSTRING, question=question, feedback=feedback
+ docstring=TOOL_DOCSTRING, question=question, feedback=feedback
  )
  completion = model(prompt)
  return preprocess_data(completion)
@@ -59,14 +84,15 @@ def write_debug(question: str, code: str, feedback: str, model: LLM) -> str:
 
 def execute_tests(code: str, tests: str) -> Dict[str, Union[str, bool]]:
  full_code = f"{IMPORT_HELPER}\n{code}\n{tests}"
- return check_correctness(full_code, 20.0)
+ success, result = _EXECUTE.run_isolation(full_code)
+ return {"code": code, "result": result, "passed": success}
 
 
 def run_visual_tests(
  question: str, code: str, viz_file: str, feedback: str, model: LMM
 ) -> Dict[str, Union[str, bool]]:
  prompt = VISUAL_TEST.format(
- docstring=TOOLS_DOCSTRING,
+ docstring=TOOL_DOCSTRING,
  code=code,
  question=question,
  feedback=feedback,

diff --git a/vision_agent/agent/execution.py b/vision_agent/agent/execution.py