Support remote code execution and code sandbox (#103)
1. Introduce data models (Execution, Result, Error, Logs) that model the result of both local and remote code execution.
2. Introduce a CodeInterpreter base class as the abstraction over execution backends (see the sketch below).
3. Refactor the existing Execute class to conform to the CodeInterpreter interface and rename it to LocalCodeInterpreter.
4. Update all existing client code (i.e., the agents) that uses LocalCodeInterpreter.
5. Add a new E2BCodeInterpreter for sandboxed remote execution.
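
The shape of the new abstraction can be summarized roughly as follows. This is a simplified sketch built only from the names visible in this commit (exec_isolation, success, text, to_json, upload_file, get_default_instance); field names, defaults, and method bodies are assumptions, not the actual code in vision_agent/utils/execute.py.

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class Error:
    # Assumed fields describing a failed run.
    name: str = ""
    value: str = ""
    traceback: str = ""


@dataclass
class Logs:
    # Assumed capture of stdout/stderr emitted during execution.
    stdout: List[str] = field(default_factory=list)
    stderr: List[str] = field(default_factory=list)


@dataclass
class Result:
    # Assumed: one displayable value produced by the executed code.
    text: str = ""


@dataclass
class Execution:
    """Models the outcome of one code run, local or remote."""

    results: List[Result] = field(default_factory=list)
    logs: Logs = field(default_factory=Logs)
    error: Optional[Error] = None

    @property
    def success(self) -> bool:
        return self.error is None

    def text(self, include_logs: bool = True) -> str:
        # Render everything a caller would show or feed back to an LLM.
        parts = [r.text for r in self.results]
        if include_logs:
            parts += self.logs.stdout + self.logs.stderr
        if self.error is not None:
            parts.append(self.error.traceback)
        return "\n".join(parts)

    def to_json(self) -> Dict[str, Any]:
        # JSON-serializable form used by the agents' log_progress callbacks.
        return {"success": self.success, "text": self.text()}


class CodeInterpreter(ABC):
    """Interface implemented by LocalCodeInterpreter and E2BCodeInterpreter."""

    @abstractmethod
    def exec_isolation(self, code: str) -> Execution:
        """Run code in a fresh, isolated context and return an Execution."""

    def upload_file(self, file_path: str) -> str:
        # Assumption: local execution reads the path directly, so pass-through;
        # a remote sandbox overrides this to copy the file in first.
        return file_path


class CodeInterpreterFactory:
    @staticmethod
    def get_default_instance() -> CodeInterpreter:
        raise NotImplementedError  # chooses between the local and E2B backends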
humpydonkey authored May 31, 2024
1 parent 2b6bd80 commit db3dc68
Showing 8 changed files with 1,169 additions and 120 deletions.
586 changes: 585 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions pyproject.toml
@@ -34,6 +34,9 @@ nbformat = "^5.10.4"
rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.0"
e2b-code-interpreter = "^0.0.7"
tenacity = "^8.3.0"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
@@ -93,4 +96,6 @@ module = [
"openai.*",
"sentence_transformers.*",
"moviepy.*",
"e2b_code_interpreter.*",
"e2b.*",
]
5 changes: 0 additions & 5 deletions vision_agent/agent/__init__.py
@@ -1,7 +1,2 @@
from .agent import Agent
from .agent_coder import AgentCoder
from .data_interpreter import DataInterpreter
from .easytool import EasyTool
from .easytool_v2 import EasyToolV2
from .reflexion import Reflexion
from .vision_agent import VisionAgent
8 changes: 4 additions & 4 deletions vision_agent/agent/agent_coder.py
Expand Up @@ -19,7 +19,7 @@
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.lmm import LMM, OpenAILMM
from vision_agent.tools import TOOL_DOCSTRING, UTILITIES_DOCSTRING
from vision_agent.utils import Execute
from vision_agent.utils import CodeInterpreterFactory

IMPORT_HELPER = """
import math
@@ -42,7 +42,7 @@
"""
logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_EXECUTE = Execute()
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()


@@ -94,8 +94,8 @@ def write_debug(question: str, code: str, feedback: str, model: LLM) -> str:

def execute_tests(code: str, tests: str) -> Dict[str, Union[str, bool]]:
full_code = f"{IMPORT_HELPER}\n{code}\n{tests}"
success, result = _EXECUTE.run_isolation(full_code)
return {"code": code, "result": result, "passed": success}
result = _EXECUTE.exec_isolation(full_code)
return {"code": code, "result": result.text(), "passed": result.success}


def run_visual_tests(
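The call sites in agent_coder.py illustrate the API change made across the agents: instead of unpacking a (success, result) tuple from run_isolation, callers get back a single Execution object from exec_isolation. A minimal usage sketch, assuming only the attributes visible in this diff:

from vision_agent.utils import CodeInterpreterFactory

_EXECUTE = CodeInterpreterFactory.get_default_instance()
full_code = "print(2 + 2)"  # stand-in for IMPORT_HELPER + code + tests

# Old API (removed): success, result = _EXECUTE.run_isolation(full_code)
result = _EXECUTE.exec_isolation(full_code)
print(result.success)    # bool, replaces the old first tuple element
print(result.text())     # rendered output, replaces the old result string
print(result.to_json())  # structured form handed to progress callbacks
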
36 changes: 20 additions & 16 deletions vision_agent/agent/data_interpreter.py
@@ -26,11 +26,12 @@
)
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
from vision_agent.utils import Execute, Sim
from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution, Sim

logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
_MAX_TABULATE_COL_WIDTH = 80
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()


@@ -163,20 +164,21 @@ def write_and_exec_code(
code_writer_call: Callable[..., str],
model: LLM,
tool_info: str,
exec: Execute,
exec: CodeInterpreter,
retrieved_ltm: str,
log_progress: Callable[[Dict[str, Any]], None],
max_retry: int = 3,
verbosity: int = 0,
) -> Tuple[bool, str, str, Dict[str, List[str]]]:
) -> Tuple[bool, str, Execution, Dict[str, List[str]]]:
success = False
counter = 0
reflection = ""

code = code_writer_call(
user_req, subtask, retrieved_ltm, tool_info, orig_code, model
)
success, result = exec.run_isolation(code)
result = exec.exec_isolation(code)
success = result.success
if verbosity == 2:
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
log_progress(
@@ -193,10 +195,10 @@
log_progress(
{
"log": "Result:",
"result": str(result),
"result": result.to_json(),
}
)
_LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
_LOGGER.info(f"\tCode success: {success}, result: {result.text(False)}")
working_memory: Dict[str, List[str]] = {}
while not success and counter < max_retry:
if subtask not in working_memory:
@@ -210,13 +212,13 @@
)
else:
working_memory[subtask].append(
PREV_CODE_CONTEXT.format(code=code, result=result)
PREV_CODE_CONTEXT.format(code=code, result=result.text())
)

code, reflection = debug_code(
user_req, subtask, retrieved_ltm, "\n".join(working_memory[subtask]), model
)
success, result = exec.run_isolation(code)
result = exec.exec_isolation(code)
counter += 1
if verbosity == 2:
_CONSOLE.print(
@@ -231,27 +233,29 @@
log_progress(
{
"log": "Result:",
"result": result,
"result": result.to_json(),
}
)
_LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
_LOGGER.info(
f"\tDebugging reflection: {reflection}, result: {result.text(False)}"
)

if success:
working_memory[subtask].append(
PREV_CODE_CONTEXT_WITH_REFLECTION.format(
reflection=reflection, code=code, result=result
reflection=reflection, code=code, result=result.text()
)
)

return success, code, result, working_memory
return result.success, code, result, working_memory


@traceable(name="plan execution")
def run_plan(
user_req: str,
plan: List[Dict[str, Any]],
coder: LLM,
exec: Execute,
exec: CodeInterpreter,
code: str,
tool_recommender: Sim,
log_progress: Callable[[Dict[str, Any]], None],
@@ -316,10 +320,10 @@ def run_plan(
log_progress(
{
"log": "Result:",
"result": str(result),
"result": result.to_json(),
}
)
_LOGGER.info(f"\tCode success: {success} result: {str(result)}")
_LOGGER.info(f"\tCode success: {success} result: {result.text(False)}")

task["success"] = success
task["result"] = result
@@ -360,7 +364,7 @@ def __init__(
) -> None:
self.planner = OpenAILLM(temperature=0.0, json_mode=True)
self.coder = OpenAILLM(temperature=0.0)
self.exec = Execute(timeout=timeout)
self.exec = _EXECUTE
self.report_progress_callback = report_progress_callback
if tool_recommender is None:
self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
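data_interpreter.py now threads a CodeInterpreter through write_and_exec_code and run_plan instead of an Execute instance, and everything it logs goes through the Execution object. A hedged sketch of that pattern (run_subtask is a hypothetical helper, not part of this diff):

from typing import Any, Dict

from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution

_EXECUTE = CodeInterpreterFactory.get_default_instance()


def run_subtask(code: str, exec: CodeInterpreter = _EXECUTE) -> Execution:
    result = exec.exec_isolation(code)
    # Progress callbacks now receive a JSON-friendly payload rather than str(result).
    progress: Dict[str, Any] = {"log": "Result:", "result": result.to_json()}
    print(progress)
    # text(False), i.e. include_logs=False, keeps the INFO log line short.
    print(f"Code success: {result.success}, result: {result.text(False)}")
    return result
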
63 changes: 34 additions & 29 deletions vision_agent/agent/vision_agent.py
@@ -6,6 +6,7 @@
from typing import Any, Callable, Dict, List, Optional, Union, cast

from rich.console import Console
from rich.style import Style
from rich.syntax import Syntax
from tabulate import tabulate

@@ -23,13 +24,13 @@
)
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.lmm import LMM, OpenAILMM
from vision_agent.utils import Execute
from vision_agent.utils import CodeInterpreterFactory, Execution
from vision_agent.utils.sim import Sim

logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_MAX_TABULATE_COL_WIDTH = 80
_EXECUTE = Execute(600)
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()
_DEFAULT_IMPORT = "\n".join(T.__new_tools__)

@@ -157,28 +158,27 @@ def write_and_test_code(
},
}
)
success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
log_progress(
{
"type": "code",
"status": "completed" if success else "failed",
"status": "completed" if result.success else "failed",
"payload": {
"code": code,
"test": test,
"result": result,
"result": result.to_json(),
},
}
)
if verbosity == 2:
_LOGGER.info("Initial code and tests:")
_CONSOLE.print(
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
_print_code("Initial code and tests:", code, test)
_LOGGER.info(
f"Initial code execution result:\n{result.text(include_logs=False)}"
)
_LOGGER.info(f"Initial result: {result}")

count = 0
new_working_memory = []
while not success and count < max_retries:
while not result.success and count < max_retries:
log_progress(
{
"type": "code",
@@ -188,7 +188,7 @@
fixed_code_and_test = extract_json(
debugger(
FIX_BUG.format(
code=code, tests=test, result=result, feedback=working_memory
code=code, tests=test, result=result.text(), feedback=working_memory
)
)
)
@@ -210,46 +210,49 @@
{"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
)

success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
log_progress(
{
"type": "code",
"status": "completed" if success else "failed",
"status": "completed" if result.success else "failed",
"payload": {
"code": code,
"test": test,
"result": result,
"result": result.to_json(),
},
}
)
if verbosity == 2:
_LOGGER.info(
f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
)
_CONSOLE.print(
Syntax(
f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
)
_print_code("Code and test after attempted fix:", code, test)
_LOGGER.info(
f"Code execution result after attempted fix: {result.text(include_logs=False)}"
)
_LOGGER.info(f"Debug result: {result}")
count += 1

if verbosity >= 1:
_LOGGER.info("Final code and tests:")
_CONSOLE.print(
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
)
_LOGGER.info(f"Final Result: {result}")
_print_code("Final code and tests:", code, test)

return {
"code": code,
"test": test,
"success": success,
"success": result.success,
"test_result": result,
"working_memory": new_working_memory,
}


def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
_CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
_CONSOLE.print("=" * 30 + " Code " + "=" * 30)
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
if test:
_CONSOLE.print("=" * 30 + " Test " + "=" * 30)
_CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))


def retrieve_tools(
plan: List[Dict[str, str]],
tool_recommender: Sim,
@@ -279,8 +282,10 @@ def retrieve_tools(
"payload": tool_list,
}
)

if verbosity == 2:
_LOGGER.info(f"Tools: {tool_desc}")
tool_desc_str = "\n".join(tool_desc)
_LOGGER.info(f"Tools Description:\n{tool_desc_str}")
tool_info_set = set(tool_info)
return "\n\n".join(tool_info_set)

@@ -386,10 +391,11 @@ def chat_with_workflow(
and working memory of the agent.
"""

if len(chat) == 0:
if not chat:
raise ValueError("Chat cannot be empty.")

if media is not None:
media = _EXECUTE.upload_file(media)
for chat_i in chat:
if chat_i["role"] == "user":
chat_i["content"] += f" Image name {media}"
@@ -497,7 +503,7 @@ def chat_with_workflow(
"payload": {
"code": code,
"test": test,
"result": results["test_result"],
"result": cast(Execution, results["test_result"]).to_json(),
},
}
)
@@ -513,4 +519,3 @@
def log_progress(self, data: Dict[str, Any]) -> None:
if self.report_progress_callback is not None:
self.report_progress_callback(data)
pass
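
The new upload_file call in chat_with_workflow is what makes user-supplied media visible when the default interpreter is a remote E2B sandbox rather than a local kernel: the file is pushed to the backend first, and the returned path is what the generated code opens. A minimal sketch of that flow; the file name and the generated snippet are purely illustrative, and the pass-through behavior for local execution is an assumption:

from vision_agent.utils import CodeInterpreterFactory

interpreter = CodeInterpreterFactory.get_default_instance()

# Hypothetical local file; for a remote sandbox, upload_file returns the path
# the sandboxed kernel should use (for local execution it may simply echo it).
media = interpreter.upload_file("cars.jpg")

generated_code = f"from PIL import Image\nprint(Image.open('{media}').size)"
result = interpreter.exec_isolation(generated_code)
print(result.success, result.text())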
9 changes: 8 additions & 1 deletion vision_agent/utils/__init__.py
@@ -1,3 +1,10 @@
from .execute import Execute
from .execute import (
CodeInterpreter,
CodeInterpreterFactory,
Error,
Execution,
Logs,
Result,
)
from .sim import Sim, load_sim, merge_sim
from .video import extract_frames_from_video