Support remote code execution and code sandbox (#103)
1. Introduce data models (Execution, Result, Error, Logs) that model the result of both local and remote code execution.
2. Introduce a CodeInterpreter base class as the abstraction over execution backends (see the sketch below).
3. Refactor the existing Execute class to conform to the CodeInterpreter interface and rename it to LocalCodeInterpreter.
4. Update all existing client code (i.e., the agents) that uses LocalCodeInterpreter.
5. Add a new E2BCodeInterpreter for sandboxed remote execution.
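
The shape of the new abstraction can be summarized roughly as follows. This is a simplified sketch built only from the names visible in this commit (exec_isolation, success, text, to_json, upload_file, get_default_instance); field names, defaults, and method bodies are assumptions, not the actual code in vision_agent/utils/execute.py.

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class Error:
    # Assumed fields describing a failed run.
    name: str = ""
    value: str = ""
    traceback: str = ""


@dataclass
class Logs:
    # Assumed capture of stdout/stderr emitted during execution.
    stdout: List[str] = field(default_factory=list)
    stderr: List[str] = field(default_factory=list)


@dataclass
class Result:
    # Assumed: one displayable value produced by the executed code.
    text: str = ""


@dataclass
class Execution:
    """Models the outcome of one code run, local or remote."""

    results: List[Result] = field(default_factory=list)
    logs: Logs = field(default_factory=Logs)
    error: Optional[Error] = None

    @property
    def success(self) -> bool:
        return self.error is None

    def text(self, include_logs: bool = True) -> str:
        # Render everything a caller would show or feed back to an LLM.
        parts = [r.text for r in self.results]
        if include_logs:
            parts += self.logs.stdout + self.logs.stderr
        if self.error is not None:
            parts.append(self.error.traceback)
        return "\n".join(parts)

    def to_json(self) -> Dict[str, Any]:
        # JSON-serializable form used by the agents' log_progress callbacks.
        return {"success": self.success, "text": self.text()}


class CodeInterpreter(ABC):
    """Interface implemented by LocalCodeInterpreter and E2BCodeInterpreter."""

    @abstractmethod
    def exec_isolation(self, code: str) -> Execution:
        """Run code in a fresh, isolated context and return an Execution."""

    def upload_file(self, file_path: str) -> str:
        # Assumption: local execution reads the path directly, so pass-through;
        # a remote sandbox overrides this to copy the file in first.
        return file_path


class CodeInterpreterFactory:
    @staticmethod
    def get_default_instance() -> CodeInterpreter:
        raise NotImplementedError  # chooses between the local and E2B backends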
humpydonkey authored May 31, 2024
1 parent 2b6bd80 commit db3dc68
Showing 8 changed files with 1,169 additions and 120 deletions.
586 changes: 585 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions pyproject.toml
@@ -34,6 +34,9 @@ nbformat = "^5.10.4"
rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.0"
e2b-code-interpreter = "^0.0.7"
tenacity = "^8.3.0"

[tool.poetry.group.dev.dependencies]
autoflake = "1.*"
@@ -93,4 +96,6 @@ module = [
"openai.*",
"sentence_transformers.*",
"moviepy.*",
"e2b_code_interpreter.*",
"e2b.*",
]
5 changes: 0 additions & 5 deletions vision_agent/agent/__init__.py
@@ -1,7 +1,2 @@
from .agent import Agent
from .agent_coder import AgentCoder
from .data_interpreter import DataInterpreter
from .easytool import EasyTool
from .easytool_v2 import EasyToolV2
from .reflexion import Reflexion
from .vision_agent import VisionAgent
8 changes: 4 additions & 4 deletions vision_agent/agent/agent_coder.py
Expand Up @@ -19,7 +19,7 @@
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.lmm import LMM, OpenAILMM
from vision_agent.tools import TOOL_DOCSTRING, UTILITIES_DOCSTRING
from vision_agent.utils import Execute
from vision_agent.utils import CodeInterpreterFactory

IMPORT_HELPER = """
import math
@@ -42,7 +42,7 @@
"""
logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_EXECUTE = Execute()
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()


@@ -94,8 +94,8 @@ def write_debug(question: str, code: str, feedback: str, model: LLM) -> str:

def execute_tests(code: str, tests: str) -> Dict[str, Union[str, bool]]:
full_code = f"{IMPORT_HELPER}\n{code}\n{tests}"
success, result = _EXECUTE.run_isolation(full_code)
return {"code": code, "result": result, "passed": success}
result = _EXECUTE.exec_isolation(full_code)
return {"code": code, "result": result.text(), "passed": result.success}


def run_visual_tests(
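The call sites in agent_coder.py illustrate the API change made across the agents: instead of unpacking a (success, result) tuple from run_isolation, callers get back a single Execution object from exec_isolation. A minimal usage sketch, assuming only the attributes visible in this diff:

from vision_agent.utils import CodeInterpreterFactory

_EXECUTE = CodeInterpreterFactory.get_default_instance()
full_code = "print(2 + 2)"  # stand-in for IMPORT_HELPER + code + tests

# Old API (removed): success, result = _EXECUTE.run_isolation(full_code)
result = _EXECUTE.exec_isolation(full_code)
print(result.success)    # bool, replaces the old first tuple element
print(result.text())     # rendered output, replaces the old result string
print(result.to_json())  # structured form handed to progress callbacks
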
36 changes: 20 additions & 16 deletions vision_agent/agent/data_interpreter.py
@@ -26,11 +26,12 @@
)
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
from vision_agent.utils import Execute, Sim
from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution, Sim

logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
_MAX_TABULATE_COL_WIDTH = 80
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()


@@ -163,20 +164,21 @@ def write_and_exec_code(
code_writer_call: Callable[..., str],
model: LLM,
tool_info: str,
exec: Execute,
exec: CodeInterpreter,
retrieved_ltm: str,
log_progress: Callable[[Dict[str, Any]], None],
max_retry: int = 3,
verbosity: int = 0,
) -> Tuple[bool, str, str, Dict[str, List[str]]]:
) -> Tuple[bool, str, Execution, Dict[str, List[str]]]:
success = False
counter = 0
reflection = ""

code = code_writer_call(
user_req, subtask, retrieved_ltm, tool_info, orig_code, model
)
success, result = exec.run_isolation(code)
result = exec.exec_isolation(code)
success = result.success
if verbosity == 2:
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
log_progress(
@@ -193,10 +195,10 @@
log_progress(
{
"log": "Result:",
"result": str(result),
"result": result.to_json(),
}
)
_LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
_LOGGER.info(f"\tCode success: {success}, result: {result.text(False)}")
working_memory: Dict[str, List[str]] = {}
while not success and counter < max_retry:
if subtask not in working_memory:
@@ -210,13 +212,13 @@
)
else:
working_memory[subtask].append(
PREV_CODE_CONTEXT.format(code=code, result=result)
PREV_CODE_CONTEXT.format(code=code, result=result.text())
)

code, reflection = debug_code(
user_req, subtask, retrieved_ltm, "\n".join(working_memory[subtask]), model
)
success, result = exec.run_isolation(code)
result = exec.exec_isolation(code)
counter += 1
if verbosity == 2:
_CONSOLE.print(
@@ -231,27 +233,29 @@
log_progress(
{
"log": "Result:",
"result": result,
"result": result.to_json(),
}
)
_LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
_LOGGER.info(
f"\tDebugging reflection: {reflection}, result: {result.text(False)}"
)

if success:
working_memory[subtask].append(
PREV_CODE_CONTEXT_WITH_REFLECTION.format(
reflection=reflection, code=code, result=result
reflection=reflection, code=code, result=result.text()
)
)

return success, code, result, working_memory
return result.success, code, result, working_memory


@traceable(name="plan execution")
def run_plan(
user_req: str,
plan: List[Dict[str, Any]],
coder: LLM,
exec: Execute,
exec: CodeInterpreter,
code: str,
tool_recommender: Sim,
log_progress: Callable[[Dict[str, Any]], None],
@@ -316,10 +320,10 @@ def run_plan(
log_progress(
{
"log": "Result:",
"result": str(result),
"result": result.to_json(),
}
)
_LOGGER.info(f"\tCode success: {success} result: {str(result)}")
_LOGGER.info(f"\tCode success: {success} result: {result.text(False)}")

task["success"] = success
task["result"] = result
@@ -360,7 +364,7 @@ def __init__(
) -> None:
self.planner = OpenAILLM(temperature=0.0, json_mode=True)
self.coder = OpenAILLM(temperature=0.0)
self.exec = Execute(timeout=timeout)
self.exec = _EXECUTE
self.report_progress_callback = report_progress_callback
if tool_recommender is None:
self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
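data_interpreter.py now threads a CodeInterpreter through write_and_exec_code and run_plan instead of an Execute instance, and everything it logs goes through the Execution object. A hedged sketch of that pattern (run_subtask is a hypothetical helper, not part of this diff):

from typing import Any, Dict

from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution

_EXECUTE = CodeInterpreterFactory.get_default_instance()


def run_subtask(code: str, exec: CodeInterpreter = _EXECUTE) -> Execution:
    result = exec.exec_isolation(code)
    # Progress callbacks now receive a JSON-friendly payload rather than str(result).
    progress: Dict[str, Any] = {"log": "Result:", "result": result.to_json()}
    print(progress)
    # text(False), i.e. include_logs=False, keeps the INFO log line short.
    print(f"Code success: {result.success}, result: {result.text(False)}")
    return result
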
63 changes: 34 additions & 29 deletions vision_agent/agent/vision_agent.py
@@ -6,6 +6,7 @@
from typing import Any, Callable, Dict, List, Optional, Union, cast

from rich.console import Console
from rich.style import Style
from rich.syntax import Syntax
from tabulate import tabulate

@@ -23,13 +24,13 @@
)
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.lmm import LMM, OpenAILMM
from vision_agent.utils import Execute
from vision_agent.utils import CodeInterpreterFactory, Execution
from vision_agent.utils.sim import Sim

logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)
_MAX_TABULATE_COL_WIDTH = 80
_EXECUTE = Execute(600)
_EXECUTE = CodeInterpreterFactory.get_default_instance()
_CONSOLE = Console()
_DEFAULT_IMPORT = "\n".join(T.__new_tools__)

@@ -157,28 +158,27 @@ def write_and_test_code(
},
}
)
success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
log_progress(
{
"type": "code",
"status": "completed" if success else "failed",
"status": "completed" if result.success else "failed",
"payload": {
"code": code,
"test": test,
"result": result,
"result": result.to_json(),
},
}
)
if verbosity == 2:
_LOGGER.info("Initial code and tests:")
_CONSOLE.print(
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
_print_code("Initial code and tests:", code, test)
_LOGGER.info(
f"Initial code execution result:\n{result.text(include_logs=False)}"
)
_LOGGER.info(f"Initial result: {result}")

count = 0
new_working_memory = []
while not success and count < max_retries:
while not result.success and count < max_retries:
log_progress(
{
"type": "code",
@@ -188,7 +188,7 @@
fixed_code_and_test = extract_json(
debugger(
FIX_BUG.format(
code=code, tests=test, result=result, feedback=working_memory
code=code, tests=test, result=result.text(), feedback=working_memory
)
)
)
@@ -210,46 +210,49 @@
{"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
)

success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
log_progress(
{
"type": "code",
"status": "completed" if success else "failed",
"status": "completed" if result.success else "failed",
"payload": {
"code": code,
"test": test,
"result": result,
"result": result.to_json(),
},
}
)
if verbosity == 2:
_LOGGER.info(
f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
)
_CONSOLE.print(
Syntax(
f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
)
_print_code("Code and test after attempted fix:", code, test)
_LOGGER.info(
f"Code execution result after attempted fix: {result.text(include_logs=False)}"
)
_LOGGER.info(f"Debug result: {result}")
count += 1

if verbosity >= 1:
_LOGGER.info("Final code and tests:")
_CONSOLE.print(
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
)
_LOGGER.info(f"Final Result: {result}")
_print_code("Final code and tests:", code, test)

return {
"code": code,
"test": test,
"success": success,
"success": result.success,
"test_result": result,
"working_memory": new_working_memory,
}


def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
_CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
_CONSOLE.print("=" * 30 + " Code " + "=" * 30)
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
if test:
_CONSOLE.print("=" * 30 + " Test " + "=" * 30)
_CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))


def retrieve_tools(
plan: List[Dict[str, str]],
tool_recommender: Sim,
@@ -279,8 +282,10 @@ def retrieve_tools(
"payload": tool_list,
}
)

if verbosity == 2:
_LOGGER.info(f"Tools: {tool_desc}")
tool_desc_str = "\n".join(tool_desc)
_LOGGER.info(f"Tools Description:\n{tool_desc_str}")
tool_info_set = set(tool_info)
return "\n\n".join(tool_info_set)

@@ -386,10 +391,11 @@ def chat_with_workflow(
and working memory of the agent.
"""

if len(chat) == 0:
if not chat:
raise ValueError("Chat cannot be empty.")

if media is not None:
media = _EXECUTE.upload_file(media)
for chat_i in chat:
if chat_i["role"] == "user":
chat_i["content"] += f" Image name {media}"
@@ -497,7 +503,7 @@ def chat_with_workflow(
"payload": {
"code": code,
"test": test,
"result": results["test_result"],
"result": cast(Execution, results["test_result"]).to_json(),
},
}
)
@@ -513,4 +519,3 @@
def log_progress(self, data: Dict[str, Any]) -> None:
if self.report_progress_callback is not None:
self.report_progress_callback(data)
pass
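
The new upload_file call in chat_with_workflow is what makes user-supplied media visible when the default interpreter is a remote E2B sandbox rather than a local kernel: the file is pushed to the backend first, and the returned path is what the generated code opens. A minimal sketch of that flow; the file name and the generated snippet are purely illustrative, and the pass-through behavior for local execution is an assumption:

from vision_agent.utils import CodeInterpreterFactory

interpreter = CodeInterpreterFactory.get_default_instance()

# Hypothetical local file; for a remote sandbox, upload_file returns the path
# the sandboxed kernel should use (for local execution it may simply echo it).
media = interpreter.upload_file("cars.jpg")

generated_code = f"from PIL import Image\nprint(Image.open('{media}').size)"
result = interpreter.exec_isolation(generated_code)
print(result.success, result.text())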
9 changes: 8 additions & 1 deletion vision_agent/utils/__init__.py
@@ -1,3 +1,10 @@
from .execute import Execute
from .execute import (
CodeInterpreter,
CodeInterpreterFactory,
Error,
Execution,
Logs,
Result,
)
from .sim import Sim, load_sim, merge_sim
from .video import extract_frames_from_video