Skip to content

Commit

Permalink
Better exception handling for remote code sandbox (#160)
Browse files Browse the repository at this point in the history
* Introduce new exceptions for remote sandbox errors

* Bug fix: avoid closing the sandbox twice; catch exceptions in E2B close()

* Better handle failure when checking sandbox liveness

* Add notebook.close

* Add missing file

* Bump e2b version; Add timeout to _check_sandbox_liveness
  • Loading branch information
humpydonkey authored Jul 3, 2024
1 parent 80b747f commit 30fd01f
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 44 deletions.
18 changes: 9 additions & 9 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.1"
e2b-code-interpreter = "0.0.11a1"
e2b-code-interpreter = "0.0.11a2"
tenacity = "^8.3.0"
pillow-heif = "^0.16.0"
pytube = "15.0.0"
Expand Down
11 changes: 10 additions & 1 deletion vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ def __init__(
tool_recommender: Optional[Sim] = None,
verbosity: int = 0,
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
code_sandbox_runtime: Optional[str] = None,
) -> None:
"""Initialize the Vision Agent.
Expand All @@ -487,6 +488,11 @@ def __init__(
This is useful for streaming logs in a web application where multiple
VisionAgent instances are running in parallel. This callback ensures
that the progress are not mixed up.
code_sandbox_runtime: the code sandbox runtime to use. A code sandbox is
used to run the generated code. It can be one of the following
values: None, "local" or "e2b". If None, Vision Agent will read the
value from the environment variable CODE_SANDBOX_RUNTIME. If it's
also None, the local python runtime environment will be used.
"""

self.planner = (
Expand All @@ -506,6 +512,7 @@ def __init__(
self.verbosity = verbosity
self.max_retries = 2
self.report_progress_callback = report_progress_callback
self.code_sandbox_runtime = code_sandbox_runtime

def __call__(
self,
Expand Down Expand Up @@ -560,7 +567,9 @@ def chat_with_workflow(
raise ValueError("Chat cannot be empty.")

# NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
with CodeInterpreterFactory.new_instance() as code_interpreter:
with CodeInterpreterFactory.new_instance(
code_sandbox_runtime=self.code_sandbox_runtime
) as code_interpreter:
chat = copy.deepcopy(chat)
media_list = []
for chat_i in chat:
Expand Down
42 changes: 42 additions & 0 deletions vision_agent/utils/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Vision Agent exceptions."""


class InvalidApiKeyError(Exception):
    """Raised when an invalid API key is provided.

    This error can be raised from any SDK code, not only from an HTTP client.

    Args:
        message: Description of the problem. A pointer to the API-credentials
            documentation is appended on a new line, and the combined text is
            stored in ``self.message`` and used as the exception's string form.
    """

    def __init__(self, message: str):
        # The second line of the literal must stay at column 0: any leading
        # whitespace would become part of the user-facing message.
        self.message = f"""{message}
For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"""
        super().__init__(self.message)

    def __str__(self) -> str:
        return self.message


class RemoteSandboxError(Exception):
    """Base class for exceptions related to the remote sandbox."""

    # Class-level flag indicating whether the failed operation may be retried.
    # Subclasses set this explicitly for their own error type.
    is_retryable = False


class RemoteSandboxCreationError(RemoteSandboxError):
    """Raised when a remote sandbox could not be created.

    This could be because the remote sandbox service is unavailable.
    """

    is_retryable = False


class RemoteSandboxExecutionError(RemoteSandboxError):
    """Raised when code execution in a remote sandbox fails."""

    is_retryable = False


class RemoteSandboxClosedError(RemoteSandboxError):
    """Raised when a remote sandbox is dead.

    This is retryable in the sense that the user can try again with a new
    sandbox; the operation cannot be retried in the same sandbox. When this
    error is raised, the user should retry by creating a new VisionAgent
    (i.e. a new sandbox).
    """

    is_retryable = True
81 changes: 60 additions & 21 deletions vision_agent/utils/execute.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import abc
import atexit
import base64
import copy
import logging
Expand All @@ -18,7 +17,6 @@
import nbformat
import tenacity
from dotenv import load_dotenv
from e2b.api.v2.client.exceptions import ServiceException
from e2b_code_interpreter import CodeInterpreter as E2BCodeInterpreterImpl
from e2b_code_interpreter import Execution as E2BExecution
from e2b_code_interpreter import Result as E2BResult
Expand All @@ -30,9 +28,15 @@
from pydantic import BaseModel, field_serializer
from typing_extensions import Self

from vision_agent.utils.exceptions import (
RemoteSandboxClosedError,
RemoteSandboxCreationError,
RemoteSandboxExecutionError,
)

load_dotenv()
_LOGGER = logging.getLogger(__name__)
_SESSION_TIMEOUT = 300 # 5 minutes
_SESSION_TIMEOUT = 600 # 10 minutes


class MimeType(str, Enum):
Expand Down Expand Up @@ -417,7 +421,15 @@ class E2BCodeInterpreter(CodeInterpreter):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
self.interpreter = E2BCodeInterpreter._new_e2b_interpreter_impl(*args, **kwargs)
try:
self.interpreter = E2BCodeInterpreter._new_e2b_interpreter_impl(
*args, **kwargs
)
except Exception as e:
raise RemoteSandboxCreationError(
f"Failed to create a remote sandbox due to {e}"
) from e

result = self.exec_cell(
"""
import platform
Expand All @@ -433,27 +445,40 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
_LOGGER.info(f"E2BCodeInterpreter initialized:\n{sys_versions}")

def close(self, *args: Any, **kwargs: Any) -> None:
self.interpreter.close()
self.interpreter.kill()
try:
self.interpreter.notebook.close()
self.interpreter.kill(request_timeout=2)
_LOGGER.info(
f"The sandbox {self.interpreter.sandbox_id} is closed successfully."
)
except Exception as e:
_LOGGER.warn(
f"Failed to close the remote sandbox ({self.interpreter.sandbox_id}) due to {e}. This is not an issue. It's likely that the sandbox is already closed due to timeout."
)

def restart_kernel(self) -> None:
self._check_sandbox_liveness()
self.interpreter.notebook.restart_kernel()

@tenacity.retry(
wait=tenacity.wait_exponential_jitter(),
stop=tenacity.stop_after_attempt(2),
# TODO: change TimeoutError to a more specific exception when e2b team provides more granular retryable exceptions
retry=tenacity.retry_if_exception_type(TimeoutError),
)
def exec_cell(self, code: str) -> Execution:
if not self.interpreter.is_running():
raise ConnectionResetError(
"Remote sandbox is closed unexpectedly. Please retry the operation."
)
self._check_sandbox_liveness()
self.interpreter.set_timeout(_SESSION_TIMEOUT) # Extend the life of the sandbox
execution = self.interpreter.notebook.exec_cell(code, timeout=self.timeout)
return Execution.from_e2b_execution(execution)
try:
execution = self.interpreter.notebook.exec_cell(code, timeout=self.timeout)
return Execution.from_e2b_execution(execution)
except Exception as e:
raise RemoteSandboxExecutionError(
f"Failed executing code in remote sandbox due to {e}: {code}"
) from e

def upload_file(self, file: Union[str, Path]) -> str:
self._check_sandbox_liveness()
file_name = Path(file).name
remote_path = f"/home/user/{file_name}"
with open(file, "rb") as f:
Expand All @@ -462,17 +487,26 @@ def upload_file(self, file: Union[str, Path]) -> str:
return remote_path

def download_file(self, file_path: str) -> Path:
self._check_sandbox_liveness()
with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
file.write(self.interpreter.files.read(path=file_path, format="bytes"))
_LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
return Path(file.name)

def _check_sandbox_liveness(self) -> None:
    """Verify that the remote sandbox is still running.

    Raises:
        RemoteSandboxClosedError: if the sandbox reports that it is not
            running, or if the liveness check itself fails (the sandbox is
            then considered dead). In the latter case the underlying
            exception is chained as ``__cause__`` so the root cause is not
            lost.
    """
    cause: Optional[Exception] = None
    try:
        # request_timeout=2 bounds the health-check request
        # (presumably seconds -- confirm against the e2b SDK).
        alive = self.interpreter.is_running(request_timeout=2)
    except Exception as e:
        _LOGGER.error(
            f"Failed to check the health of the remote sandbox ({self.interpreter.sandbox_id}) due to {e}. Consider the sandbox as dead."
        )
        alive = False
        # ``e`` is unbound once the except block exits, so keep a reference
        # for chaining below.
        cause = e
    if not alive:
        raise RemoteSandboxClosedError(
            "Remote sandbox is closed unexpectedly. Please start a new VisionAgent instance."
        ) from cause

@staticmethod
@tenacity.retry(
wait=tenacity.wait_exponential_jitter(),
stop=tenacity.stop_after_delay(60),
retry=tenacity.retry_if_exception_type(ServiceException),
)
def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
return E2BCodeInterpreterImpl(template="va-sandbox", *args, **kwargs)

Expand Down Expand Up @@ -564,12 +598,17 @@ def get_default_instance() -> CodeInterpreter:
return instance

@staticmethod
def new_instance() -> CodeInterpreter:
if os.getenv("CODE_SANDBOX_RUNTIME") == "e2b":
def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
if not code_sandbox_runtime:
code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
if code_sandbox_runtime == "e2b":
instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
else:
elif code_sandbox_runtime == "local":
instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
atexit.register(instance.close)
else:
raise ValueError(
f"Unsupported code sandbox runtime: {code_sandbox_runtime}. Supported runtimes: e2b, local"
)
return instance


Expand Down
14 changes: 2 additions & 12 deletions vision_agent/utils/type_defs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings

from vision_agent.utils.exceptions import InvalidApiKeyError


class LandingaiAPIKey(BaseSettings):
"""The API key of a user in a particular organization in LandingLens.
Expand Down Expand Up @@ -34,15 +36,3 @@ class Config:
env_prefix = "landingai_"
case_sensitive = False
extra = "ignore"


class InvalidApiKeyError(Exception):
"""Exception raised when the an invalid API key is provided. This error could be raised from any SDK code, not limited to a HTTP client."""

def __init__(self, message: str):
self.message = f"""{message}
For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"""
super().__init__(self.message)

def __str__(self) -> str:
return self.message

0 comments on commit 30fd01f

Please sign in to comment.