Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better exception handling for remote code sandbox #160

Merged
merged 6 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.1"
e2b-code-interpreter = "0.0.11a1"
e2b-code-interpreter = "0.0.11a2"
tenacity = "^8.3.0"
pillow-heif = "^0.16.0"
pytube = "15.0.0"
Expand Down
11 changes: 10 additions & 1 deletion vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ def __init__(
tool_recommender: Optional[Sim] = None,
verbosity: int = 0,
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
code_sandbox_runtime: Optional[str] = None,
) -> None:
"""Initialize the Vision Agent.

Expand All @@ -487,6 +488,11 @@ def __init__(
This is useful for streaming logs in a web application where multiple
VisionAgent instances are running in parallel. This callback ensures
that the progress are not mixed up.
code_sandbox_runtime: the code sandbox runtime to use. A code sandbox is
used to run the generated code. It can be one of the following
values: None, "local" or "e2b". If None, Vision Agent will read the
value from the environment variable CODE_SANDBOX_RUNTIME. If it's
also None, the local python runtime environment will be used.
"""

self.planner = (
Expand All @@ -506,6 +512,7 @@ def __init__(
self.verbosity = verbosity
self.max_retries = 2
self.report_progress_callback = report_progress_callback
self.code_sandbox_runtime = code_sandbox_runtime

def __call__(
self,
Expand Down Expand Up @@ -560,7 +567,9 @@ def chat_with_workflow(
raise ValueError("Chat cannot be empty.")

# NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
with CodeInterpreterFactory.new_instance() as code_interpreter:
with CodeInterpreterFactory.new_instance(
code_sandbox_runtime=self.code_sandbox_runtime
) as code_interpreter:
chat = copy.deepcopy(chat)
media_list = []
for chat_i in chat:
Expand Down
42 changes: 42 additions & 0 deletions vision_agent/utils/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Vision Agent exceptions."""


class InvalidApiKeyError(Exception):
    """Raised when an invalid API key is provided.

    Any SDK code may raise this error; it is not limited to an HTTP client.
    The caller-supplied message is extended with a second line pointing to
    the API-credential documentation, and the combined text is exposed both
    as ``self.message`` and as the string form of the exception.
    """

    def __init__(self, message: str):
        help_line = "For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"
        combined = f"{message}\n" + help_line
        super().__init__(combined)
        self.message = combined

    def __str__(self) -> str:
        return self.message


class RemoteSandboxError(Exception):
    """Base class for errors related to the remote code sandbox.

    Attributes:
        is_retryable: class-level flag telling callers whether retrying is
            worthwhile. It is False on this base class.
    """

    is_retryable: bool = False


class RemoteSandboxCreationError(RemoteSandboxError):
    """Raised when a remote sandbox could not be created.

    One possible cause is that the remote sandbox service is unavailable.
    """

    # Explicitly marked as not retryable.
    is_retryable: bool = False


class RemoteSandboxExecutionError(RemoteSandboxError):
    """Raised when executing code inside a remote sandbox fails."""

    # Explicitly marked as not retryable.
    is_retryable: bool = False


class RemoteSandboxClosedError(RemoteSandboxError):
    """Raised when the remote sandbox is dead.

    The error is retryable only in the sense that the user can try again
    with a new sandbox; the operation cannot be retried in the same sandbox.
    When this error is raised, the user should retry by creating a new
    VisionAgent (i.e. a new sandbox).
    """

    # The only sandbox error flagged as retryable (with a fresh sandbox).
    is_retryable: bool = True
Comment on lines +22 to +42
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi, @yzld2002, new exception classes

81 changes: 60 additions & 21 deletions vision_agent/utils/execute.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import abc
import atexit
import base64
import copy
import logging
Expand All @@ -18,7 +17,6 @@
import nbformat
import tenacity
from dotenv import load_dotenv
from e2b.api.v2.client.exceptions import ServiceException
from e2b_code_interpreter import CodeInterpreter as E2BCodeInterpreterImpl
from e2b_code_interpreter import Execution as E2BExecution
from e2b_code_interpreter import Result as E2BResult
Expand All @@ -30,9 +28,15 @@
from pydantic import BaseModel, field_serializer
from typing_extensions import Self

from vision_agent.utils.exceptions import (
RemoteSandboxClosedError,
RemoteSandboxCreationError,
RemoteSandboxExecutionError,
)

load_dotenv()
_LOGGER = logging.getLogger(__name__)
_SESSION_TIMEOUT = 300 # 5 minutes
_SESSION_TIMEOUT = 600 # 10 minutes


class MimeType(str, Enum):
Expand Down Expand Up @@ -417,7 +421,15 @@ class E2BCodeInterpreter(CodeInterpreter):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
self.interpreter = E2BCodeInterpreter._new_e2b_interpreter_impl(*args, **kwargs)
try:
self.interpreter = E2BCodeInterpreter._new_e2b_interpreter_impl(
*args, **kwargs
)
except Exception as e:
raise RemoteSandboxCreationError(
f"Failed to create a remote sandbox due to {e}"
) from e

result = self.exec_cell(
"""
import platform
Expand All @@ -433,27 +445,40 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
_LOGGER.info(f"E2BCodeInterpreter initialized:\n{sys_versions}")

def close(self, *args: Any, **kwargs: Any) -> None:
    """Close the notebook connection and kill the remote sandbox.

    Closing is best-effort: any failure is logged as a warning and swallowed,
    because the sandbox may already have been closed (e.g. due to timeout).
    The notebook is closed and the sandbox killed exactly once, and only
    inside the guarded section, so this method never raises.

    Args:
        *args: accepted for interface compatibility; not used.
        **kwargs: accepted for interface compatibility; not used.
    """
    try:
        self.interpreter.notebook.close()
        # Short request timeout so a dead sandbox does not stall shutdown.
        self.interpreter.kill(request_timeout=2)
        _LOGGER.info(
            f"The sandbox {self.interpreter.sandbox_id} is closed successfully."
        )
    except Exception as e:
        # Logger.warn is a deprecated alias; Logger.warning is the supported API.
        _LOGGER.warning(
            f"Failed to close the remote sandbox ({self.interpreter.sandbox_id}) due to {e}. This is not an issue. It's likely that the sandbox is already closed due to timeout."
        )

def restart_kernel(self) -> None:
    """Restart the notebook kernel of the remote sandbox.

    The sandbox liveness is verified first, so a dead sandbox is reported
    before the restart request is sent.
    """
    self._check_sandbox_liveness()
    notebook = self.interpreter.notebook
    notebook.restart_kernel()

@tenacity.retry(
    wait=tenacity.wait_exponential_jitter(),
    stop=tenacity.stop_after_attempt(2),
    # TODO: change TimeoutError to a more specific exception when e2b team provides more granular retryable exceptions
    # Execution failures are wrapped in RemoteSandboxExecutionError below, so
    # a timeout must also be recognised through the wrapped cause; otherwise
    # the retry condition can never match.
    retry=tenacity.retry_if_exception(
        lambda exc: isinstance(exc, TimeoutError)
        or isinstance(exc.__cause__, TimeoutError)
    ),
    # Re-raise the last real error once the attempts are exhausted so that
    # callers still see RemoteSandboxExecutionError instead of RetryError.
    reraise=True,
)
def exec_cell(self, code: str) -> Execution:
    """Execute a code cell in the remote sandbox.

    The sandbox liveness is checked and its session timeout extended before
    the code is sent. An execution that fails because of a TimeoutError is
    attempted a second time.

    Args:
        code: source code to run in the sandbox notebook.

    Returns:
        The execution result converted from the e2b execution object.

    Raises:
        RemoteSandboxClosedError: if the sandbox is no longer running.
        RemoteSandboxExecutionError: if running the code fails for any reason.
    """
    self._check_sandbox_liveness()
    self.interpreter.set_timeout(_SESSION_TIMEOUT)  # Extend the life of the sandbox
    try:
        execution = self.interpreter.notebook.exec_cell(code, timeout=self.timeout)
        return Execution.from_e2b_execution(execution)
    except Exception as e:
        raise RemoteSandboxExecutionError(
            f"Failed executing code in remote sandbox due to {e}: {code}"
        ) from e

def upload_file(self, file: Union[str, Path]) -> str:
self._check_sandbox_liveness()
file_name = Path(file).name
remote_path = f"/home/user/{file_name}"
with open(file, "rb") as f:
Expand All @@ -462,17 +487,26 @@ def upload_file(self, file: Union[str, Path]) -> str:
return remote_path

def download_file(self, file_path: str) -> Path:
    """Download a file from the remote sandbox into a local temporary file.

    Args:
        file_path: path of the file inside the remote sandbox.

    Returns:
        Path of the local temporary file holding the downloaded bytes. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        RemoteSandboxClosedError: if the sandbox is no longer running.
    """
    self._check_sandbox_liveness()
    # Read the remote content before creating the temp file, so a failed read
    # does not leave an orphaned empty file on disk.
    content = self.interpreter.files.read(path=file_path, format="bytes")
    with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
        file.write(content)
    _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
    return Path(file.name)

def _check_sandbox_liveness(self) -> None:
    """Verify that the remote sandbox is still running.

    The health probe uses a 2-second request timeout. If the probe itself
    fails, the failure is logged and the sandbox is treated as dead.

    Raises:
        RemoteSandboxClosedError: if the sandbox is not running or its
            health could not be determined.
    """
    is_alive = False
    try:
        is_alive = self.interpreter.is_running(request_timeout=2)
    except Exception as e:
        _LOGGER.error(
            f"Failed to check the health of the remote sandbox ({self.interpreter.sandbox_id}) due to {e}. Consider the sandbox as dead."
        )
    if is_alive:
        return
    raise RemoteSandboxClosedError(
        "Remote sandbox is closed unexpectedly. Please start a new VisionAgent instance."
    )

@staticmethod
@tenacity.retry(
wait=tenacity.wait_exponential_jitter(),
stop=tenacity.stop_after_delay(60),
retry=tenacity.retry_if_exception_type(ServiceException),
)
def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl: # type: ignore
return E2BCodeInterpreterImpl(template="va-sandbox", *args, **kwargs)

Expand Down Expand Up @@ -564,12 +598,17 @@ def get_default_instance() -> CodeInterpreter:
return instance

@staticmethod
def new_instance(code_sandbox_runtime: Optional[str] = None) -> CodeInterpreter:
    """Create a new code interpreter for the requested sandbox runtime.

    The instance is not registered for automatic cleanup; closing it is left
    to the caller (for example by using it as a context manager).

    Args:
        code_sandbox_runtime: "e2b" or "local". When None or empty, the
            CODE_SANDBOX_RUNTIME environment variable is used; when that is
            unset or empty as well, "local" is used.

    Returns:
        A new E2BCodeInterpreter or LocalCodeInterpreter configured with the
        session timeout.

    Raises:
        ValueError: if the resolved runtime is neither "e2b" nor "local".
    """
    # `or` chaining treats an empty argument or empty env value as "not set".
    runtime = code_sandbox_runtime or os.getenv("CODE_SANDBOX_RUNTIME") or "local"
    if runtime == "e2b":
        instance: CodeInterpreter = E2BCodeInterpreter(timeout=_SESSION_TIMEOUT)
    elif runtime == "local":
        instance = LocalCodeInterpreter(timeout=_SESSION_TIMEOUT)
    else:
        raise ValueError(
            f"Unsupported code sandbox runtime: {runtime}. Supported runtimes: e2b, local"
        )
    return instance


Expand Down
14 changes: 2 additions & 12 deletions vision_agent/utils/type_defs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings

from vision_agent.utils.exceptions import InvalidApiKeyError


class LandingaiAPIKey(BaseSettings):
"""The API key of a user in a particular organization in LandingLens.
Expand Down Expand Up @@ -34,15 +36,3 @@ class Config:
env_prefix = "landingai_"
case_sensitive = False
extra = "ignore"


class InvalidApiKeyError(Exception):
"""Exception raised when the an invalid API key is provided. This error could be raised from any SDK code, not limited to a HTTP client."""

def __init__(self, message: str):
self.message = f"""{message}
For more information, see https://landing-ai.github.io/landingai-python/landingai.html#manage-api-credentials"""
super().__init__(self.message)

def __str__(self) -> str:
return self.message
Loading