diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 203dbf7b..6e1621f0 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -195,9 +195,8 @@ def __init__( agent: Optional[LMM] = None, verbosity: int = 0, local_artifacts_path: Optional[Union[str, Path]] = None, - code_sandbox_runtime: Optional[str] = None, callback_message: Optional[Callable[[Dict[str, Any]], None]] = None, - code_interpreter: Optional[CodeInterpreter] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: """Initialize the VisionAgent. @@ -207,14 +206,17 @@ def __init__( verbosity (int): The verbosity level of the agent. local_artifacts_path (Optional[Union[str, Path]]): The path to the local artifacts file. - code_sandbox_runtime (Optional[str]): The code sandbox runtime to use. - code_interpreter (Optional[CodeInterpreter]): if not None, use this CodeInterpreter + callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback + function to send intermediate update messages. + code_interpreter (Optional[Union[str, CodeInterpreter]]): Either a runtime + name ("local" or "e2b"), a CodeInterpreter object, or None. If None, the + runtime is read from the environment variable "CODE_SANDBOX_RUNTIME". + If a CodeInterpreter object is provided, it is used directly. 
""" self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent self.max_iterations = 12 self.verbosity = verbosity - self.code_sandbox_runtime = code_sandbox_runtime self.code_interpreter = code_interpreter self.callback_message = callback_message if self.verbosity >= 1: @@ -305,11 +307,13 @@ def chat_with_artifacts( # this is setting remote artifacts path artifacts = Artifacts(WORKSPACE / "artifacts.pkl") + # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues code_interpreter = ( self.code_interpreter if self.code_interpreter is not None + and not isinstance(self.code_interpreter, str) else CodeInterpreterFactory.new_instance( - code_sandbox_runtime=self.code_sandbox_runtime, + code_sandbox_runtime=self.code_interpreter, ) ) with code_interpreter: @@ -498,8 +502,8 @@ def __init__( agent: Optional[LMM] = None, verbosity: int = 0, local_artifacts_path: Optional[Union[str, Path]] = None, - code_sandbox_runtime: Optional[str] = None, callback_message: Optional[Callable[[Dict[str, Any]], None]] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: """Initialize the VisionAgent using OpenAI LMMs. @@ -509,7 +513,12 @@ def __init__( verbosity (int): The verbosity level of the agent. local_artifacts_path (Optional[Union[str, Path]]): The path to the local artifacts file. - code_sandbox_runtime (Optional[str]): The code sandbox runtime to use. + callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback + function to send intermediate update messages. + code_interpreter (Optional[Union[str, CodeInterpreter]]): Either a runtime + name ("local" or "e2b"), a CodeInterpreter object, or None. If None, the + runtime is read from the environment variable "CODE_SANDBOX_RUNTIME". + If a CodeInterpreter object is provided, it is used directly. 
""" agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent @@ -517,8 +526,8 @@ def __init__( agent, verbosity, local_artifacts_path, - code_sandbox_runtime, callback_message, + code_interpreter, ) @@ -528,8 +537,8 @@ def __init__( agent: Optional[LMM] = None, verbosity: int = 0, local_artifacts_path: Optional[Union[str, Path]] = None, - code_sandbox_runtime: Optional[str] = None, callback_message: Optional[Callable[[Dict[str, Any]], None]] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: """Initialize the VisionAgent using Anthropic LMMs. @@ -539,7 +548,12 @@ def __init__( verbosity (int): The verbosity level of the agent. local_artifacts_path (Optional[Union[str, Path]]): The path to the local artifacts file. - code_sandbox_runtime (Optional[str]): The code sandbox runtime to use. + callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback + function to send intermediate update messages. + code_interpreter (Optional[Union[str, CodeInterpreter]]): Either a runtime + name ("local" or "e2b"), a CodeInterpreter object, or None. If None, the + runtime is read from the environment variable "CODE_SANDBOX_RUNTIME". + If a CodeInterpreter object is provided, it is used directly. """ agent = AnthropicLMM(temperature=0.0) if agent is None else agent @@ -547,6 +561,6 @@ def __init__( agent, verbosity, local_artifacts_path, - code_sandbox_runtime, callback_message, + code_interpreter, ) diff --git a/vision_agent/agent/vision_agent_coder.py b/vision_agent/agent/vision_agent_coder.py index 83f6c3fc..f1246f09 100644 --- a/vision_agent/agent/vision_agent_coder.py +++ b/vision_agent/agent/vision_agent_coder.py @@ -337,7 +337,7 @@ def __init__( debugger: Optional[LMM] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: """Initialize the Vision Agent Coder. 
@@ -355,11 +355,10 @@ def __init__( in a web application where multiple VisionAgentCoder instances are running in parallel. This callback ensures that the progress are not mixed up. - code_sandbox_runtime (Optional[str]): the code sandbox runtime to use. A - code sandbox is used to run the generated code. It can be one of the - following values: None, "local" or "e2b". If None, VisionAgentCoder - will read the value from the environment variable CODE_SANDBOX_RUNTIME. - If it's also None, the local python runtime environment will be used. + code_interpreter (Optional[Union[str, CodeInterpreter]]): Either a runtime + name ("local" or "e2b"), a CodeInterpreter object, or None. If None, the + runtime is read from the environment variable "CODE_SANDBOX_RUNTIME". + If a CodeInterpreter object is provided, it is used directly. """ self.planner = ( @@ -375,7 +374,7 @@ def __init__( _LOGGER.setLevel(logging.INFO) self.report_progress_callback = report_progress_callback - self.code_sandbox_runtime = code_sandbox_runtime + self.code_interpreter = code_interpreter def __call__( self, @@ -441,13 +440,20 @@ def generate_code_from_plan( raise ValueError("Chat cannot be empty.") # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues - with ( + # An explicit argument wins; a runtime name or None goes to the factory. + code_interpreter = ( code_interpreter if code_interpreter is not None - else CodeInterpreterFactory.new_instance( - code_sandbox_runtime=self.code_sandbox_runtime + else ( + self.code_interpreter + if self.code_interpreter is not None + and not isinstance(self.code_interpreter, str) + else CodeInterpreterFactory.new_instance( + code_sandbox_runtime=self.code_interpreter, + ) ) - ) as code_interpreter: + ) + with code_interpreter: chat = copy.deepcopy(chat) media_list = [] for chat_i in chat: @@ -556,9 +557,16 @@ def generate_code( if not chat: raise ValueError("Chat cannot be empty.") - with CodeInterpreterFactory.new_instance( - code_sandbox_runtime=self.code_sandbox_runtime - ) as code_interpreter: + # NOTE: each chat should have a dedicated code interpreter instance to avoid 
concurrency issues + code_interpreter = ( + self.code_interpreter + if self.code_interpreter is not None + and not isinstance(self.code_interpreter, str) + else CodeInterpreterFactory.new_instance( + code_sandbox_runtime=self.code_interpreter, + ) + ) + with code_interpreter: plan_context = self.planner.generate_plan( # type: ignore chat, test_multi_plan=test_multi_plan, @@ -595,7 +603,7 @@ def __init__( debugger: Optional[LMM] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: self.planner = ( OpenAIVisionAgentPlanner(verbosity=verbosity) @@ -610,7 +618,7 @@ def __init__( _LOGGER.setLevel(logging.INFO) self.report_progress_callback = report_progress_callback - self.code_sandbox_runtime = code_sandbox_runtime + self.code_interpreter = code_interpreter class AnthropicVisionAgentCoder(VisionAgentCoder): @@ -624,7 +632,7 @@ def __init__( debugger: Optional[LMM] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: # NOTE: Claude doesn't have an official JSON mode self.planner = ( @@ -640,7 +648,7 @@ def __init__( _LOGGER.setLevel(logging.INFO) self.report_progress_callback = report_progress_callback - self.code_sandbox_runtime = code_sandbox_runtime + self.code_interpreter = code_interpreter class OllamaVisionAgentCoder(VisionAgentCoder): @@ -668,6 +676,7 @@ def __init__( debugger: Optional[LMM] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: super().__init__( planner=( @@ -692,6 +701,7 @@ def __init__( ), verbosity=verbosity, report_progress_callback=report_progress_callback, + 
code_interpreter=code_interpreter, ) @@ -717,6 +727,7 @@ def __init__( debugger: Optional[LMM] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: """Initialize the Vision Agent Coder. @@ -747,4 +758,5 @@ def __init__( ), verbosity=verbosity, report_progress_callback=report_progress_callback, + code_interpreter=code_interpreter, ) diff --git a/vision_agent/agent/vision_agent_planner.py b/vision_agent/agent/vision_agent_planner.py index 1a87fe49..bb7ac3ba 100644 --- a/vision_agent/agent/vision_agent_planner.py +++ b/vision_agent/agent/vision_agent_planner.py @@ -318,7 +318,7 @@ def __init__( tool_recommender: Optional[Sim] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: self.planner = AnthropicLMM(temperature=0.0) if planner is None else planner self.verbosity = verbosity @@ -331,7 +331,7 @@ def __init__( else tool_recommender ) self.report_progress_callback = report_progress_callback - self.code_sandbox_runtime = code_sandbox_runtime + self.code_interpreter = code_interpreter def __call__( self, input: Union[str, List[Message]], media: Optional[Union[str, Path]] = None @@ -353,13 +353,18 @@ def generate_plan( if not chat: raise ValueError("Chat cannot be empty") - with ( + # An explicit argument wins; a runtime name or None goes to the factory. + code_interpreter = ( code_interpreter if code_interpreter is not None - else CodeInterpreterFactory.new_instance( - code_sandbox_runtime=self.code_sandbox_runtime + else ( + self.code_interpreter + if self.code_interpreter is not None + and not isinstance(self.code_interpreter, str) + else CodeInterpreterFactory.new_instance(self.code_interpreter) ) - ) as code_interpreter: + ) + with code_interpreter: chat = copy.deepcopy(chat) media_list = [] for chat_i in chat: @@ 
-464,14 +468,14 @@ def __init__( tool_recommender: Optional[Sim] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: super().__init__( planner=AnthropicLMM(temperature=0.0) if planner is None else planner, tool_recommender=tool_recommender, verbosity=verbosity, report_progress_callback=report_progress_callback, - code_sandbox_runtime=code_sandbox_runtime, + code_interpreter=code_interpreter, ) @@ -482,7 +486,7 @@ def __init__( tool_recommender: Optional[Sim] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: super().__init__( planner=( @@ -493,7 +497,7 @@ def __init__( tool_recommender=tool_recommender, verbosity=verbosity, report_progress_callback=report_progress_callback, - code_sandbox_runtime=code_sandbox_runtime, + code_interpreter=code_interpreter, ) @@ -504,7 +508,7 @@ def __init__( tool_recommender: Optional[Sim] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: super().__init__( planner=( @@ -519,7 +523,7 @@ def __init__( ), verbosity=verbosity, report_progress_callback=report_progress_callback, - code_sandbox_runtime=code_sandbox_runtime, + code_interpreter=code_interpreter, ) @@ -530,7 +534,7 @@ def __init__( tool_recommender: Optional[Sim] = None, verbosity: int = 0, report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, - code_sandbox_runtime: Optional[str] = None, + code_interpreter: Optional[Union[str, CodeInterpreter]] = None, ) -> None: super().__init__( planner=( @@ -545,5 +549,5 @@ def __init__( ), 
verbosity=verbosity, report_progress_callback=report_progress_callback, - code_sandbox_runtime=code_sandbox_runtime, + code_interpreter=code_interpreter, )