diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 962815aa..aea703be 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -218,7 +218,7 @@ def chat_with_code( ) as code_interpreter: orig_chat = copy.deepcopy(chat) int_chat = copy.deepcopy(chat) - last_user_message = chat[-1] + last_user_message_content = chat[-1].get("content") media_list = [] for chat_i in int_chat: if "media" in chat_i: @@ -267,27 +267,23 @@ def chat_with_code( orig_chat.append({"role": "observation", "content": artifacts_loaded}) self.streaming_message({"role": "observation", "content": artifacts_loaded}) - user_code_action = None - if isinstance(last_user_message.get("content"), str): - user_code_action = parse_execution( - last_user_message.get("content"), False - ) - - if user_code_action is not None: - user_result, user_obs = run_code_action( - user_code_action, code_interpreter, str(remote_artifacts_path) - ) - if self.verbosity >= 1: - _LOGGER.info(user_obs) - self.streaming_message( - { - "role": "observation", - "content": user_obs, - "execution": user_result, - "finished": True, - } - ) - finished = True + if isinstance(last_user_message_content, str): + user_code_action = parse_execution(last_user_message_content, False) + if user_code_action is not None: + user_result, user_obs = run_code_action( + user_code_action, code_interpreter, str(remote_artifacts_path) + ) + if self.verbosity >= 1: + _LOGGER.info(user_obs) + self.streaming_message( + { + "role": "observation", + "content": user_obs, + "execution": user_result, + "finished": True, + } + ) + finished = True while not finished and iterations < self.max_iterations: response = run_conversation(self.agent, int_chat)