Skip to content

Commit

Permalink
added premade responses to execute code
Browse files Browse the repository at this point in the history
  • Loading branch information
dillonalaird committed Oct 9, 2024
1 parent 22c1e0e commit 17d6790
Showing 1 changed file with 51 additions and 4 deletions.
55 changes: 51 additions & 4 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,32 @@ def execute_user_code_action(
return user_result, user_obs


def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
    """Return a copy of ``response`` with a short step description prepended.

    When ``response["response"]`` contains an ``<execute_python>`` tag, any
    text before the tag is dropped and replaced with a human-readable
    description of what the code is about to do (for example
    ``"Reading file."``), followed by a single space and the code from the
    tag onwards. Responses without the key, without the tag, or whose value
    is not a string are returned unchanged.

    Parameters:
        response: The agent response dictionary. It is never mutated; a deep
            copy is modified and returned instead.

    Returns:
        A deep copy of ``response`` whose ``"response"`` entry has been
        rewritten as described above when applicable.
    """
    response = copy.deepcopy(response)

    resp_str = response.get("response")
    tag = "<execute_python>"
    # Nothing to describe unless the response is text that executes code.
    if not isinstance(resp_str, str) or tag not in resp_str:
        return response

    # Keep only the code section; the leading prose is replaced by the
    # generated description.
    code = resp_str[resp_str.find(tag) :]

    # only include descriptions for these, the rest will just have executing
    # code
    description_map = {
        "open_code_artifact": "Reading file.",
        "create_code_artifact": "Creating file.",
        "edit_code_artifact": "Editing file.",
        "generate_vision_code": "Generating vision code.",
        "edit_vision_code": "Editing vision code.",
    }

    # Match tool names against the code that will actually run, not against
    # the discarded prose, so a tool that is merely mentioned in passing does
    # not produce a misleading description.
    steps = [text for tool, text in description_map.items() if tool in code]
    description = " ".join(steps) if steps else "Executing code."

    # Always separate the description from the code with exactly one space,
    # including for the generic fallback description.
    response["response"] = f"{description} {code}"
    return response


class VisionAgent(Agent):
"""Vision Agent is an agent that can chat with the user and call tools or other
agents to generate code for it. Vision Agent uses python code to execute actions
Expand Down Expand Up @@ -335,8 +361,18 @@ def chat_with_code(
response = run_conversation(self.agent, int_chat)
if self.verbosity >= 1:
_LOGGER.info(response)
int_chat.append({"role": "assistant", "content": str(response)})
orig_chat.append({"role": "assistant", "content": str(response)})
int_chat.append(
{
"role": "assistant",
"content": str(add_step_descriptions(response)),
}
)
orig_chat.append(
{
"role": "assistant",
"content": str(add_step_descriptions(response)),
}
)

# sometimes it gets stuck in a loop, so we force it to exit
if last_response == response:
Expand Down Expand Up @@ -382,6 +418,16 @@ def chat_with_code(

obs_chat_elt: Message = {"role": "observation", "content": obs}
if media_obs and result.success:
# for view_media_artifact, we need to ensure the media is loaded
# locally so the conversation agent can actually see it
code_interpreter.download_file(
str(remote_artifacts_path.name),
str(self.local_artifacts_path),
)
artifacts.load(
self.local_artifacts_path,
Path(self.local_artifacts_path).parent,
)
obs_chat_elt["media"] = [
Path(self.local_artifacts_path).parent / media_ob
for media_ob in media_obs
Expand All @@ -407,8 +453,9 @@ def chat_with_code(
code_interpreter.download_file(
str(remote_artifacts_path.name), str(self.local_artifacts_path)
)
artifacts.load(self.local_artifacts_path)
artifacts.save()
artifacts.load(
self.local_artifacts_path, Path(self.local_artifacts_path).parent
)
return orig_chat, artifacts

def streaming_message(self, message: Dict[str, Any]) -> None:
Expand Down

0 comments on commit 17d6790

Please sign in to comment.