Skip to content

Commit

Permalink
add support for passing args to visionagentcoder
Browse files Browse the repository at this point in the history
  • Loading branch information
dillonalaird committed Sep 5, 2024
1 parent 3b15c9b commit d255b1e
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 7 deletions.
23 changes: 20 additions & 3 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
from vision_agent.lmm import LMM, Message, OpenAILMM
from vision_agent.tools import META_TOOL_DOCSTRING
from vision_agent.tools.meta_tools import Artifacts
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
from vision_agent.utils import CodeInterpreterFactory
from vision_agent.utils.execute import CodeInterpreter, Execution

Expand Down Expand Up @@ -87,11 +87,18 @@ def run_code_action(
return result, obs


def parse_execution(response: str) -> Optional[str]:
def parse_execution(
    response: str,
    test_multi_plan: bool = True,
    customed_tool_names: Optional[List[str]] = None,
) -> Optional[str]:
    """Extract the code inside the first ``<execute_python>`` tag of a response.

    Parameters:
        response (str): The raw model response to search.
        test_multi_plan (bool): Forwarded unchanged to
            ``use_extra_vision_agent_args``.
        customed_tool_names (Optional[List[str]]): Forwarded unchanged to
            ``use_extra_vision_agent_args`` as its tool-name list.

    Returns:
        Optional[str]: The text between ``<execute_python>`` and
            ``</execute_python>`` after it has been passed through
            ``use_extra_vision_agent_args``, or None when the response has no
            ``<execute_python>`` tag.
    """
    _, open_tag, remainder = response.partition("<execute_python>")
    if not open_tag:
        return None

    # partition() keeps the whole remainder when the closing tag is missing;
    # slicing with find() == -1 used to silently drop the final character.
    code = remainder.partition("</execute_python>")[0]
    return use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)


Expand Down Expand Up @@ -174,6 +181,8 @@ def chat_with_code(
self,
chat: List[Message],
artifacts: Optional[Artifacts] = None,
test_multi_plan: bool = True,
customized_tool_names: Optional[List[str]] = None,
) -> Tuple[List[Message], Artifacts]:
"""Chat with VisionAgent, it will use code to execute actions to accomplish
its tasks.
Expand All @@ -184,6 +193,12 @@ def chat_with_code(
or if it contains media files, it should be in the format of:
[{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
artifacts (Optional[Artifacts]): The artifacts to use in the task.
test_multi_plan (bool): If True, it will test tools for multiple plans and
pick the best one based off of the tool results. If False, it will go
with the first plan.
customized_tool_names (List[str]): A list of customized tools for agent to
pick and use. If not provided, default to full tool set from
vision_agent.tools.
Returns:
List[Message]: The conversation response.
Expand Down Expand Up @@ -262,7 +277,9 @@ def chat_with_code(
if response["let_user_respond"]:
break

code_action = parse_execution(response["response"])
code_action = parse_execution(
response["response"], test_multi_plan, customized_tool_names
)

if code_action is not None:
result, obs = run_code_action(
Expand Down
71 changes: 67 additions & 4 deletions vision_agent/tools/meta_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,12 @@ def edit_code_artifact(


def generate_vision_code(
artifacts: Artifacts, name: str, chat: str, media: List[str]
artifacts: Artifacts,
name: str,
chat: str,
media: List[str],
test_multi_plan: bool = True,
customized_tool_names: Optional[List[str]] = None,
) -> str:
"""Generates python code to solve vision based tasks.
Expand All @@ -306,6 +311,8 @@ def generate_vision_code(
name (str): The name of the artifact to save the code to.
chat (str): The chat message from the user.
media (List[str]): The media files to use.
test_multi_plan (bool): Do not change this parameter.
customized_tool_names (Optional[List[str]]): Do not change this parameter.
Returns:
str: The generated code.
Expand All @@ -330,7 +337,11 @@ def detect_dogs(image_path: str):
agent = va.agent.VisionAgentCoder()

fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
response = agent.chat_with_workflow(fixed_chat, test_multi_plan=False)
response = agent.chat_with_workflow(
fixed_chat,
test_multi_plan=test_multi_plan,
customized_tool_names=customized_tool_names,
)
redisplay_results(response["test_result"])
code = response["code"]
artifacts[name] = code
Expand All @@ -342,14 +353,19 @@ def detect_dogs(image_path: str):


def edit_vision_code(
artifacts: Artifacts, name: str, chat_history: List[str], media: List[str]
artifacts: Artifacts,
name: str,
chat_history: List[str],
media: List[str],
customized_tool_names: Optional[List[str]] = None,
) -> str:
"""Edits python code to solve a vision based task.
Parameters:
artifacts (Artifacts): The artifacts object to save the code to.
name (str): The file path to the code.
chat_history (List[str]): The chat history used to generate the code.
customized_tool_names (Optional[List[str]]): Do not change this parameter.
Returns:
str: The edited code.
Expand Down Expand Up @@ -386,7 +402,11 @@ def detect_dogs(image_path: str):
fixed_chat_history.append({"role": "assistant", "content": code})
fixed_chat_history.append({"role": "user", "content": chat})

response = agent.chat_with_workflow(fixed_chat_history, test_multi_plan=False)
response = agent.chat_with_workflow(
fixed_chat_history,
test_multi_plan=False,
customized_tool_names=customized_tool_names,
)
redisplay_results(response["test_result"])
code = response["code"]
artifacts[name] = code
Expand Down Expand Up @@ -480,6 +500,49 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
return f"[Artifact {name} edits]\n{diff}\n[End of edits]"


def _scan_call_args(text: str, open_idx: int) -> tuple:
    """Locate the end of the call whose opening parenthesis is at open_idx.

    Brackets are balanced across (), [] and {}; string literals (single or
    triple quoted, with backslash escapes) and ``#`` comments are skipped so
    brackets inside them are ignored.

    Returns:
        tuple: ``(close_idx, last_sig)`` where ``close_idx`` is the index of the
            matching ``)`` and ``last_sig`` is the index of the last character
            before it that is neither whitespace nor part of a comment
            (``open_idx`` itself when the call has no arguments). Returns
            ``(-1, -1)`` when the call is never closed.
    """
    depth = 0
    last_sig = open_idx
    i = open_idx
    end = len(text)
    while i < end:
        ch = text[i]
        if ch in "\"'":
            quote = ch * 3 if text.startswith(ch * 3, i) else ch
            i += len(quote)
            while i < end and not text.startswith(quote, i):
                # A backslash escapes the next character, including a quote.
                i += 2 if text[i] == "\\" else 1
            i += len(quote)
            last_sig = i - 1
            continue
        if ch == "#":
            newline = text.find("\n", i)
            if newline == -1:
                return -1, -1
            i = newline
            continue
        if ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
            if depth == 0:
                return i, last_sig
        if not ch.isspace():
            last_sig = i
        i += 1
    return -1, -1


def use_extra_vision_agent_args(
    code: str,
    test_multi_plan: bool = True,
    customized_tool_names: Optional[List[str]] = None,
) -> str:
    """This is for forcing arguments passed by the user to VisionAgent into the
    VisionAgentCoder call.

    Every ``generate_vision_code(...)`` call in ``code`` gets
    ``test_multi_plan=<value>`` appended, and every ``generate_vision_code(...)``
    or ``edit_vision_code(...)`` call gets ``customized_tool_names=<value>``
    appended when a list is provided. The extra keywords are inserted right
    after the last real argument token, so parentheses inside string arguments,
    nested calls and trailing commas do not corrupt the rewritten call. Calls
    whose closing parenthesis cannot be found are left untouched.

    Parameters:
        code (str): The code to edit.
        test_multi_plan (bool): Do not change this parameter.
        customized_tool_names (Optional[List[str]]): Do not change this parameter.

    Returns:
        str: The edited code.
    """
    call_start = re.compile(r"\b(generate_vision_code|edit_vision_code)\(")

    # (index, text) pairs; collected first because nested calls yield
    # insertion points that are not discovered in left-to-right order.
    insertions: List[tuple] = []
    for match in call_start.finditer(code):
        open_idx = match.end() - 1
        close_idx, last_sig = _scan_call_args(code, open_idx)
        if close_idx == -1:
            continue

        extras = []
        if match.group(1) == "generate_vision_code":
            extras.append(f"test_multi_plan={test_multi_plan}")
        if customized_tool_names is not None:
            extras.append(f"customized_tool_names={customized_tool_names}")
        if not extras:
            continue

        joined = ", ".join(extras)
        if last_sig == open_idx:
            # Call without arguments: no separator needed.
            addition = joined
        elif code[last_sig] == ",":
            # Reuse the existing trailing comma instead of emitting ", ,".
            addition = " " + joined
        else:
            addition = ", " + joined
        insertions.append((last_sig + 1, addition))

    if not insertions:
        return code

    pieces = []
    cursor = 0
    for index, addition in sorted(insertions):
        pieces.append(code[cursor:index])
        pieces.append(addition)
        cursor = index
    pieces.append(code[cursor:])
    return "".join(pieces)


def use_florence2_fine_tuning(
artifacts: Artifacts, name: str, task: str, fine_tune_id: str
) -> str:
Expand Down

0 comments on commit d255b1e

Please sign in to comment.