diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 73a41184..6dc3bea0 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -315,12 +315,16 @@ def create_tasks( def self_reflect( reflect_model: Union[LLM, LMM], question: str, + tools: Dict[int, Any], tool_result: List[Dict], final_answer: str, image: Optional[Union[str, Path]] = None, ) -> str: prompt = VISION_AGENT_REFLECTION.format( - question=question, tool_results=str(tool_result), final_answer=final_answer + question=question, + tools=format_tools(tools), + tool_results=str(tool_result), + final_answer=final_answer, ) if issubclass(type(reflect_model), LMM): return reflect_model(prompt, image=image) # type: ignore @@ -328,7 +332,8 @@ def self_reflect( def parse_reflect(reflect: str) -> bool: - return reflect.lower() == "finish" + # GPT-4V has a hard time following directions, so make the criteria less strict + return "finish" in reflect.lower() and len(reflect) < 100 class VisionAgent(Agent): @@ -425,7 +430,12 @@ def chat_with_workflow( ) reflection = self_reflect( - self.reflect_model, question, all_tool_results, final_answer, image + self.reflect_model, + question, + self.tools, + all_tool_results, + final_answer, + image, ) _LOGGER.info(f"\tReflection: {reflection}") if parse_reflect(reflection): diff --git a/vision_agent/agent/vision_agent_prompts.py b/vision_agent/agent/vision_agent_prompts.py index 7d4a724c..62579419 100644 --- a/vision_agent/agent/vision_agent_prompts.py +++ b/vision_agent/agent/vision_agent_prompts.py @@ -1,7 +1,10 @@ -VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self refection. You will be given a previous reasoning trial in which you were given the user's question, the decomposed tasks and tools that the agent used to answer teh question and the final answer the agent provided. You must determine if the agent's answer was correct or incorrect. 
If the agen'ts answer was correct, respond with Finish. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, high level plan that aims to mitigate the same failure. Use complete sentences. +VISION_AGENT_REFLECTION = """You are an advanced reasoning agent that can improve based on self reflection. You will be given a previous reasoning trial in which you were given the user's question, the available tools that the agent has, the decomposed tasks and tools that the agent used to answer the question and the final answer the agent provided. You must determine if the agent's answer was correct or incorrect. If the agent's answer was correct, respond with Finish. If the agent's answer was incorrect, you must diagnose a possible reason for failure or phrasing discrepancy and devise a new, concise, high level plan that aims to mitigate the same failure with the tools available. Use complete sentences. User's question: {question} +Tools available: +{tools} + Tasks and tools used: {tool_results}