@@ -255,7 +255,7 @@ def self_reflect(
255255) ->  str :
256256    prompt  =  VISION_AGENT_REFLECTION .format (
257257        question = question ,
258-         tools = format_tools (tools ),
258+         tools = format_tools ({ k :  v [ "description" ]  for   k ,  v   in   tools . items ()} ),
259259        tool_results = str (tool_result ),
260260        final_answer = final_answer ,
261261    )
@@ -268,11 +268,16 @@ def self_reflect(
268268    return  reflect_model (prompt )
269269
270270
271- def  parse_reflect (reflect : str ) ->  bool :
272-     # GPT-4V has a hard time following directions, so make the criteria less strict 
273-     return  (
271+ def  parse_reflect (reflect : str ) ->  Dict [str , Any ]:
272+     try :
273+         return  parse_json (reflect )
274+     except  Exception :
275+         _LOGGER .error (f"Failed parse json reflection: { reflect }" )
276+     # LMMs have a hard time following directions, so make the criteria less strict 
277+     finish  =  (
274278        "finish"  in  reflect .lower () and  len (reflect ) <  100 
275279    ) or  "finish"  in  reflect .lower ()[- 10 :]
280+     return  {"Finish" : finish , "Reflection" : reflect }
276281
277282
278283def  visualize_result (all_tool_results : List [Dict ]) ->  List [str ]:
@@ -389,7 +394,7 @@ def __init__(
389394            OpenAILLM (temperature = 0.1 ) if  answer_model  is  None  else  answer_model 
390395        )
391396        self .reflect_model  =  (
392-             OpenAILMM (temperature = 0.1 ) if  reflect_model  is  None  else  reflect_model 
397+             OpenAILMM (json_mode = True ,  temperature = 0.1 ) if  reflect_model  is  None  else  reflect_model 
393398        )
394399        self .max_retries  =  max_retries 
395400        self .tools  =  TOOLS 
@@ -485,13 +490,14 @@ def chat_with_workflow(
485490                visualized_output [0 ] if  len (visualized_output ) >  0  else  image ,
486491            )
487492            self .log_progress (f"Reflection: { reflection }" )
488-             if  parse_reflect (reflection ):
493+             parsed_reflection  =  parse_reflect (reflection )
494+             if  parsed_reflection ["Finish" ]:
489495                break 
490496            else :
491-                 reflections  +=  "\n "  +  reflection 
492-         # '<END>' is a symbol to indicate the end of the chat, which is useful for streaming logs. 
497+                 reflections  +=  "\n "  +  parsed_reflection [ "Reflection" ] 
498+         # '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs. 
493499        self .log_progress (
494-             f"The Vision Agent has concluded this chat. <ANSWER>{ final_answer }<ANSWER>" 
500+             f"The Vision Agent has concluded this chat. <ANSWER>{ final_answer }</ANSWER>" 
495501        )
496502
497503        if  visualize_output :
0 commit comments