@@ -255,7 +255,7 @@ def self_reflect(
255
255
) -> str :
256
256
prompt = VISION_AGENT_REFLECTION .format (
257
257
question = question ,
258
- tools = format_tools (tools ),
258
+ tools = format_tools ({ k : v [ "description" ] for k , v in tools . items ()} ),
259
259
tool_results = str (tool_result ),
260
260
final_answer = final_answer ,
261
261
)
@@ -268,11 +268,16 @@ def self_reflect(
268
268
return reflect_model (prompt )
269
269
270
270
271
def parse_reflect(reflect: str) -> Dict[str, Any]:
    """Parse a self-reflection string produced by the reflection model.

    The model is asked to reply in JSON; when it complies, the parsed dict is
    returned as-is. When it does not (LMMs have a hard time following
    directions), fall back to a lenient heuristic that scans the raw text for
    a "finish" signal.

    Parameters:
        reflect: the raw reflection text returned by the model.

    Returns:
        A dict with at least the keys "Finish" (bool, whether the agent should
        stop) and "Reflection" (str, the reflection text) — either the parsed
        JSON payload or the heuristic fallback.
    """
    try:
        return parse_json(reflect)
    except Exception:
        # parse_json can fail in model-dependent ways; log and fall through
        # to the lenient heuristic rather than crashing the agent loop.
        # Lazy %-formatting so the message is only built when emitted.
        _LOGGER.error("Failed parse json reflection: %s", reflect)
    # Heuristic fallback: treat a short message containing "finish", or one
    # that *ends* with "finish", as a stop signal.  Hoist .lower() so it is
    # computed once instead of per-test.
    lowered = reflect.lower()
    finish = ("finish" in lowered and len(reflect) < 100) or "finish" in lowered[-10:]
    return {"Finish": finish, "Reflection": reflect}
276
281
277
282
278
283
def visualize_result (all_tool_results : List [Dict ]) -> List [str ]:
@@ -389,7 +394,7 @@ def __init__(
389
394
OpenAILLM (temperature = 0.1 ) if answer_model is None else answer_model
390
395
)
391
396
self .reflect_model = (
392
- OpenAILMM (temperature = 0.1 ) if reflect_model is None else reflect_model
397
+ OpenAILMM (json_mode = True , temperature = 0.1 ) if reflect_model is None else reflect_model
393
398
)
394
399
self .max_retries = max_retries
395
400
self .tools = TOOLS
@@ -485,13 +490,14 @@ def chat_with_workflow(
485
490
visualized_output [0 ] if len (visualized_output ) > 0 else image ,
486
491
)
487
492
self .log_progress (f"Reflection: { reflection } " )
488
- if parse_reflect (reflection ):
493
+ parsed_reflection = parse_reflect (reflection )
494
+ if parsed_reflection ["Finish" ]:
489
495
break
490
496
else :
491
- reflections += "\n " + reflection
492
- # '<END >' is a symbol to indicate the end of the chat, which is useful for streaming logs.
497
+ reflections += "\n " + parsed_reflection [ "Reflection" ]
498
+ # '<ANSWER >' is a symbol to indicate the end of the chat, which is useful for streaming logs.
493
499
self .log_progress (
494
- f"The Vision Agent has concluded this chat. <ANSWER>{ final_answer } </< ANSWER>"
500
+ f"The Vision Agent has concluded this chat. <ANSWER>{ final_answer } </ANSWER>"
495
501
)
496
502
497
503
if visualize_output :
0 commit comments