@@ -477,7 +477,7 @@ def florence2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
477477
478478 Example
479479 -------
480- >>> florence2_roberta_vqa('What is the top left animal in this image ?', image)
480+ >>> florence2_roberta_vqa('What is the top left animal in this image?', image)
481481 'white tiger'
482482 """
483483
@@ -492,6 +492,36 @@ def florence2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
492492 return answer # type: ignore
493493
494494
def ixc25_image_vqa(prompt: str, image: np.ndarray) -> str:
    """'ixc25_image_vqa' is a tool that answers questions about arbitrary images,
    whether they are regular images or images of documents or presentations. The
    answer to the question is returned as text.

    Parameters:
        prompt (str): The question to ask about the image
        image (np.ndarray): The image the question refers to

    Returns:
        str: The text answer to the given prompt.

    Example
    -------
        >>> ixc25_image_vqa('What is the cat doing?', image)
        'drinking milk'
    """

    # The question and the tool name are sent as the request payload.
    request_fields = {
        "prompt": prompt,
        "function_name": "ixc25_image_vqa",
    }
    # The image is converted to bytes and passed through the `files` argument.
    attachments = [("image", numpy_to_bytes(image))]
    response: Dict[str, Any] = send_inference_request(
        request_fields, "internlm-xcomposer2", files=attachments, v2=True
    )
    # The reply is read as a mapping; only its "answer" entry is returned.
    return response["answer"]
523+
524+
495525def git_vqa_v2 (prompt : str , image : np .ndarray ) -> str :
496526 """'git_vqa_v2' is a tool that can answer questions about the visual
497527 contents of an image given a question and an image. It returns an answer to the
0 commit comments