@@ -58,9 +58,10 @@ def grounding_dino(
5858 box_threshold : float = 0.20 ,
5959 iou_threshold : float = 0.20 ,
6060) -> List [Dict [str , Any ]]:
61- """'grounding_dino' is a tool that can detect and count objects given a text prompt
62- such as category names or referring expressions. It returns a list and count of
63- bounding boxes, label names and associated probability scores.
61+ """'grounding_dino' is a tool that can detect and count multiple objects given a text
62+ prompt such as category names or referring expressions. The categories in the text prompt
63+ are separated by commas or periods. It returns a list and count of bounding boxes,
64+ label names and associated probability scores.
6465
6566 Parameters:
6667 prompt (str): The prompt to ground to the image.
@@ -111,9 +112,10 @@ def grounding_sam(
111112 box_threshold : float = 0.20 ,
112113 iou_threshold : float = 0.20 ,
113114) -> List [Dict [str , Any ]]:
114- """'grounding_sam' is a tool that can detect and segment objects given a text
115- prompt such as category names or referring expressions. It returns a list of
116- bounding boxes, label names and masks file names and associated probability scores.
115+ """'grounding_sam' is a tool that can detect and segment multiple objects given a
116+ text prompt such as category names or referring expressions. The categories in the text
117+ prompt are separated by commas or periods. It returns a list of bounding boxes,
118+ label names, mask file names and associated probability scores.
117119
118120 Parameters:
119121 prompt (str): The prompt to ground to the image.
@@ -343,9 +345,9 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
343345
344346
345347def clip (image : np .ndarray , classes : List [str ]) -> Dict [str , Any ]:
346- """'clip' is a tool that can classify an image given a list of input classes or tags.
347- It returns the same list of the input classes along with their probability scores
348- based on image content.
348+ """'clip' is a tool that can classify an image or a cropped detection given a list
349+ of input classes or tags. It returns the same list of the input classes along with
350+ their probability scores based on image content.
349351
350352 Parameters:
351353 image (np.ndarray): The image to classify or tag
0 commit comments