@@ -58,9 +58,10 @@ def grounding_dino(
58
58
box_threshold : float = 0.20 ,
59
59
iou_threshold : float = 0.20 ,
60
60
) -> List [Dict [str , Any ]]:
61
- """'grounding_dino' is a tool that can detect and count objects given a text prompt
62
- such as category names or referring expressions. It returns a list and count of
63
- bounding boxes, label names and associated probability scores.
61
+ """'grounding_dino' is a tool that can detect and count multiple objects given a text
62
+ prompt such as category names or referring expressions. The categories in the text prompt
63
+ are separated by commas or periods. It returns a list and count of bounding boxes,
64
+ label names and associated probability scores.
64
65
65
66
Parameters:
66
67
prompt (str): The prompt to ground to the image.
@@ -111,9 +112,10 @@ def grounding_sam(
111
112
box_threshold : float = 0.20 ,
112
113
iou_threshold : float = 0.20 ,
113
114
) -> List [Dict [str , Any ]]:
114
- """'grounding_sam' is a tool that can detect and segment objects given a text
115
- prompt such as category names or referring expressions. It returns a list of
116
- bounding boxes, label names and masks file names and associated probability scores.
115
+ """'grounding_sam' is a tool that can detect and segment multiple objects given a
116
+ text prompt such as category names or referring expressions. The categories in the text
117
+ prompt are separated by commas or periods. It returns a list of bounding boxes,
118
+ label names, mask file names and associated probability scores.
117
119
118
120
Parameters:
119
121
prompt (str): The prompt to ground to the image.
@@ -343,9 +345,9 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
343
345
344
346
345
347
def clip (image : np .ndarray , classes : List [str ]) -> Dict [str , Any ]:
346
- """'clip' is a tool that can classify an image given a list of input classes or tags.
347
- It returns the same list of the input classes along with their probability scores
348
- based on image content.
348
+ """'clip' is a tool that can classify an image or a cropped detection given a list
349
+ of input classes or tags. It returns the same list of the input classes along with
350
+ their probability scores based on image content.
349
351
350
352
Parameters:
351
353
image (np.ndarray): The image to classify or tag
0 commit comments