@@ -467,6 +467,8 @@ def loca_visual_prompt_counting(
467
467
468
468
Parameters:
469
469
image (np.ndarray): The image that contains lot of instances of a single object
470
+ visual_prompt (Dict[str, List[float]]): Bounding box of the object in format
471
+ [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
470
472
471
473
Returns:
472
474
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
@@ -499,6 +501,99 @@ def loca_visual_prompt_counting(
499
501
return resp_data
500
502
501
503
504
def countgd_counting(
    prompt: str,
    image: np.ndarray,
    box_threshold: float = 0.23,
) -> List[Dict[str, Any]]:
    """'countgd_counting' is a tool that can precisely count multiple instances of an
    object given a text prompt. It returns a list of bounding boxes with normalized
    coordinates, label names and associated confidence scores.

    Parameters:
        prompt (str): The object that needs to be counted.
        image (np.ndarray): The image that contains multiple instances of the object.
        box_threshold (float, optional): The threshold for detection. Defaults
            to 0.23.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
            bounding box of the detected objects with normalized coordinates between 0
            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
            top-left and xmax and ymax are the coordinates of the bottom-right of the
            bounding box.

    Example
    -------
        >>> countgd_counting("flower", image)
        [
            {'score': 0.49, 'label': 'flower', 'bbox': [0.1, 0.11, 0.35, 0.4]},
            {'score': 0.68, 'label': 'flower', 'bbox': [0.2, 0.21, 0.45, 0.5]},
            {'score': 0.78, 'label': 'flower', 'bbox': [0.3, 0.35, 0.48, 0.52]},
            {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
        ]
    """
    buffer_bytes = numpy_to_bytes(image)
    files = [("image", buffer_bytes)]
    payload = {
        "text": prompt,
        # Text-only counting: no visual exemplar boxes are sent with the request.
        "visual_prompts": [],
        "box_threshold": box_threshold,
        "function_name": "countgd_counting",
    }
    # The response is returned to the caller unmodified; annotate it with the
    # same type as the declared return value (a list of detection dicts).
    data: List[Dict[str, Any]] = send_inference_request(
        payload, "countgd_counting", files=files, v2=True
    )
    return data
548
+
549
+
550
def countgd_example_based_counting(
    visual_prompts: List[List[float]],
    image: np.ndarray,
    box_threshold: float = 0.23,
) -> List[Dict[str, Any]]:
    """'countgd_example_based_counting' is a tool that can precisely count multiple
    instances of an object given few visual example prompts. It returns a list of bounding
    boxes with normalized coordinates, label names and associated confidence scores.

    Parameters:
        visual_prompts (List[List[float]]): Bounding boxes of the object in format
            [xmin, ymin, xmax, ymax]. Up to 3 bounding boxes can be provided.
        image (np.ndarray): The image that contains multiple instances of the object.
        box_threshold (float, optional): The threshold for detection. Defaults
            to 0.23.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
            bounding box of the detected objects with normalized coordinates between 0
            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
            top-left and xmax and ymax are the coordinates of the bottom-right of the
            bounding box.

    Example
    -------
        >>> countgd_example_based_counting(visual_prompts=[[0.1, 0.1, 0.4, 0.42], [0.2, 0.3, 0.25, 0.35]], image=image)
        [
            {'score': 0.49, 'label': 'object', 'bbox': [0.1, 0.11, 0.35, 0.4]},
            {'score': 0.68, 'label': 'object', 'bbox': [0.2, 0.21, 0.45, 0.5]},
            {'score': 0.78, 'label': 'object', 'bbox': [0.3, 0.35, 0.48, 0.52]},
            {'score': 0.98, 'label': 'object', 'bbox': [0.44, 0.24, 0.49, 0.58]},
        ]
    """
    buffer_bytes = numpy_to_bytes(image)
    files = [("image", buffer_bytes)]
    payload = {
        # Example-based counting: the text prompt is left empty and the
        # caller-supplied exemplar boxes are forwarded as-is.
        "text": "",
        "visual_prompts": visual_prompts,
        "box_threshold": box_threshold,
        "function_name": "countgd_example_based_counting",
    }
    # The response is returned to the caller unmodified; annotate it with the
    # same type as the declared return value (a list of detection dicts).
    data: List[Dict[str, Any]] = send_inference_request(
        payload, "countgd_example_based_counting", files=files, v2=True
    )
    return data
595
+
596
+
502
597
def florence2_roberta_vqa (prompt : str , image : np .ndarray ) -> str :
503
598
"""'florence2_roberta_vqa' is a tool that takes an image and analyzes
504
599
its contents, generates detailed captions and then tries to answer the given
@@ -1657,8 +1752,7 @@ def florencev2_fine_tuned_object_detection(
1657
1752
clip ,
1658
1753
vit_image_classification ,
1659
1754
vit_nsfw_classification ,
1660
- loca_zero_shot_counting ,
1661
- loca_visual_prompt_counting ,
1755
+ countgd_counting ,
1662
1756
florence2_image_caption ,
1663
1757
florence2_ocr ,
1664
1758
florence2_sam2_image ,
0 commit comments