Skip to content

Commit

Permalink
testing owlv2_video
Browse files Browse the repository at this point in the history
  • Loading branch information
dillonalaird committed Sep 6, 2024
1 parent 4af5053 commit f1d5f1f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
1 change: 1 addition & 0 deletions vision_agent/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
overlay_heat_map,
overlay_segmentation_masks,
owl_v2_image,
owl_v2_image2,
owl_v2_video,
save_image,
save_json,
Expand Down
41 changes: 35 additions & 6 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ def owl_v2_image(
"function_name": "owl_v2",
}
data: Dict[str, Any] = send_inference_request(request_data, "owlv2", v2=True)
print(data)
return_data = []
if data is not None:
for elt in data:
Expand All @@ -199,6 +198,35 @@ def owl_v2_image(
return return_data


def owl_v2_image2(
    prompt: str,
    image: np.ndarray,
    box_threshold: float = 0.30,
) -> List[Dict[str, Any]]:
    """Run text-prompted object detection on a single image via the
    "text-to-object-detection" inference endpoint using the "owlv2" model.

    Parameters:
        prompt (str): Comma-separated list of object descriptions. Each
            fragment is stripped of surrounding whitespace and sent as a
            separate prompt.
        image (np.ndarray): The image to run detection on. Only the first two
            entries of ``image.shape`` are used as the image size.
        box_threshold (float): Threshold passed to
            ``filter_bboxes_by_threshold``. Defaults to 0.30.

    Returns:
        List[Dict[str, Any]]: One ``model_dump()`` dictionary per detection
            that survives filtering, built from an ``ODResponseData`` with
            ``label``, ``bbox`` and ``score`` (rounded to 2 decimals). An
            empty list is returned when the service yields no data.
    """
    image_size = image.shape[:2]
    buffer_bytes = numpy_to_bytes(image)
    files = [("image", buffer_bytes)]
    payload = {
        "prompts": [s.strip() for s in prompt.split(",")],
        "model": "owlv2",
        "function_name": "owl_v2_image",
    }
    resp_data = send_inference_request(
        payload, "text-to-object-detection", files=files, v2=True
    )

    # Sibling tools (owl_v2_image / owl_v2_video) tolerate a missing response
    # and return an empty result; do the same here instead of failing on the
    # `[0]` lookup below.
    if not resp_data:
        return []

    # Only one image was uploaded, so only the first entry of the response is
    # consumed. NOTE(review): presumably the response is a per-image list of
    # detections -- confirm against the service contract.
    bboxes = resp_data[0]
    bboxes_formatted = [
        ODResponseData(
            label=bbox["label"],  # type: ignore
            bbox=normalize_bbox(bbox["bounding_box"], image_size),  # type: ignore
            score=round(bbox["score"], 2),  # type: ignore
        )
        for bbox in bboxes
    ]
    filtered_bboxes = filter_bboxes_by_threshold(bboxes_formatted, box_threshold)
    return [bbox.model_dump() for bbox in filtered_bboxes]


def owl_v2_video(
prompt: str,
frames: List[np.ndarray],
Expand Down Expand Up @@ -247,22 +275,23 @@ def owl_v2_video(
data: Dict[str, Any] = send_inference_request(
payload, "text-to-object-detection", files=files, v2=True
)
print(data)
bboxes_formatted = []
if data is not None:
for frame_data in data:
bboxes_formated_frame = []
for elt in frame_data:
bboxes_formated_frame.append(
ODResponseData(
label=elt["label"], # type: ignore
bbox=normalize_bbox(elt["bounding_box"], image_size), # type: ignore
score=round(elt["score"], 2), # type: ignore
label=elt["label"], # type: ignore
bbox=normalize_bbox(elt["bounding_box"], image_size), # type: ignore
score=round(elt["score"], 2), # type: ignore
)
)
bboxes_formatted.append(bboxes_formated_frame)

filtered_bboxes = [filter_bboxes_by_threshold(elt, box_threshold) for elt in bboxes_formatted]
filtered_bboxes = [
filter_bboxes_by_threshold(elt, box_threshold) for elt in bboxes_formatted
]
return [[bbox.model_dump() for bbox in frame] for frame in filtered_bboxes]


Expand Down

0 comments on commit f1d5f1f

Please sign in to comment.