Skip to content

Commit

Permalink
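updated tools (switch five tools in vision_agent/tools/tools.py to `send_inference_request(..., v2=True)` endpoints)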
Browse files: browse the repository at this point in the history
  • Loading branch information
dillonalaird committed Aug 10, 2024
1 parent fb77a36 commit 6ac6e58
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 21 deletions.
41 changes: 21 additions & 20 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,13 +426,12 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"prompt": prompt,
"tool": "image_question_answering_with_context",
"question": prompt,
"function_name": "florencev2_roberta_vqa",
}

answer = send_inference_request(data, "tools")
return answer["text"][0] # type: ignore
answer = send_inference_request(data, "florence2-qa", v2=True)
return answer # type: ignore


def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
Expand Down Expand Up @@ -544,11 +543,10 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"tool": "nsfw_image_classification",
"function_name": "vit_nsfw_classification",
}
resp_data = send_inference_request(data, "tools")
resp_data["scores"] = round(resp_data["scores"], 4)
resp_data = send_inference_request(data, "nsfw-classification", v2=True)
resp_data["score"] = round(resp_data["score"], 4)
return resp_data


Expand Down Expand Up @@ -636,18 +634,19 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"tool": "object_detection",
"task": "<OD>",
"function_name": "florencev2_object_detection",
}

answer = send_inference_request(data, "tools")
detections = send_inference_request(data, "florence2", v2=True)
detections = detections["<OD>"]
return_data = []
for i in range(len(answer["bboxes"])):
for i in range(len(detections["bboxes"])):
return_data.append(
{
"score": round(answer["scores"][i], 2),
"label": answer["labels"][i],
"bbox": normalize_bbox(answer["bboxes"][i], image_size),
"score": 1.0,
"label": detections["labels"][i],
"bbox": normalize_bbox(detections["bboxes"][i], image_size),
}
)
return return_data
Expand Down Expand Up @@ -736,13 +735,16 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"tool": "generate_depth",
"function_name": "depth_anything_v2",
}

answer = send_inference_request(data, "tools")
return_data = np.array(b64_to_pil(answer["masks"][0]).convert("L"))
return return_data
depth_map = send_inference_request(data, "depth-anything-v2", v2=True)
depth_map_np = np.array(depth_map["map"])
depth_map_np = (depth_map_np - depth_map_np.min()) / (
depth_map_np.max() - depth_map_np.min()
)
depth_map_np = (255 * depth_map_np).astype(np.uint8)
return depth_map_np


def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
Expand Down Expand Up @@ -833,12 +835,11 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
image_b64 = convert_to_b64(image)
data = {
"image": image_b64,
"tool": "generate_pose",
"function_name": "generate_pose_image",
}

answer = send_inference_request(data, "tools")
return_data = np.array(b64_to_pil(answer["masks"][0]).convert("RGB"))
pos_img = send_inference_request(data, "pose-detector", v2=True)
return_data = np.array(b64_to_pil(pos_img["data"]).convert("RGB"))
return return_data


Expand Down
2 changes: 1 addition & 1 deletion vision_agent/utils/type_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class LandingaiAPIKey(BaseSettings):
alias="LANDINGAI_API_KEY",
description="The API key of LandingAI.",
)

@field_validator("api_key")
@classmethod
def is_api_key_valid(cls, key: str) -> str:
Expand Down

0 comments on commit 6ac6e58

Please sign in to comment.