Skip to content

Commit

Permalink
add function name back
Browse files Browse the repository at this point in the history
  • Loading branch information
Dayof committed Aug 6, 2024
1 parent c63c752 commit 57845ee
Showing 1 changed file with 26 additions and 50 deletions.
76 changes: 26 additions & 50 deletions vision_agent/tools/tools.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
import io
import json
import inspect
import logging
import tempfile
from uuid import UUID
from pathlib import Path
from importlib import resources
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import cv2
import requests
import numpy as np
import pandas as pd
from pytube import YouTube # type: ignore
from moviepy.editor import ImageSequenceClip
from PIL import Image, ImageDraw, ImageFont
from pillow_heif import register_heif_opener # type: ignore

from vision_agent.clients.landing_public_api import LandingPublicAPI
from vision_agent.tools.tool_types import BboxInput, BboxInputBase64
from vision_agent.tools.tool_utils import send_inference_request
from vision_agent.tools.tool_utils import (
send_inference_request,
get_tool_descriptions,
get_tool_documentation,
get_tools_df,
)
from vision_agent.utils import extract_frames_from_video
from vision_agent.utils.execute import FileSerializer, MimeType
from vision_agent.utils.image_utils import (
Expand Down Expand Up @@ -57,7 +60,6 @@
]
_API_KEY = "land_sk_WVYwP00xA3iXely2vuar6YUDZ3MJT9yLX6oW5noUkwICzYLiDV"
_OCR_URL = "https://app.landing.ai/ocr/v1/detect-text"
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)


Expand Down Expand Up @@ -108,6 +110,7 @@ def grounding_dino(
"visual_grounding" if model_size == "large" else "visual_grounding_tiny"
),
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
"function_name": "grounding_dino",
}
data: Dict[str, Any] = send_inference_request(request_data, "tools")
return_data = []
Expand Down Expand Up @@ -163,6 +166,7 @@ def owl_v2(
"image": image_b64,
"tool": "open_vocab_detection",
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
"function_name": "owl_v2",
}
data: Dict[str, Any] = send_inference_request(request_data, "tools")
return_data = []
Expand Down Expand Up @@ -227,6 +231,7 @@ def grounding_sam(
"image": image_b64,
"tool": "visual_grounding_segment",
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
"function_name": "grounding_sam",
}
data: Dict[str, Any] = send_inference_request(request_data, "tools")
return_data = []
Expand Down Expand Up @@ -366,6 +371,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
data = {
"image": image_b64,
"tool": "zero_shot_counting",
"function_name": "loca_zero_shot_counting",
}
resp_data = send_inference_request(data, "tools")
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
Expand Down Expand Up @@ -401,6 +407,7 @@ def loca_visual_prompt_counting(
"image": image_b64,
"prompt": bbox_str,
"tool": "few_shot_counting",
"function_name": "loca_visual_prompt_counting",
}
resp_data = send_inference_request(data, "tools")
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
Expand Down Expand Up @@ -430,6 +437,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
"image": image_b64,
"prompt": prompt,
"tool": "image_question_answering_with_context",
"function_name": "florencev2_roberta_vqa",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -459,6 +467,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
"image": image_b64,
"prompt": prompt,
"tool": "image_question_answering",
"function_name": "git_vqa_v2",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -489,6 +498,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
"prompt": ",".join(classes),
"image": image_b64,
"tool": "closed_set_image_classification",
"function_name": "clip",
}
resp_data = send_inference_request(data, "tools")
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
Expand Down Expand Up @@ -516,6 +526,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
data = {
"image": image_b64,
"tool": "image_classification",
"function_name": "vit_image_classification",
}
resp_data = send_inference_request(data, "tools")
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
Expand Down Expand Up @@ -543,6 +554,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
data = {
"image": image_b64,
"tool": "nsfw_image_classification",
"function_name": "vit_nsfw_classification",
}
resp_data = send_inference_request(data, "tools")
resp_data["scores"] = round(resp_data["scores"], 4)
Expand All @@ -569,6 +581,7 @@ def blip_image_caption(image: np.ndarray) -> str:
data = {
"image": image_b64,
"tool": "image_captioning",
"function_name": "blip_image_caption",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -597,6 +610,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
"image": image_b64,
"tool": "florence2_image_captioning",
"detail_caption": detail_caption,
"function_name": "florencev2_image_caption",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -632,6 +646,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
data = {
"image": image_b64,
"tool": "object_detection",
"function_name": "florencev2_object_detection",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -673,7 +688,7 @@ def florencev2_fine_tuning(bboxes: List[Dict[str, Any]]) -> UUID:
image=convert_to_b64(bbox_input.image),
filename=bbox_input.filename,
labels=bbox_input.labels,
bboxes=bbox_input.bboxes
bboxes=bbox_input.bboxes,
)
for bbox_input in bboxes_input
]
Expand Down Expand Up @@ -724,6 +739,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
data = {
"image": image_b64,
"tool": "panoptic_segmentation",
"function_name": "detr_segmentation",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -766,6 +782,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
data = {
"image": image_b64,
"tool": "generate_depth",
"function_name": "depth_anything_v2",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -797,6 +814,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
data = {
"image": image_b64,
"tool": "generate_hed",
"function_name": "generate_soft_edge_image",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -829,6 +847,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
data = {
"image": image_b64,
"tool": "generate_normal",
"function_name": "dpt_hybrid_midas",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -900,6 +919,7 @@ def template_match(
"image": image_b64,
"template": template_image_b64,
"tool": "template_match",
"function_name": "template_match",
}

answer = send_inference_request(data, "tools")
Expand Down Expand Up @@ -1259,50 +1279,6 @@ def overlay_heat_map(
return np.array(combined)


def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
    """Return the full documentation text for a list of tool functions.

    Each entry consists of the function's name, its call signature, and its
    raw docstring, with consecutive entries separated by a blank line.
    """
    entries = [
        f"{fn.__name__}{inspect.signature(fn)}:\n{fn.__doc__}\n\n" for fn in funcs
    ]
    return "".join(entries)


def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
    """Return one-line bullet summaries for a list of tool functions.

    For each function, emits ``- name(signature): summary`` where the summary
    is the docstring truncated before any "Parameters:" section and collapsed
    to single-spaced text. A missing docstring yields an empty summary.
    """
    lines = []
    for fn in funcs:
        doc = fn.__doc__ or ""
        # Keep only the prose that precedes the parameter section, if present.
        marker = doc.find("Parameters:")
        if marker != -1:
            doc = doc[:marker]
        # Collapse all runs of whitespace (including newlines) to single spaces.
        summary = " ".join(doc.split())
        lines.append(f"- {fn.__name__}{inspect.signature(fn)}: {summary}\n")
    return "".join(lines)


def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
    """Tabulate tool functions into a DataFrame with ``desc`` and ``doc`` columns.

    ``desc`` is the docstring summary (text before any "Parameters:" section,
    whitespace-collapsed); ``doc`` is the function name, signature, and raw
    docstring. One row per function, in input order.
    """
    data: Dict[str, List[str]] = {"desc": [], "doc": []}

    for func in funcs:
        desc = func.__doc__
        if desc is None:
            desc = ""
        # Guard the find() result: the original unconditional
        # desc[: desc.find("Parameters:")] sliced with -1 when the marker was
        # absent, silently dropping the docstring's final character. Truncate
        # only when the section actually exists (matches get_tool_descriptions).
        marker = desc.find("Parameters:")
        if marker != -1:
            desc = desc[:marker]
        # Collapse all whitespace runs (including newlines) to single spaces.
        desc = " ".join(desc.split())

        doc = f"{func.__name__}{inspect.signature(func)}:\n{func.__doc__}"
        data["desc"].append(desc)
        data["doc"].append(doc)

    return pd.DataFrame(data)  # type: ignore


TOOLS = [
owl_v2,
grounding_sam,
Expand Down

0 comments on commit 57845ee

Please sign in to comment.