diff --git a/vision_agent/tools/tool_utils.py b/vision_agent/tools/tool_utils.py index 185563a4..2a260c41 100644 --- a/vision_agent/tools/tool_utils.py +++ b/vision_agent/tools/tool_utils.py @@ -1,7 +1,7 @@ import inspect import logging import os -from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple +from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple, Union import pandas as pd from IPython.display import display @@ -34,7 +34,7 @@ def send_inference_request( files: Optional[List[Tuple[Any, ...]]] = None, v2: bool = False, metadata_payload: Optional[Dict[str, Any]] = None, -) -> Dict[str, Any]: +) -> Union[Dict[str, Any], List[Dict[str, Any]]]: # TODO: runtime_tag and function_name should be metadata_payload and now included # in the service payload try: diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py index c4f3b8df..a4eee6ac 100644 --- a/vision_agent/tools/tools.py +++ b/vision_agent/tools/tools.py @@ -1660,7 +1660,10 @@ def overlay_counting_results( fontsize = max(10, int(min(width, height) / 80)) pil_image = ImageEnhance.Brightness(pil_image).enhance(0.5) draw = ImageDraw.Draw(pil_image) - font = ImageFont.load_default(size=fontsize) + font = ImageFont.truetype( + str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")), + fontsize, + ) for i, elt in enumerate(instances): label = f"{i}" diff --git a/vision_agent/utils/image_utils.py b/vision_agent/utils/image_utils.py index d2bc8a6d..9c39be42 100644 --- a/vision_agent/utils/image_utils.py +++ b/vision_agent/utils/image_utils.py @@ -181,7 +181,7 @@ def denormalize_bbox( raise ValueError("Bounding box must be of length 4.") arr = np.array(bbox) - if np.all((arr >= 0) & (arr <= 1)): + if np.all((arr[:2] >= 0) & (arr[:2] <= 1)): x1, y1, x2, y2 = bbox x1 = round(x1 * image_size[1]) y1 = round(y1 * image_size[0])