Skip to content

Commit 075b897

Browse files
committed
adjust payload
1 parent d661cd3 commit 075b897

File tree

3 files changed

+40
-38
lines changed

3 files changed

+40
-38
lines changed

vision_agent/tools/tool_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import os
12
import inspect
23
import logging
3-
import os
44
from base64 import b64encode
55
from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
66

@@ -38,7 +38,7 @@ def send_inference_request(
3838
v2: bool = False,
3939
metadata_payload: Optional[Dict[str, Any]] = None,
4040
) -> Any:
41-
# TODO: runtime_tag and function_name should be metadata_payload and now included
41+
# TODO: runtime_tag and function_name should be in metadata_payload and not included
4242
# in the service payload
4343
if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
4444
payload["runtime_tag"] = runtime_tag

vision_agent/tools/tools.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1+
import os
12
import io
23
import json
34
import logging
4-
import os
55
import tempfile
66
import urllib.request
7-
from importlib import resources
7+
from uuid import UUID
88
from pathlib import Path
9+
from importlib import resources
910
from typing import Any, Dict, List, Optional, Tuple, Union, cast
10-
from uuid import UUID
1111

1212
import cv2
13-
import numpy as np
1413
import requests
15-
from PIL import Image, ImageDraw, ImageEnhance, ImageFont
16-
from pillow_heif import register_heif_opener # type: ignore
14+
import numpy as np
1715
from pytube import YouTube # type: ignore
16+
from pillow_heif import register_heif_opener # type: ignore
17+
from PIL import Image, ImageDraw, ImageEnhance, ImageFont
1818

1919
from vision_agent.clients.landing_public_api import LandingPublicAPI
2020
from vision_agent.lmm.lmm import OpenAILMM
@@ -28,7 +28,6 @@
2828
send_task_inference_request,
2929
)
3030
from vision_agent.tools.tools_types import (
31-
FineTuning,
3231
Florence2FtRequest,
3332
JobStatus,
3433
ODResponseData,
@@ -194,12 +193,16 @@ def owl_v2_image(
194193
data_obj = Florence2FtRequest(
195194
image=image_b64,
196195
task=PromptTask.PHRASE_GROUNDING,
197-
tool="florencev2_fine_tuning",
198196
prompt=prompt,
199-
fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
197+
job_id=UUID(fine_tune_id),
198+
)
199+
data = data_obj.model_dump(by_alias=True, exclude_none=True)
200+
detections = send_inference_request(
201+
data,
202+
"florence2-ft",
203+
v2=True,
204+
metadata_payload={"function_name": "owl_v2_image"},
200205
)
201-
data = data_obj.model_dump(by_alias=True)
202-
detections = send_inference_request(data, "tools", v2=False)
203206
# get the first frame
204207
detection = detections[0]
205208
bboxes_formatted = [
@@ -420,15 +423,17 @@ def florence2_sam2_image(
420423
req_data_obj = Florence2FtRequest(
421424
image=image_b64,
422425
task=PromptTask.PHRASE_GROUNDING,
423-
tool="florencev2_fine_tuning",
424426
prompt=prompt,
425427
postprocessing="sam2",
426-
fine_tuning=FineTuning(
427-
job_id=UUID(fine_tune_id),
428-
),
428+
job_id=UUID(fine_tune_id),
429+
)
430+
req_data = req_data_obj.model_dump(by_alias=True, exclude_none=True)
431+
detections_ft = send_inference_request(
432+
req_data,
433+
"florence2-ft",
434+
v2=True,
435+
metadata_payload={"function_name": "florence2_sam2_image"},
429436
)
430-
req_data = req_data_obj.model_dump(by_alias=True)
431-
detections_ft = send_inference_request(req_data, "tools", v2=False)
432437
# get the first frame
433438
detection = detections_ft[0]
434439
return_data = []
@@ -1136,6 +1141,9 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
11361141
return answer[task] # type: ignore
11371142

11381143

1144+
# TODO: add video support
1145+
1146+
11391147
def florence2_phrase_grounding(
11401148
prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
11411149
) -> List[Dict[str, Any]]:
@@ -1180,15 +1188,14 @@ def florence2_phrase_grounding(
11801188
data_obj = Florence2FtRequest(
11811189
image=image_b64,
11821190
task=PromptTask.PHRASE_GROUNDING,
1183-
tool="florencev2_fine_tuning",
11841191
prompt=prompt,
1185-
fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
1192+
job_id=UUID(fine_tune_id),
11861193
)
1187-
data = data_obj.model_dump(by_alias=True)
1194+
data = data_obj.model_dump(by_alias=True, exclude_none=True)
11881195
detections = send_inference_request(
11891196
data,
1190-
"tools",
1191-
v2=False,
1197+
"florence2-ft",
1198+
v2=True,
11921199
metadata_payload={"function_name": "florence2_phrase_grounding"},
11931200
)
11941201
# get the first frame

vision_agent/tools/tools_types.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from enum import Enum
2-
from typing import List, Optional, Tuple, Union
32
from uuid import UUID
3+
from typing import List, Optional, Tuple, Union
44

55
from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer
66

@@ -24,25 +24,20 @@ class PromptTask(str, Enum):
2424
PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
2525

2626

27-
class FineTuning(BaseModel):
28-
model_config = ConfigDict(populate_by_name=True)
29-
30-
job_id: UUID = Field(alias="jobId")
31-
32-
@field_serializer("job_id")
33-
def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
34-
return str(job_id)
35-
36-
3727
class Florence2FtRequest(BaseModel):
    """Request body for a fine-tuned Florence-2 inference call.

    Callers build this model and serialize it with
    ``model_dump(by_alias=True, exclude_none=True)`` before sending it, so
    fields left at ``None`` are omitted from the payload and ``job_id`` is
    emitted under its ``jobId`` alias.
    """

    # Allow construction by field name (``job_id=``) as well as by alias.
    model_config = ConfigDict(populate_by_name=True)

    # Both media fields default to None: in pydantic v2 an Optional field
    # without a default is still *required*, which would force every
    # image-only caller to pass ``video=None`` explicitly.
    image: Optional[str] = None
    video: Optional[bytes] = None
    task: PromptTask
    prompt: Optional[str] = ""
    # NOTE(review): presumably only meaningful together with ``video`` - confirm.
    chunk_length_frames: Optional[int] = None
    postprocessing: Optional[str] = None
    job_id: Optional[UUID] = Field(None, alias="jobId")

    @field_serializer("job_id")
    def serialize_job_id(
        self, job_id: Optional[UUID], _info: SerializationInfo
    ) -> Optional[str]:
        """Render the job id as a plain string, keeping ``None`` as ``None``.

        Without the ``None`` guard a missing job id would be serialized as
        the literal string ``"None"`` whenever the model is dumped without
        ``exclude_none=True``.
        """
        return None if job_id is None else str(job_id)
4641

4742

4843
class JobStatus(str, Enum):

0 commit comments

Comments
 (0)