From 6b232ed03f658f5de736e169069d85447daf7199 Mon Sep 17 00:00:00 2001
From: Dillon Laird <dillonalaird@gmail.com>
Date: Thu, 18 Apr 2024 15:24:56 -0700
Subject: [PATCH] added reference mask support

---
 examples/mask_app/app.py           | 35 ++++++++++++++++++++++++++++++
 examples/mask_app/requirements.txt |  2 ++
 vision_agent/agent/vision_agent.py |  8 +++++++
 vision_agent/tools/tools.py        | 11 +++++-----
 4 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 examples/mask_app/app.py
 create mode 100644 examples/mask_app/requirements.txt

diff --git a/examples/mask_app/app.py b/examples/mask_app/app.py
new file mode 100644
index 00000000..881efba7
--- /dev/null
+++ b/examples/mask_app/app.py
@@ -0,0 +1,35 @@
+import cv2
+import numpy as np
+import streamlit as st
+from PIL import Image
+from streamlit_drawable_canvas import st_canvas
+
+st.title("Image Segmentation Mask App")
+
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    orig_size = image.size
+
+stroke_width = st.sidebar.slider("Stroke width: ", 1, 50, 25)
+stroke_color = st.sidebar.color_picker("Stroke color hex: ")
+
+canvas_result = st_canvas(
+    fill_color="rgba(255, 165, 0, 0.3)",  # Fixed fill color with some opacity
+    stroke_width=stroke_width,
+    stroke_color=stroke_color,
+    background_color="#eee",
+    background_image=Image.open(uploaded_file) if uploaded_file else None,
+    update_streamlit=True,
+    height=500,
+    drawing_mode="freedraw",
+    key="canvas",
+)
+
+if canvas_result.image_data is not None:
+    mask = canvas_result.image_data.astype("uint8")[..., 3]
+    mask[mask > 0] = 255
+    if st.button("Save Mask Image") and orig_size:
+        mask = cv2.resize(mask, orig_size, interpolation=cv2.INTER_NEAREST)
+        cv2.imwrite("mask.png", mask)
+        st.success("Mask Image saved successfully.")
diff --git a/examples/mask_app/requirements.txt b/examples/mask_app/requirements.txt
new file mode 100644
index 00000000..3ce2aea0
--- /dev/null
+++ b/examples/mask_app/requirements.txt
@@ -0,0 +1,2 @@
+streamlit
+streamlit-drawable-canvas
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index a3f09b82..44c3aa08 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -365,6 +365,7 @@ def visualize_result(all_tool_results: List[Dict]) -> Sequence[Union[str, Path]]
             "grounding_sam_",
             "grounding_dino_",
             "extract_frames_",
+            "dinov_",
         ]:
             continue
 
@@ -469,11 +470,18 @@ def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
         image: Optional[Union[str, Path]] = None,
+        reference_data: Optional[Dict[str, str]] = None,
         visualize_output: Optional[bool] = False,
     ) -> Tuple[str, List[Dict]]:
         question = chat[0]["content"]
         if image:
             question += f" Image name: {image}"
+        if reference_data:
+            if not ("image" in reference_data and "mask" in reference_data):
+                raise ValueError(
+                    f"Reference data must contain 'image' and 'mask'. but got {reference_data}"
+                )
+            question += f" Reference image: {reference_data['image']}, Reference mask: {reference_data['mask']}"
 
         reflections = ""
         final_answer = ""
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index f53ee372..6d2a7b47 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -398,22 +398,21 @@ class DINOv(Tool):
         ],
         "examples": [
             {
-                "scenario": "Can you find all the balloons in this image that is similar to the provided masked area?",
+                "scenario": "Can you find all the balloons in this image that is similar to the provided masked area? Image name: input.jpg Reference image: balloon.jpg Reference mask: balloon_mask.jpg",
                 "parameters": {
                     "prompt": [
-                        {"mask": "reference_balloon_mask1.jpg", "image": "balloon.jpg"},
-                        {"mask": "reference_balloon_mask2.jpg", "image": "balloon.jpg"},
+                        {"mask": "balloon_mask.jpg", "image": "balloon.jpg"},
                     ],
                     "image": "input.jpg",
                 },
             },
             {
-                "scenario": "Count all the objects in this image that is similar to the provided masked area? Image name: input.jpg, Reference mask: mask.jpg, Mask image: background.jpg",
+                "scenario": "Detect all the objects in this image that are similar to the provided mask. Image name: original.jpg Reference image: mask.png Reference mask: background.png",
                 "parameters": {
                     "prompt": [
-                        {"mask": "reference_obj_mask1.jpg", "image": "background.jpg"},
+                        {"mask": "mask.png", "image": "background.png"},
                     ],
-                    "image": "input.jpg",
+                    "image": "original.jpg",
                 },
             },
         ],