diff --git a/vision_agent/agent/vision_agent_v3.py b/vision_agent/agent/vision_agent_v3.py
index f774a8f4..6d194d9c 100644
--- a/vision_agent/agent/vision_agent_v3.py
+++ b/vision_agent/agent/vision_agent_v3.py
@@ -3,7 +3,7 @@
 import logging
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union, cast, Callable
+from typing import Any, Dict, List, Optional, Union, cast, Callable, no_type_check
 
 from rich.console import Console
 from rich.syntax import Syntax
@@ -117,6 +117,7 @@ def write_and_test_code(
     log_progress: Callable[[Dict[str, Any]], None],
     verbosity: int = 0,
     max_retries: int = 3,
+    input_media: Optional[Union[str, Path]] = None,
 ) -> Dict[str, Any]:
     code = extract_code(
         coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
@@ -124,14 +125,18 @@ def write_and_test_code(
     test = extract_code(
         tester(
             SIMPLE_TEST.format(
-                docstring=tool_utils, question=task, code=code, feedback=working_memory
+                docstring=tool_utils,
+                question=task,
+                code=code,
+                feedback=working_memory,
+                media=input_media,
             )
         )
     )
 
     success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
     if verbosity == 2:
-        _LOGGER.info("First code and tests:")
+        _LOGGER.info("Initial code and tests:")
         log_progress(
             {
                 "log": "Code:",
@@ -153,7 +158,7 @@ def write_and_test_code(
                 "result": result,
             }
         )
-        _LOGGER.info(f"First result: {result}")
+        _LOGGER.info(f"Initial result: {result}")
 
     count = 0
     new_working_memory = []
@@ -198,16 +203,18 @@ def write_and_test_code(
             _LOGGER.info(f"Debug result: {result}")
         count += 1
 
-    if verbosity == 1:
+    if verbosity >= 1:
+        _LOGGER.info("Final code and tests:")
         _CONSOLE.print(
             Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
         )
-        _LOGGER.info(f"Result: {result}")
+        _LOGGER.info(f"Final Result: {result}")
 
     return {
         "code": code,
         "test": test,
         "success": success,
+        "test_result": result,
         "working_memory": new_working_memory,
     }
 
@@ -263,23 +270,26 @@ def __init__(
             else tool_recommender
         )
         self.verbosity = verbosity
-        self.max_retries = 3
+        self.max_retries = 2
         self.report_progress_callback = report_progress_callback
 
+    @no_type_check
     def __call__(
         self,
         input: Union[List[Dict[str, str]], str],
         image: Optional[Union[str, Path]] = None,
-    ) -> str:
+    ) -> Dict[str, Any]:
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
         results = self.chat_with_workflow(input, image)
-        return results["code"]  # type: ignore
+        results.pop("working_memory")
+        return results
 
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
         image: Optional[Union[str, Path]] = None,
+        self_reflection: bool = False,
     ) -> Dict[str, Any]:
         if len(chat) == 0:
             raise ValueError("Chat cannot be empty.")
@@ -302,13 +312,14 @@ def chat_with_workflow(
                 chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
             )
             plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
-            if self.verbosity == 1 or self.verbosity == 2:
+            if self.verbosity >= 1:
                 self.log_progress(
                     {
                         "log": "Going to run the following plan(s) in sequence:\n",
                         "plan": plan_i,
                     }
                 )
+
                 _LOGGER.info(
                     f"""
 {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -330,6 +341,7 @@ def chat_with_workflow(
                 self.debugger,
                 self.log_progress,
                 verbosity=self.verbosity,
+                input_media=image,
             )
             success = cast(bool, results["success"])
             code = cast(str, results["code"])
@@ -337,18 +349,21 @@ def chat_with_workflow(
             working_memory.extend(results["working_memory"])  # type: ignore
             plan.append({"code": code, "test": test, "plan": plan_i})
 
-            reflection = reflect(chat, plan_i_str, code, self.planner)
-            if self.verbosity > 0:
-                self.log_progress(
-                    {
-                        "log": "Reflection:",
-                        "reflection": reflection,
-                    }
-                )
-                _LOGGER.info(f"Reflection: {reflection}")
-            feedback = cast(str, reflection["feedback"])
-            success = cast(bool, reflection["success"])
-            working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
+            if self_reflection:
+                reflection = reflect(chat, plan_i_str, code, self.planner)
+                if self.verbosity > 0:
+                    self.log_progress(
+                        {
+                            "log": "Reflection:",
+                            "reflection": reflection,
+                        }
+                    )
+                    _LOGGER.info(f"Reflection: {reflection}")
+                feedback = cast(str, reflection["feedback"])
+                success = cast(bool, reflection["success"])
+                working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
+
+            retries += 1
 
         self.log_progress(
             {
@@ -360,6 +375,7 @@ def chat_with_workflow(
         return {
             "code": code,
             "test": test,
+            "test_result": results["test_result"],
             "plan": plan,
             "working_memory": working_memory,
         }
diff --git a/vision_agent/agent/vision_agent_v3_prompts.py b/vision_agent/agent/vision_agent_v3_prompts.py
index 3e0813af..769559a4 100644
--- a/vision_agent/agent/vision_agent_v3_prompts.py
+++ b/vision_agent/agent/vision_agent_v3_prompts.py
@@ -61,6 +61,7 @@
 2. **Algorithm/Method Selection**: Decide on the most efficient way.
 3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
 4. **Code Generation**: Translate your pseudocode into executable Python code.
+5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
 """
 
 TEST = """
@@ -149,7 +150,7 @@ def find_text(image_path: str, text: str) -> str:
 
 **Input Code Snippet**:
 ```python
-### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
+### Please decide how you want to generate test cases, based on either the incomplete code or the completed version.
 {code}
 ```
 
@@ -159,8 +160,12 @@ def find_text(image_path: str, text: str) -> str:
 **Instructions**:
 1. Verify the fundamental functionality under normal conditions.
 2. Ensure each test case is well-documented with comments explaining the scenario it covers.
-3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
-4. DO NOT mock any functions, you must test their functionality as is.
+3. Your test case MUST run only on the given image which is {media}
+4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
+5. DO NOT mock any functions, you must test their functionality as is.
+6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
+7. DO NOT import the function being tested, as it will already be available in the testing environment.
+8. Print the output of the function that is being tested.
 """
 
 
diff --git a/vision_agent/tools/tools_v2.py b/vision_agent/tools/tools_v2.py
index 04f4dedf..8e202856 100644
--- a/vision_agent/tools/tools_v2.py
+++ b/vision_agent/tools/tools_v2.py
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
     return cast(float, np.min(dist_matrix))
 
 
-def closest_box_distance(box1: List[float], box2: List[float]) -> float:
+def closest_box_distance(
+    box1: List[float], box2: List[float], image_size: Tuple[int, int]
+) -> float:
     """'closest_box_distance' calculates the closest distance between two bounding boxes.
 
     Parameters:
         box1 (List[float]): The first bounding box.
         box2 (List[float]): The second bounding box.
+        image_size (Tuple[int, int]): The size of the image given as (height, width).
 
     Returns:
         float: The closest distance between the two bounding boxes.
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
     141.42
     """
 
-    x11, y11, x12, y12 = box1
-    x21, y21, x22, y22 = box2
+    x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
+    x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
 
     horizontal_distance = np.max([0, x21 - x12, x11 - x22])
     vertical_distance = np.max([0, y21 - y12, y11 - y22])