diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 4db319f9..73471a30 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -1,4 +1,8 @@
-from vision_agent.agent.agent_utils import extract_code, extract_json
+from vision_agent.agent.agent_utils import (
+    extract_code,
+    extract_json,
+    remove_installs_from_code,
+)
 
 
 def test_basic_json_extract():
@@ -43,3 +47,24 @@ def test_basic_json_extract():
     a_code = extract_code(a)
     assert "def test_basic_json_extract():" in a_code
     assert "assert extract_json(a) == {" in a_code
+
+
+def test_remove_installs_from_code():
+    """Top-level `!pip install` lines are stripped; everything else survives."""
+    # The fixture's lines sit at column 0 on purpose: they are file content.
+    a = """import os
+import sys
+
+!pip install pandas
+
+
+def test():
+    print("!pip install dummy")
+"""
+    out = remove_installs_from_code(a)
+    assert "import os" in out
+    assert "import sys" in out
+    assert "!pip install pandas" not in out
+    # The same text inside a string literal is not an install command.
+    assert "!pip install dummy" in out
+    assert "def test():" in out
diff --git a/tests/unit/tools/test_tools.py b/tests/unit/tools/test_tools.py
index b2f1a87a..eec3c78f 100644
--- a/tests/unit/tools/test_tools.py
+++ b/tests/unit/tools/test_tools.py
@@ -1,25 +1,69 @@
-# Generated by CodiumAI
+import os
+import tempfile
 from pathlib import Path
 
 import numpy as np
 
-from vision_agent.tools.tools import save_video
+from vision_agent.tools.tools import save_image, save_video
 
 
-class TestSaveVideo:
-    def test_saves_frames_without_output_path(self):
-        frames = [
-            np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
-        ]
-        output_path = save_video(frames)
-        assert Path(output_path).exists()
+def test_saves_frames_without_output_path():
+    frames = [
+        np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
+    ]
+    output_path = save_video(frames)
+    assert Path(output_path).exists()
+    os.remove(output_path)
+
 
-    def test_saves_frames_with_output_path(self, tmp_path):
-        frames = [
-            np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
-        ]
-        video_output_path = str(tmp_path / "output.mp4")
-        output_path = save_video(frames, video_output_path)
+def test_saves_frames_with_output_path():
+    frames = [
+        np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(10)
+    ]
 
-        assert output_path == video_output_path
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        video_output_path = Path(tmp_dir) / "output.mp4"
+        output_path = save_video(frames, str(video_output_path))
+
+        assert output_path == str(video_output_path)
         assert Path(output_path).exists()
+
+
+def _value_error_message(func, *args):
+    """Return the message of the ValueError raised by func(*args).
+
+    Fails the calling test when no ValueError is raised, so a missing
+    validation cannot slip through as a silently passing test.
+    """
+    try:
+        func(*args)
+    except ValueError as e:
+        return str(e)
+    raise AssertionError(f"{func.__name__} did not raise ValueError")
+
+
+def test_save_null_image():
+    message = _value_error_message(save_image, None, "tmp.jpg")
+    assert message == "The image is not a valid NumPy array with shape (H, W, C)"
+
+
+def test_save_empty_image():
+    image = np.zeros((0, 0, 3), dtype=np.uint8)
+    message = _value_error_message(save_image, image, "tmp.jpg")
+    assert message == "The image is not a valid NumPy array with shape (H, W, C)"
+
+
+def test_save_null_video():
+    message = _value_error_message(save_video, None, "tmp.mp4")
+    assert message == "Frames must be a list of NumPy arrays"
+
+
+def test_save_empty_list():
+    message = _value_error_message(save_video, [], "tmp.mp4")
+    assert message == "Frames must be a list of NumPy arrays"
+
+
+def test_save_invalid_frame():
+    frames = [np.zeros((0, 0, 3), dtype=np.uint8)]
+    message = _value_error_message(save_video, frames, "tmp.mp4")
+    assert message == "A frame is not a valid NumPy array with shape (H, W, C)"
diff --git a/vision_agent/agent/agent_utils.py b/vision_agent/agent/agent_utils.py
index dc0debee..624ad608 100644
--- a/vision_agent/agent/agent_utils.py
+++ b/vision_agent/agent/agent_utils.py
@@ -77,3 +77,22 @@ def extract_code(code: str) -> str:
     if code.startswith("python\n"):
         code = code[len("python\n") :]
     return code
+
+
+def remove_installs_from_code(code: str) -> str:
+    """Strip notebook-style ``!pip install`` lines from generated code.
+
+    Only lines that begin with ``!pip install`` at column 0 are removed, one
+    whole line at a time. The same text appearing later in a line (for
+    example inside a string literal) is left untouched.
+
+    Parameters:
+        code (str): Source code that may contain ``!pip install`` lines.
+
+    Returns:
+        str: The code with every such line removed.
+    """
+    # MULTILINE anchors ``^`` at each line start, so an install command on the
+    # very first line is caught too. DOTALL is deliberately not used: ``.``
+    # must stop at the newline so a match can never swallow following code.
+    return re.sub(r"^!pip install.*\n?", "", code, flags=re.MULTILINE)
diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 3c1682e8..c4e36156 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -407,8 +407,6 @@ def chat_with_code(
             code_interpreter.download_file(
                 str(remote_artifacts_path.name), str(self.local_artifacts_path)
             )
-            artifacts.load(self.local_artifacts_path)
-            artifacts.save()
         return orig_chat, artifacts
 
     def streaming_message(self, message: Dict[str, Any]) -> None:
diff --git a/vision_agent/agent/vision_agent_coder.py b/vision_agent/agent/vision_agent_coder.py
index aa4d83da..1e5030a2 100644
--- a/vision_agent/agent/vision_agent_coder.py
+++ b/vision_agent/agent/vision_agent_coder.py
@@ -13,7 +13,11 @@
 
 import vision_agent.tools as T
 from vision_agent.agent import Agent
-from vision_agent.agent.agent_utils import extract_code, extract_json
+from vision_agent.agent.agent_utils import (
+    extract_code,
+    extract_json,
+    remove_installs_from_code,
+)
 from vision_agent.agent.vision_agent_coder_prompts import (
     CODE,
     FIX_BUG,
@@ -836,8 +840,8 @@ def chat_with_workflow(
                 media=media_list,
             )
             success = cast(bool, results["success"])
-            code = cast(str, results["code"])
-            test = cast(str, results["test"])
+            code = remove_installs_from_code(cast(str, results["code"]))
+            test = remove_installs_from_code(cast(str, results["test"]))
             working_memory.extend(results["working_memory"])  # type: ignore
             plan.append({"code": code, "test": test, "plan": plan_i})
 
diff --git a/vision_agent/agent/vision_agent_prompts.py b/vision_agent/agent/vision_agent_prompts.py
index bc3295ef..a8b1e543 100644
--- a/vision_agent/agent/vision_agent_prompts.py
+++ b/vision_agent/agent/vision_agent_prompts.py
@@ -28,7 +28,8 @@
 1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
 2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
 3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question, set `let_user_respond` to `true`.
-4. **Output in JSON**: Respond in the following format in JSON:
+4. **Response**: Keep your responses short and concise. Provide the user only with the information they need to continue the conversation.
+5. **Output in JSON**: Respond in the following format in JSON:
 
 ```json
 {{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
@@ -62,7 +63,7 @@
 [{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}, {'score': 0.23, 'label': 'dog', 'box': [0.2, 0.3, 0.4, 0.5]}]
 
 
-AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect dogs and shown the output, do the results look good to you?", "let_user_respond": true}
+AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "The code detected two dogs, do the results look good to you?", "let_user_respond": true}
 """
 
 EXAMPLES_CODE1_EXTRA = """
@@ -91,7 +92,7 @@
 ----- stdout -----
 [{'score': 0.99, 'label': 'dog', 'box': [0.1, 0.2, 0.3, 0.4]}]
 
-AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect one dog and shown you the output, do the results look good to you?", "let_user_respond": true}
+AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "The code detected one dog, do these results look good to you?", "let_user_respond": true}
 """
 
 EXAMPLES_CODE2 = """
@@ -157,16 +158,16 @@
 ----- stdout -----
 2
 
-AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
+AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "The code to detect workers with helmets is saved in code.py and the visualization under 'workers_viz.png'.", "let_user_respond": true}
 
 USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"
 
-AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "I will fine tune florence2 with the labels you provided <execute_python>object_detection_fine_tuning([{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model", "response": "<execute_python>object_detection_fine_tuning([{'image_path': 'image1.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}])</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 [Fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
 
-AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will not update the code to use the fine tuned model. <execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
+AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "<execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
 
 OBSERVATION:
 [Artifact code.py edits]
diff --git a/vision_agent/tools/__init__.py b/vision_agent/tools/__init__.py
index e5b7c334..da74f677 100644
--- a/vision_agent/tools/__init__.py
+++ b/vision_agent/tools/__init__.py
@@ -45,7 +45,6 @@
     loca_zero_shot_counting,
     ocr,
     overlay_bounding_boxes,
-    overlay_counting_results,
     overlay_heat_map,
     overlay_segmentation_masks,
     owl_v2_image,
diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index 976addae..dc910300 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -116,7 +116,9 @@ def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
         )
         output_str = "[Artifacts loaded]\n"
         for k in self.artifacts.keys():
-            output_str += f"Artifact {k} loaded to {str(loaded_path / k)}\n"
+            output_str += (
+                f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
+            )
         output_str += "[End of artifacts]\n"
         print(output_str)
         return output_str
diff --git a/vision_agent/tools/tool_utils.py b/vision_agent/tools/tool_utils.py
index 924b96e6..b35d6fef 100644
--- a/vision_agent/tools/tool_utils.py
+++ b/vision_agent/tools/tool_utils.py
@@ -1,6 +1,6 @@
-import os
 import inspect
 import logging
+import os
 from base64 import b64encode
 from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple
 
diff --git a/vision_agent/tools/tools.py b/vision_agent/tools/tools.py
index b33df8ec..71646c45 100644
--- a/vision_agent/tools/tools.py
+++ b/vision_agent/tools/tools.py
@@ -13,7 +13,7 @@
 import cv2
 import numpy as np
 import requests
-from PIL import Image, ImageDraw, ImageEnhance, ImageFont
+from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
 from pytube import YouTube  # type: ignore
 
@@ -1150,10 +1150,10 @@ def florence2_image_caption(image: np.ndarray, detail_caption: bool = True) -> s
 def florence2_phrase_grounding(
     prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
 ) -> List[Dict[str, Any]]:
-    """'florence2_phrase_grounding' will run florence2 on a image. It can
-    detect multiple objects given a text prompt which can be object names or caption.
-    You can optionally separate the object names in the text with commas. It returns
-    a list of bounding boxes with normalized coordinates, label names and associated
+    """'florence2_phrase_grounding' is a tool that can detect multiple
+    objects given a text prompt which can be object names or caption. You
+    can optionally separate the object names in the text with commas. It returns a list
+    of bounding boxes with normalized coordinates, label names and associated
     probability scores of 1.0.
 
     Parameters:
@@ -1812,6 +1812,11 @@ def save_image(image: np.ndarray, file_path: str) -> None:
     """
     from IPython.display import display
 
+    # Validate before touching PIL: reject non-arrays, arrays with fewer than
+    # two axes, and arrays where any dimension is zero (nothing to encode).
+    if not isinstance(image, np.ndarray) or image.ndim < 2 or image.size == 0:
+        raise ValueError("The image is not a valid NumPy array with shape (H, W, C)")
+
     pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
     display(pil_image)
     pil_image.save(file_path)
@@ -1838,6 +1843,15 @@ def save_video(
     if fps <= 0:
         raise ValueError(f"fps must be greater than 0 got {fps}")
 
+    if not isinstance(frames, list) or len(frames) == 0:
+        raise ValueError("Frames must be a list of NumPy arrays")
+
+    for frame in frames:
+        # A frame needs at least (H, W) axes and no zero-length dimension;
+        # checking ndim first avoids an IndexError on 0-D / 1-D arrays.
+        if not isinstance(frame, np.ndarray) or frame.ndim < 2 or frame.size == 0:
+            raise ValueError("A frame is not a valid NumPy array with shape (H, W, C)")
+
     if output_video_path is None:
         output_video_path = tempfile.NamedTemporaryFile(
             delete=False, suffix=".mp4"
@@ -1907,30 +1921,36 @@ def overlay_bounding_boxes(
         bboxes = bbox_int[i]
         bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
 
-        width, height = pil_image.size
-        fontsize = max(12, int(min(width, height) / 40))
-        draw = ImageDraw.Draw(pil_image)
-        font = ImageFont.truetype(
-            str(
-                resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
-            ),
-            fontsize,
-        )
-
-        for elt in bboxes:
-            label = elt["label"]
-            box = elt["bbox"]
-            scores = elt["score"]
-
-            # denormalize the box if it is normalized
-            box = denormalize_bbox(box, (height, width))
-            draw.rectangle(box, outline=color[label], width=4)
-            text = f"{label}: {scores:.2f}"
-            text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
-            draw.rectangle(
-                (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
+        if len(bboxes) > 20:
+            pil_image = _plot_counting(pil_image, bboxes, color)
+        else:
+            width, height = pil_image.size
+            fontsize = max(12, int(min(width, height) / 40))
+            draw = ImageDraw.Draw(pil_image)
+            font = ImageFont.truetype(
+                str(
+                    resources.files("vision_agent.fonts").joinpath(
+                        "default_font_ch_en.ttf"
+                    )
+                ),
+                fontsize,
             )
-            draw.text((box[0], box[1]), text, fill="black", font=font)
+
+            for elt in bboxes:
+                label = elt["label"]
+                box = elt["bbox"]
+                scores = elt["score"]
+
+                # denormalize the box if it is normalized
+                box = denormalize_bbox(box, (height, width))
+                draw.rectangle(box, outline=color[label], width=4)
+                text = f"{label}: {scores:.2f}"
+                text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
+                draw.rectangle(
+                    (box[0], box[1], text_box[2], text_box[3]), fill=color[label]
+                )
+                draw.text((box[0], box[1]), text, fill="black", font=font)
+
         frame_out.append(np.array(pil_image))
     return frame_out[0] if len(frame_out) == 1 else frame_out
 
@@ -2089,39 +2109,19 @@ def overlay_heat_map(
     return np.array(combined)
 
 
-def overlay_counting_results(
-    image: np.ndarray, instances: List[Dict[str, Any]]
-) -> np.ndarray:
-    """'overlay_counting_results' is a utility function that displays counting results on
-    an image.
-
-    Parameters:
-        image (np.ndarray): The image to display the bounding boxes on.
-        instances (List[Dict[str, Any]]): A list of dictionaries containing the bounding
-            box information of each instance
-
-    Returns:
-        np.ndarray: The image with the instance_id dislpayed
-
-    Example
-    -------
-        >>> image_with_bboxes = overlay_counting_results(
-            image, [{'score': 0.99, 'label': 'object', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
-        )
-    """
-    pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
-    color = (158, 218, 229)
-
-    width, height = pil_image.size
+def _plot_counting(
+    image: Image.Image,
+    bboxes: List[Dict[str, Any]],
+    colors: Dict[str, Tuple[int, int, int]],
+) -> Image.Image:
+    width, height = image.size
     fontsize = max(10, int(min(width, height) / 80))
-    pil_image = ImageEnhance.Brightness(pil_image).enhance(0.5)
-    draw = ImageDraw.Draw(pil_image)
+    draw = ImageDraw.Draw(image)
     font = ImageFont.truetype(
         str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
         fontsize,
     )
-
-    for i, elt in enumerate(instances, 1):
+    for i, elt in enumerate(bboxes, 1):
         label = f"{i}"
         box = elt["bbox"]
 
@@ -2143,7 +2143,7 @@ def overlay_counting_results(
         text_y1 = cy + text_height / 2
 
         # Draw the rectangle encapsulating the text
-        draw.rectangle((text_x0, text_y0, text_x1, text_y1), fill=color)
+        draw.rectangle((text_x0, text_y0, text_x1, text_y1), fill=colors[elt["label"]])
 
         # Draw the text at the center of the bounding box
         draw.text(
@@ -2154,7 +2154,7 @@ def overlay_counting_results(
             anchor="lt",
         )
 
-    return np.array(pil_image)
+    return image
 
 
 FUNCTION_TOOLS = [
@@ -2187,7 +2187,6 @@ def overlay_counting_results(
     overlay_bounding_boxes,
     overlay_segmentation_masks,
     overlay_heat_map,
-    overlay_counting_results,
 ]
 
 TOOLS = FUNCTION_TOOLS + UTIL_TOOLS
diff --git a/vision_agent/tools/tools_types.py b/vision_agent/tools/tools_types.py
index 1cc765b6..b2812fc0 100644
--- a/vision_agent/tools/tools_types.py
+++ b/vision_agent/tools/tools_types.py
@@ -1,6 +1,6 @@
 from enum import Enum
-from uuid import UUID
 from typing import List, Optional, Tuple, Union
+from uuid import UUID
 
 from pydantic import BaseModel, ConfigDict, Field, SerializationInfo, field_serializer