diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
index 6714ba34..5b8116ae 100644
--- a/vision_agent/agent/vision_agent.py
+++ b/vision_agent/agent/vision_agent.py
@@ -10,11 +10,16 @@
 from vision_agent.agent.vision_agent_prompts import (
     EXAMPLES_CODE1,
     EXAMPLES_CODE2,
+    EXAMPLES_CODE3,
     VA_CODE,
 )
 from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
 from vision_agent.tools import META_TOOL_DOCSTRING, load_image, save_image
-from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
+from vision_agent.tools.meta_tools import (
+    Artifacts,
+    check_and_load_image,
+    use_extra_vision_agent_args,
+)
 from vision_agent.utils import CodeInterpreterFactory
 from vision_agent.utils.execute import CodeInterpreter, Execution
 
@@ -30,7 +35,7 @@ class BoilerplateCode:
     pre_code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, write_media_artifact, florence2_fine_tuning, use_florence2_fine_tuning",
+        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, write_media_artifact, view_media_artifact, florence2_fine_tuning, use_florence2_fine_tuning",
         "artifacts = Artifacts('{remote_path}')",
         "artifacts.load('{remote_path}')",
     ]
@@ -68,10 +73,17 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
 
     prompt = VA_CODE.format(
         documentation=META_TOOL_DOCSTRING,
-        examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}",
+        examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}\n{EXAMPLES_CODE3}",
         conversation=conversation,
     )
-    return extract_json(orch([{"role": "user", "content": prompt}], stream=False))  # type: ignore
+    message: Message = {"role": "user", "content": prompt}
+    if (
+        chat[-1]["role"] == "observation"
+        and "media" in chat[-1]
+        and len(chat[-1]["media"]) > 0  # type: ignore
+    ):
+        message["media"] = chat[-1]["media"]
+    return extract_json(orch([message], stream=False))  # type: ignore
 
 
 def run_code_action(
@@ -296,13 +308,21 @@ def chat_with_code(
             code_action, code_interpreter, str(remote_artifacts_path)
         )
 
+        media_obs = check_and_load_image(code_action)
+
         if self.verbosity >= 1:
             _LOGGER.info(obs)
+
+        chat_elt: Message = {"role": "observation", "content": obs}
+        if media_obs and result.success:
+            chat_elt["media"] = [
+                Path(code_interpreter.remote_path) / media_ob
+                for media_ob in media_obs
+            ]
+
         # don't add execution results to internal chat
-        int_chat.append({"role": "observation", "content": obs})
-        orig_chat.append(
-            {"role": "observation", "content": obs, "execution": result}
-        )
+        int_chat.append(chat_elt)
+        orig_chat.append({**chat_elt, "execution": result})
         self.streaming_message(
             {
                 "role": "observation",
diff --git a/vision_agent/agent/vision_agent_prompts.py b/vision_agent/agent/vision_agent_prompts.py
index 7b0dd600..557283a7 100644
--- a/vision_agent/agent/vision_agent_prompts.py
+++ b/vision_agent/agent/vision_agent_prompts.py
@@ -18,19 +18,24 @@
 {examples}
 --- END EXAMPLES ---
 
-**Instructions**:
-1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
-2. **Output in JSON**: Respond in JSON format, {{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
-
 **Conversation**: Here is the current conversation so far:
 --- START CONVERSATION ---
 {conversation}
+--- END CONVERSATION ---
+
+**Instructions**:
+1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
+2. **Output in JSON**: Respond in the following format in JSON:
+
+```json
+{{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
+```
 """
 
 EXAMPLES_CODE1 = """
-USER: Can you detect the dogs in this image? Media name dog.jpg
+USER: Can you write code to detect the dogs in this image? Media name dog.jpg
 
 OBSERVATION:
 [Artifacts loaded]
@@ -61,6 +66,7 @@
 EXAMPLES_CODE1_EXTRA = """
 USER: The image only has one dog, can you fix this?
 
+OBSERVATION:
 [Artifacts loaded]
 Artifact dog.jpg loaded to /path/to/images/dog.jpg
 Artifact dog_detector.py loaded to /path/to/code/dog_detector.py
@@ -86,8 +92,24 @@
 AGENT: {"thoughts": "One dog is detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect one dog and shown you the output, do the results look good to you?", "let_user_respond": true}
 """
 
-
 EXAMPLES_CODE2 = """
+USER: Can you describe this image?
+
+OBSERVATION:
+[Artifacts loaded]
+Artifact image.jpg loaded to /path/to/images/image.jpg
+[End of artifacts]
+
+AGENT: {"thoughts": "The user hasn't asked me to write any code and the task is very simple, so I will view the image and answer myself to respond to the user quickly.", "response": "view_media_artifact(artifacts, 'image.jpg')", "let_user_respond": false}
+
+OBSERVATION:
+[Image image.jpg displayed]
+
+AGENT: {"thoughts": "The image shows a cat and a dog sitting on the couch, I will tell the user and ask them if they need any other assistance.", "response": "The image contains a dog and a cat sitting on a couch. Can I help you with any other tasks?", "let_user_respond": true}
+"""
+
+
+EXAMPLES_CODE3 = """
 USER: Can you create a function to count workers with helmets, return the count and save a visualization of the bounding boxes?
 
 OBSERVATION:
diff --git a/vision_agent/tools/meta_tools.py b/vision_agent/tools/meta_tools.py
index ccf98287..3ec227f8 100644
--- a/vision_agent/tools/meta_tools.py
+++ b/vision_agent/tools/meta_tools.py
@@ -437,6 +437,33 @@ def list_artifacts(artifacts: Artifacts) -> str:
     return output_str
 
 
+def check_and_load_image(code: str) -> List[str]:
+    if not code.strip():
+        return []
+
+    pattern = r"view_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
+    match = re.search(pattern, code)
+    if match:
+        name = match.group(2)
+        return [name]
+    return []
+
+
+def view_media_artifact(artifacts: Artifacts, name: str) -> str:
+    """Views the image artifact with the given name.
+
+    Parameters:
+        artifacts (Artifacts): The artifacts object to show the image from.
+        name (str): The name of the image artifact to show.
+    """
+    if name not in artifacts:
+        output_str = f"[Artifact {name} does not exist]"
+    else:
+        output_str = f"[Image {name} displayed]"
+    print(output_str)
+    return output_str
+
+
 def get_tool_descriptions() -> str:
     """Returns a description of all the tools that `generate_vision_code` has access to.
     Helpful for answering questions about what types of vision tasks you can do with
@@ -515,7 +542,7 @@ def use_extra_vision_agent_args(
     Returns:
         str: The edited code.
""" - generate_pattern = r"generate_vision_code\(\s*([^\)]+)\)" + generate_pattern = r"generate_vision_code\(\s*([^\)]+)\s*\)" def generate_replacer(match: re.Match) -> str: arg = match.group(1) @@ -526,7 +553,7 @@ def generate_replacer(match: re.Match) -> str: out_str += ")" return out_str - edit_pattern = r"edit_vision_code\(\s*([^\)]+)\)" + edit_pattern = r"edit_vision_code\(\s*([^\)]+)\s*\)" def edit_replacer(match: re.Match) -> str: arg = match.group(1) @@ -604,6 +631,7 @@ def replacer(match: re.Match) -> str: generate_vision_code, edit_vision_code, write_media_artifact, + view_media_artifact, florence2_fine_tuning, use_florence2_fine_tuning, list_artifacts,