diff --git a/README.md b/README.md
index a5f24480..fda69986 100644
--- a/README.md
+++ b/README.md
@@ -168,20 +168,18 @@ result = agent.chat_with_workflow(conv)
 
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an LMM directly to build a tool for
-you. For example:
+while others are hosted for you. You can easily access them yourself, for example if
+you want to run `owl_v2` and visualize the output you can run:
 
 ```python
->>> import vision_agent as va
->>> lmm = va.lmm.OpenAILMM()
->>> detector = lmm.generate_detector("Can you build a jar detector for me?")
->>> detector(va.tools.load_image("jar.jpg"))
-[{"labels": ["jar",],
-    "scores": [0.99],
-    "bboxes": [
-        [0.58, 0.2, 0.72, 0.45],
-    ]
-}]
+import vision_agent.tools as T
+import matplotlib.pyplot as plt
+
+image = T.load_image("dogs.jpg")
+dets = T.owl_v2("dogs", image)
+viz = T.overlay_bounding_boxes(image, dets)
+plt.imshow(viz)
+plt.show()
 ```
 
 You can also add custom tools to the agent:
@@ -214,6 +212,41 @@ function. Make sure the documentation is in the same format above with descripti
 `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
 [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
 
+## Additional LLMs
+### Ollama
+We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
+a few models:
+
+```bash
+ollama pull llama3.1
+ollama pull mxbai-embed-large
+```
+
+`llama3.1` is used for the `OllamaLMM` in `OllamaVisionAgentCoder`. Normally we would
+use an actual LMM such as `llava`, but `llava` cannot handle the long context lengths
+required by the agent. Since `llama3.1` cannot handle images you may see some
+performance degradation. `mxbai-embed-large` is the embedding model used to look up
+tools. You can use it just like you would use `VisionAgentCoder`:
+
+```python
+>>> import vision_agent as va
+>>> agent = va.agent.OllamaVisionAgentCoder()
+>>> agent("Count the apples in the image", media="apples.jpg")
+```
+> WARNING: VisionAgent doesn't work well unless the underlying LMM is sufficiently powerful. Do not expect good results or even working code with smaller models like Llama 3.1 8B.
+
+### Azure OpenAI
+We also provide an `AzureVisionAgentCoder` that uses Azure OpenAI models. To get started
+follow the Azure Setup section below. You can use it just like you would use
+`VisionAgentCoder`:
+
+```python
+>>> import vision_agent as va
+>>> agent = va.agent.AzureVisionAgentCoder()
+>>> agent("Count the apples in the image", media="apples.jpg")
+```
+
+
 ### Azure Setup
 If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
 
@@ -252,6 +285,6 @@ agent = va.agent.AzureVisionAgentCoder()
 2. Follow the instructions to purchase and manage your API credits.
 3. Ensure your API key is correctly configured in your project settings.
 
-Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
-
-For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
+Failure to have sufficient API credits may result in limited or no functionality for
+the features that rely on the OpenAI API. For more details on managing your API usage
+and credits, please refer to the OpenAI API documentation.
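If the default `llama3.1` tag proves too weak for `OllamaVisionAgentCoder` (see the warning above), the constructor added in this change accepts custom LMMs, so a larger Ollama model can be swapped in. A minimal sketch, assuming a local Ollama server and that the illustrative `llama3.1:70b` tag has already been pulled:

```python
# Sketch: overriding the default Ollama models used by OllamaVisionAgentCoder.
# Assumes `ollama pull llama3.1:70b` and `ollama pull mxbai-embed-large` have been
# run locally; the 70b tag is only an example of a stronger model.
import vision_agent as va
from vision_agent.lmm import OllamaLMM

# json_mode=True for the roles that must return JSON (planning, debugging).
planner = OllamaLMM(model_name="llama3.1:70b", temperature=0.0, json_mode=True)
coder = OllamaLMM(model_name="llama3.1:70b", temperature=0.0)

agent = va.agent.OllamaVisionAgentCoder(
    planner=planner,
    coder=coder,
    tester=coder,
    debugger=planner,
)
code = agent("Count the apples in the image", media="apples.jpg")
```

Any argument left as `None` falls back to the `llama3.1` defaults shown in the diff below.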
diff --git a/docs/index.md b/docs/index.md
index d0ed8178..fc5ddde1 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,9 @@
 # 🔍🤖 Vision Agent
+[![](https://dcbadge.vercel.app/api/server/wPdN8RCYew?compact=true&style=flat)](https://discord.gg/wPdN8RCYew)
+![ci_status](https://github.com/landing-ai/vision-agent/actions/workflows/ci_cd.yml/badge.svg)
+[![PyPI version](https://badge.fury.io/py/vision-agent.svg)](https://badge.fury.io/py/vision-agent)
+![version](https://img.shields.io/pypi/pyversions/vision-agent)
+
 
 Vision Agent is a library that helps you utilize agent frameworks to generate code to
 solve your vision task. Many current vision problems can easily take hours or days to
@@ -160,20 +165,18 @@ result = agent.chat_with_workflow(conv)
 
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
-while others are hosted for you. You can also ask an LMM directly to build a tool for
-you. For example:
+while others are hosted for you. You can easily access them yourself, for example if
+you want to run `owl_v2` and visualize the output you can run:
 
 ```python
->>> import vision_agent as va
->>> lmm = va.lmm.OpenAILMM()
->>> detector = lmm.generate_detector("Can you build a jar detector for me?")
->>> detector(va.tools.load_image("jar.jpg"))
-[{"labels": ["jar",],
-    "scores": [0.99],
-    "bboxes": [
-        [0.58, 0.2, 0.72, 0.45],
-    ]
-}]
+import vision_agent.tools as T
+import matplotlib.pyplot as plt
+
+image = T.load_image("dogs.jpg")
+dets = T.owl_v2("dogs", image)
+viz = T.overlay_bounding_boxes(image, dets)
+plt.imshow(viz)
+plt.show()
 ```
 
 You can also add custom tools to the agent:
@@ -206,6 +209,40 @@ function. Make sure the documentation is in the same format above with descripti
 `Parameters:`, `Returns:`, and `Example\n-------`. You can find an example use case
 [here](examples/custom_tools/) as this is what the agent uses to pick and use the tool.
 
+## Additional LLMs
+### Ollama
+We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
+a few models:
+
+```bash
+ollama pull llama3.1
+ollama pull mxbai-embed-large
+```
+
+`llama3.1` is used for the `OllamaLMM` in `OllamaVisionAgentCoder`. Normally we would
+use an actual LMM such as `llava`, but `llava` cannot handle the long context lengths
+required by the agent. Since `llama3.1` cannot handle images you may see some
+performance degradation. `mxbai-embed-large` is the embedding model used to look up
+tools. You can use it just like you would use `VisionAgentCoder`:
+
+```python
+>>> import vision_agent as va
+>>> agent = va.agent.OllamaVisionAgentCoder()
+>>> agent("Count the apples in the image", media="apples.jpg")
+```
+> WARNING: VisionAgent doesn't work well unless the underlying LMM is sufficiently powerful. Do not expect good results or even working code with smaller models like Llama 3.1 8B.
+
+### Azure OpenAI
+We also provide an `AzureVisionAgentCoder` that uses Azure OpenAI models. To get started
+follow the Azure Setup section below. You can use it just like you would use
+`VisionAgentCoder`:
+
+```python
+>>> import vision_agent as va
+>>> agent = va.agent.AzureVisionAgentCoder()
+>>> agent("Count the apples in the image", media="apples.jpg")
+```
+
 ### Azure Setup
 If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
 
@@ -244,6 +281,6 @@ agent = va.agent.AzureVisionAgentCoder()
 2. Follow the instructions to purchase and manage your API credits.
 3. Ensure your API key is correctly configured in your project settings.
 
-Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
-
-For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
+Failure to have sufficient API credits may result in limited or no functionality for
+the features that rely on the OpenAI API. For more details on managing your API usage
+and credits, please refer to the OpenAI API documentation.
diff --git a/docs/lmms.md b/docs/lmms.md
deleted file mode 100644
index 21c329e0..00000000
--- a/docs/lmms.md
+++ /dev/null
@@ -1,20 +0,0 @@
-### LMMs
-One of the problems of dealing with image data is it can be difficult to organize and
-search. For example, you might have a bunch of pictures of houses and want to count how
-many yellow houses you have, or how many houses with adobe roofs. The vision agent
-library uses LMMs to help create tags or descriptions of images to allow you to search
-over them, or use them in a database to carry out other operations.
-
-To get started, you can use an LMM to start generating text from images. The following
-code will use the LLaVA-1.6 34B model to generate a description of the image you pass it.
-
-```python
-import vision_agent as va
-
-model = va.lmm.get_lmm("llava")
-model.generate("Describe this image", "image.png")
->>> "A yellow house with a green lawn."
-```
-
-**WARNING** We are hosting the LLaVA-1.6 34B model, if it times out please wait ~3-5
-min for the server to warm up as it shuts down when usage is low.
diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py
index ccad51e8..a56ebac6 100644
--- a/tests/unit/fixtures.py
+++ b/tests/unit/fixtures.py
@@ -31,3 +31,27 @@ def generator():
         mock_instance = mock.return_value
         mock_instance.chat.completions.create.return_value = mock_generate()
         yield mock_instance
+
+
+@pytest.fixture
+def generate_ollama_lmm_mock(request):
+    content = request.param
+
+    mock_resp = MagicMock()
+    mock_resp.status_code = 200
+    mock_resp.json.return_value = {"response": content}
+    with patch("vision_agent.lmm.lmm.requests.post") as mock:
+        mock.return_value = mock_resp
+        yield mock
+
+
+@pytest.fixture
+def chat_ollama_lmm_mock(request):
+    content = request.param
+
+    mock_resp = MagicMock()
+    mock_resp.status_code = 200
+    mock_resp.json.return_value = {"message": {"content": content}}
+    with patch("vision_agent.lmm.lmm.requests.post") as mock:
+        mock.return_value = mock_resp
+        yield mock
diff --git a/tests/unit/test_lmm.py b/tests/unit/test_lmm.py
index 9cb43650..c954b173 100644
--- a/tests/unit/test_lmm.py
+++ b/tests/unit/test_lmm.py
@@ -1,3 +1,4 @@
+import json
 import tempfile
 from unittest.mock import patch
 
@@ -5,9 +6,13 @@
 import pytest
 from PIL import Image
 
-from vision_agent.lmm.lmm import OpenAILMM
+from vision_agent.lmm.lmm import OllamaLMM, OpenAILMM
 
-from .fixtures import openai_lmm_mock  # noqa: F401
+from .fixtures import (  # noqa: F401
+    chat_ollama_lmm_mock,
+    generate_ollama_lmm_mock,
+    openai_lmm_mock,
+)
 
 
 def create_temp_image(image_format="jpeg"):
@@ -135,6 +140,31 @@ def test_call_with_mock_stream(openai_lmm_mock):  # noqa: F811
     )
 
 
+@pytest.mark.parametrize(
+    "generate_ollama_lmm_mock",
+    ["mocked response"],
+    indirect=["generate_ollama_lmm_mock"],
+)
+def test_generate_ollama_mock(generate_ollama_lmm_mock):  # noqa: F811
+    temp_image = create_temp_image()
+    lmm = OllamaLMM()
+    response = lmm.generate("test prompt", media=[temp_image])
+    assert response == "mocked response"
+    call_args = json.loads(generate_ollama_lmm_mock.call_args.kwargs["data"])
+    assert call_args["prompt"] == "test prompt"
+
+
+@pytest.mark.parametrize(
+    "chat_ollama_lmm_mock", ["mocked response"], indirect=["chat_ollama_lmm_mock"]
+)
+def test_chat_ollama_mock(chat_ollama_lmm_mock):  # noqa: F811
+    lmm = OllamaLMM()
+    response = lmm.chat([{"role": "user", "content": "test prompt"}])
+    assert response == "mocked response"
+    call_args = json.loads(chat_ollama_lmm_mock.call_args.kwargs["data"])
+    assert call_args["messages"][0]["content"] == "test prompt"
+
+
 @pytest.mark.parametrize(
     "openai_lmm_mock",
     ['{"Parameters": {"prompt": "cat"}}'],
     indirect=["openai_lmm_mock"],
diff --git a/vision_agent/agent/__init__.py b/vision_agent/agent/__init__.py
index 79b6abae..2164d688 100644
--- a/vision_agent/agent/__init__.py
+++ b/vision_agent/agent/__init__.py
@@ -1,3 +1,7 @@
 from .agent import Agent
 from .vision_agent import VisionAgent
-from .vision_agent_coder import AzureVisionAgentCoder, VisionAgentCoder
+from .vision_agent_coder import (
+    AzureVisionAgentCoder,
+    OllamaVisionAgentCoder,
+    VisionAgentCoder,
+)
diff --git a/vision_agent/agent/agent_utils.py b/vision_agent/agent/agent_utils.py
index 5d55e963..eb951ccc 100644
--- a/vision_agent/agent/agent_utils.py
+++ b/vision_agent/agent/agent_utils.py
@@ -1,9 +1,24 @@
 import json
 import logging
+import re
 import sys
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 logging.basicConfig(stream=sys.stdout)
+_LOGGER = logging.getLogger(__name__)
+
+
+def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
+    json_pattern = r"\{.*\}"
+    match = re.search(json_pattern, json_str, re.DOTALL)
+    if match:
+        json_str = match.group()
+        try:
+            json_dict = json.loads(json_str)
+            return json_dict  # type: ignore
+        except json.JSONDecodeError:
+            return None
+    return None
 
 
 def extract_json(json_str: str) -> Dict[str, Any]:
@@ -18,8 +33,16 @@ def extract_json(json_str: str) -> Dict[str, Any]:
         json_str = json_str[json_str.find("```") + len("```") :]
         # get the last ``` not one from an intermediate string
         json_str = json_str[: json_str.find("}```")]
+    try:
+        json_dict = json.loads(json_str)
+    except json.JSONDecodeError as e:
+        json_dict = _extract_sub_json(json_str)
+        if json_dict is not None:
+            return json_dict  # type: ignore
+        error_msg = f"Could not extract JSON from the given str: {json_str}"
+        _LOGGER.exception(error_msg)
+        raise ValueError(error_msg) from e
 
-    json_dict = json.loads(json_str)
     return json_dict  # type: ignore
diff --git a/vision_agent/agent/vision_agent_coder.py b/vision_agent/agent/vision_agent_coder.py
index 6bba2905..b10988c6 100644
--- a/vision_agent/agent/vision_agent_coder.py
+++ b/vision_agent/agent/vision_agent_coder.py
@@ -28,11 +28,11 @@
     TEST_PLANS,
     USER_REQ,
 )
-from vision_agent.lmm import LMM, AzureOpenAILMM, Message, OpenAILMM
+from vision_agent.lmm import LMM, AzureOpenAILMM, Message, OllamaLMM, OpenAILMM
 from vision_agent.utils import CodeInterpreterFactory, Execution
 from vision_agent.utils.execute import CodeInterpreter
 from vision_agent.utils.image_utils import b64_to_pil
-from vision_agent.utils.sim import AzureSim, Sim
+from vision_agent.utils.sim import AzureSim, OllamaSim, Sim
 from vision_agent.utils.video import play_video
 
 logging.basicConfig(stream=sys.stdout)
@@ -267,7 +267,11 @@ def pick_plan(
             pass
         count += 1
 
-    if best_plan is None:
+    if (
+        best_plan is None
+        or "best_plan" not in best_plan
+        or ("best_plan" in best_plan and best_plan["best_plan"] not in plans)
+    ):
         best_plan = {"best_plan": list(plans.keys())[0]}
 
     if verbosity >= 1:
@@ -589,8 +593,8 @@ class VisionAgentCoder(Agent):
 
     Example
    -------
-    >>> from vision_agent.agent import VisionAgentCoder
-    >>> agent = VisionAgentCoder()
+    >>> import vision_agent as va
+    >>> agent = va.agent.VisionAgentCoder()
     >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
     """
 
@@ -857,6 +861,64 @@ def log_progress(self, data: Dict[str, Any]) -> None:
             self.report_progress_callback(data)
 
 
+class OllamaVisionAgentCoder(VisionAgentCoder):
+    """VisionAgentCoder that uses Ollama models for planning, coding, testing.
+
+    Pre-requisites:
+    1. Run ollama pull llama3.1 for the LLM
+    2. Run ollama pull mxbai-embed-large for the embedding similarity model
+
+    Technically you should use a VLM such as llava but llava is not able to handle the
+    context length and crashes.
+
+    Example
+    -------
+    >>> import vision_agent as va
+    >>> agent = va.agent.OllamaVisionAgentCoder()
+    >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
+    """
+
+    def __init__(
+        self,
+        planner: Optional[LMM] = None,
+        coder: Optional[LMM] = None,
+        tester: Optional[LMM] = None,
+        debugger: Optional[LMM] = None,
+        tool_recommender: Optional[Sim] = None,
+        verbosity: int = 0,
+        report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
+    ) -> None:
+        super().__init__(
+            planner=(
+                OllamaLMM(model_name="llama3.1", temperature=0.0, json_mode=True)
+                if planner is None
+                else planner
+            ),
+            coder=(
+                OllamaLMM(model_name="llama3.1", temperature=0.0)
+                if coder is None
+                else coder
+            ),
+            tester=(
+                OllamaLMM(model_name="llama3.1", temperature=0.0)
+                if tester is None
+                else tester
+            ),
+            debugger=(
+                OllamaLMM(model_name="llama3.1", temperature=0.0, json_mode=True)
+                if debugger is None
+                else debugger
+            ),
+            tool_recommender=(
+                OllamaSim(T.TOOLS_DF, sim_key="desc")
+                if tool_recommender is None
+                else tool_recommender
+            ),
+            verbosity=verbosity,
+            report_progress_callback=report_progress_callback,
+        )
+
+
 class AzureVisionAgentCoder(VisionAgentCoder):
     """VisionAgentCoder that uses Azure OpenAI APIs for planning, coding, testing.
 
@@ -866,8 +928,8 @@ class AzureVisionAgentCoder(VisionAgentCoder):
 
     Example
     -------
-    >>> from vision_agent import AzureVisionAgentCoder
-    >>> agent = AzureVisionAgentCoder()
+    >>> import vision_agent as va
+    >>> agent = va.agent.AzureVisionAgentCoder()
     >>> code = agent("What percentage of the area of the jar is filled with coffee beans?", media="jar.jpg")
     """
diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py
index 9a8c5bf1..e78a0593 100644
--- a/vision_agent/lmm/lmm.py
+++ b/vision_agent/lmm/lmm.py
@@ -330,12 +330,28 @@ def __init__(
         model_name: str = "llava",
         base_url: Optional[str] = "http://localhost:11434/api",
         json_mode: bool = False,
+        num_ctx: int = 128_000,
         **kwargs: Any,
     ):
+        """Initializes the Ollama LMM. kwargs are passed as 'options' to the model.
+        More information on options can be found here
+        https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
+
+        Parameters:
+            model_name (str): The ollama name of the model.
+            base_url (str): The base URL of the Ollama API.
+            json_mode (bool): Whether to use JSON mode.
+            num_ctx (int): The context length for the model.
+            kwargs (Any): Additional options to pass to the model.
+ """ + self.url = base_url self.model_name = model_name - self.json_mode = json_mode - self.kwargs = kwargs + self.kwargs = {"options": kwargs} + + if json_mode: + self.kwargs["format"] = "json" # type: ignore + self.kwargs["options"]["num_cxt"] = num_ctx def __call__( self, @@ -369,13 +385,14 @@ def chat( url = f"{self.url}/chat" model = self.model_name messages = fixed_chat - data = {"model": model, "messages": messages} + data: Dict[str, Any] = {"model": model, "messages": messages} tmp_kwargs = self.kwargs | kwargs data.update(tmp_kwargs) - json_data = json.dumps(data) if "stream" in tmp_kwargs and tmp_kwargs["stream"]: + json_data = json.dumps(data) + def f() -> Iterator[Optional[str]]: with requests.post(url, data=json_data, stream=True) as stream: if stream.status_code != 200: @@ -392,13 +409,14 @@ def f() -> Iterator[Optional[str]]: return f() else: - stream = requests.post(url, data=json_data) - if stream.status_code != 200: - raise ValueError( - f"Request failed with status code {stream.status_code}" - ) - stream = stream.json() - return stream["message"]["content"] # type: ignore + data["stream"] = False + json_data = json.dumps(data) + resp = requests.post(url, data=json_data) + + if resp.status_code != 200: + raise ValueError(f"Request failed with status code {resp.status_code}") + resp = resp.json() + return resp["message"]["content"] # type: ignore def generate( self, @@ -408,7 +426,7 @@ def generate( ) -> Union[str, Iterator[Optional[str]]]: url = f"{self.url}/generate" - data = { + data: Dict[str, Any] = { "model": self.model_name, "prompt": prompt, "images": [], @@ -416,13 +434,14 @@ def generate( if media and len(media) > 0: for m in media: - data["images"].append(encode_media(m)) # type: ignore + data["images"].append(encode_media(m)) tmp_kwargs = self.kwargs | kwargs data.update(tmp_kwargs) - json_data = json.dumps(data) if "stream" in tmp_kwargs and tmp_kwargs["stream"]: + json_data = json.dumps(data) + def f() -> Iterator[Optional[str]]: with requests.post(url, data=json_data, stream=True) as stream: if stream.status_code != 200: @@ -439,15 +458,15 @@ def f() -> Iterator[Optional[str]]: return f() else: - stream = requests.post(url, data=json_data) + data["stream"] = False + json_data = json.dumps(data) + resp = requests.post(url, data=json_data) - if stream.status_code != 200: - raise ValueError( - f"Request failed with status code {stream.status_code}" - ) + if resp.status_code != 200: + raise ValueError(f"Request failed with status code {resp.status_code}") - stream = stream.json() - return stream["response"] # type: ignore + resp = resp.json() + return resp["response"] # type: ignore class ClaudeSonnetLMM(LMM): diff --git a/vision_agent/utils/__init__.py b/vision_agent/utils/__init__.py index b440db51..9a5a271a 100644 --- a/vision_agent/utils/__init__.py +++ b/vision_agent/utils/__init__.py @@ -6,5 +6,5 @@ Logs, Result, ) -from .sim import AzureSim, Sim, load_sim, merge_sim +from .sim import AzureSim, OllamaSim, Sim, load_sim, merge_sim from .video import extract_frames_from_video diff --git a/vision_agent/utils/sim.py b/vision_agent/utils/sim.py index c3b26403..5c89f700 100644 --- a/vision_agent/utils/sim.py +++ b/vision_agent/utils/sim.py @@ -1,20 +1,21 @@ import os from functools import lru_cache from pathlib import Path -from typing import Dict, List, Optional, Sequence, Union +from typing import Callable, Dict, List, Optional, Sequence, Union import numpy as np import pandas as pd -from openai import AzureOpenAI, Client, OpenAI +import requests +from openai 
import AzureOpenAI, OpenAI from scipy.spatial.distance import cosine # type: ignore @lru_cache(maxsize=512) def get_embedding( - client: Client, text: str, model: str = "text-embedding-3-small" + emb_call: Callable[[List[str]], List[float]], text: str ) -> List[float]: text = text.replace("\n", " ") - return client.embeddings.create(input=[text], model=model).data[0].embedding + return emb_call([text]) class Sim: @@ -35,14 +36,19 @@ def __init__( model: str: The model to use for embeddings. """ self.df = df - self.client = OpenAI(api_key=api_key) + client = OpenAI(api_key=api_key) + self.emb_call = ( + lambda text: client.embeddings.create(input=text, model=model) + .data[0] + .embedding + ) self.model = model if "embs" not in df.columns and sim_key is None: raise ValueError("key is required if no column 'embs' is present.") if sim_key is not None: self.df["embs"] = self.df[sim_key].apply( - lambda x: get_embedding(self.client, x, model=self.model) + lambda x: get_embedding(self.emb_call, x) ) def save(self, sim_file: Union[str, Path]) -> None: @@ -70,7 +76,7 @@ def top_k( Sequence[Dict]: The top k most similar items. """ - embedding = get_embedding(self.client, query, model=self.model) + embedding = get_embedding(self.emb_call, query) self.df["sim"] = self.df.embs.apply(lambda x: 1 - cosine(x, embedding)) res = self.df.sort_values("sim", ascending=False).head(k) if thresh is not None: @@ -105,17 +111,51 @@ def __init__( ) self.df = df - self.client = AzureOpenAI( + client = AzureOpenAI( api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint ) + self.emb_call = ( + lambda text: client.embeddings.create(input=text, model=model) + .data[0] + .embedding + ) self.model = model + if "embs" not in df.columns and sim_key is None: + raise ValueError("key is required if no column 'embs' is present.") + + if sim_key is not None: + self.df["embs"] = self.df[sim_key].apply(lambda x: get_embedding(client, x)) + + +class OllamaSim(Sim): + def __init__( + self, + df: pd.DataFrame, + sim_key: Optional[str] = None, + model_name: Optional[str] = None, + base_url: Optional[str] = None, + ) -> None: + self.df = df + if base_url is None: + base_url = "http://localhost:11434/api/embeddings" + if model_name is None: + model_name = "mxbai-embed-large" + + def emb_call(text: List[str]) -> List[float]: + resp = requests.post( + base_url, json={"prompt": text[0], "model": model_name} + ) + return resp.json()["embedding"] # type: ignore + + self.emb_call = emb_call + if "embs" not in df.columns and sim_key is None: raise ValueError("key is required if no column 'embs' is present.") if sim_key is not None: self.df["embs"] = self.df[sim_key].apply( - lambda x: get_embedding(self.client, x, model=self.model) + lambda x: get_embedding(emb_call, x) )
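For reference, a usage sketch of the new `OllamaSim`. Assumptions: a local Ollama server (default `http://localhost:11434/api/embeddings`) with `mxbai-embed-large` pulled, a toy DataFrame standing in for `vision_agent.tools.TOOLS_DF`, and `top_k` keeping the existing `Sim` behavior of returning one dict per matching row:

```python
# Sketch: embedding-based tool lookup with the new OllamaSim.
# The DataFrame below is a toy stand-in for vision_agent.tools.TOOLS_DF,
# which OllamaVisionAgentCoder passes in with sim_key="desc".
import pandas as pd

from vision_agent.utils.sim import OllamaSim

tools_df = pd.DataFrame(
    {
        "name": ["owl_v2", "ocr"],
        "desc": [
            "owl_v2 detects objects in an image given a text prompt",
            "ocr extracts text from an image",
        ],
    }
)

sim = OllamaSim(tools_df, sim_key="desc")  # embeds the 'desc' column once at init
for match in sim.top_k("find the dogs in this photo", k=1):
    print(match["name"], "->", match["desc"])
```

The same object is what `OllamaVisionAgentCoder` constructs as its default `tool_recommender`.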