
Commit 1e194c7

Added Ollama support in transform function (#106)
* Embeddings generation
* Ollama Embeddings generator added. Embeddings can also be generated using Ollama models via the transform function.
* Merged with main and removed ollama insertion in backwards compatibility block
* Updated search.rs with main
* Resolved conflicts
* Updated branch with main
* Fixed formatting
* fix pgmq install
* fmt
* fix path for pgmq install
1 parent aae5a6d commit 1e194c7

5 files changed

Lines changed: 107 additions & 6 deletions


core/src/transformers/ollama.rs

Lines changed: 52 additions & 0 deletions
@@ -2,6 +2,8 @@ use anyhow::Result;
 use ollama_rs::{generation::completion::request::GenerationRequest, Ollama};
 use url::Url;
 
+use super::types::EmbeddingRequest;
+
 pub struct OllamaInstance {
     pub model_name: String,
     pub instance: Ollama,
@@ -11,6 +13,8 @@ pub trait LLMFunctions {
     fn new(model_name: String, url: String) -> Self;
     #[allow(async_fn_in_trait)]
     async fn generate_reponse(&self, prompt_text: String) -> Result<String, String>;
+    #[allow(async_fn_in_trait)]
+    async fn generate_embedding(&self, inputs: String) -> Result<Vec<f64>, String>;
 }
 
 impl LLMFunctions for OllamaInstance {
@@ -38,6 +42,16 @@ impl LLMFunctions for OllamaInstance {
             Err(e) => Err(e.to_string()),
         }
     }
+    async fn generate_embedding(&self, input: String) -> Result<Vec<f64>, String> {
+        let embed = self
+            .instance
+            .generate_embeddings(self.model_name.clone(), input, None)
+            .await;
+        match embed {
+            Ok(res) => Ok(res.embeddings),
+            Err(e) => Err(e.to_string()),
+        }
+    }
 }
 
 pub fn ollama_embedding_dim(model_name: &str) -> i32 {
@@ -46,3 +60,41 @@ pub fn ollama_embedding_dim(model_name: &str) -> i32 {
         _ => 1536,
     }
 }
+
+pub fn check_model_host(url: &str) -> Result<String, String> {
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_io()
+        .enable_time()
+        .build()
+        .unwrap_or_else(|e| panic!("failed to initialize tokio runtime: {}", e));
+
+    runtime.block_on(async {
+        let response = reqwest::get(url).await.unwrap();
+        match response.status() {
+            reqwest::StatusCode::OK => Ok(format!("Success! {:?}", response)),
+            _ => Err(format!("Error! {:?}", response)),
+        }
+    })
+}
+
+pub fn generate_embeddings(request: EmbeddingRequest) -> Result<Vec<Vec<f64>>> {
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_io()
+        .enable_time()
+        .build()
+        .unwrap_or_else(|e| panic!("failed to initialize tokio runtime: {}", e));
+
+    runtime.block_on(async {
+        let instance = OllamaInstance::new(request.payload.model, request.url);
+        let mut embeddings: Vec<Vec<f64>> = vec![];
+        for input in request.payload.input {
+            let response = instance.generate_embedding(input).await;
+            let embedding = match response {
+                Ok(embed) => embed,
+                Err(e) => panic!("Unable to generate embeddings.\nError: {:?}", e),
+            };
+            embeddings.push(embedding);
+        }
+        Ok(embeddings)
+    })
+}
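
For context, a minimal sketch (not part of the commit) of how the new per-input API added above might be called from async code; the localhost URL and the "llama2" model name are illustrative assumptions.

// Sketch only: embedding a single string with the new OllamaInstance API.
// Assumes a local Ollama server at the default port and a model already pulled.
use vectorize_core::transformers::ollama::{LLMFunctions, OllamaInstance};

async fn embed_one(text: &str) -> Result<Vec<f64>, String> {
    let instance = OllamaInstance::new(
        "llama2".to_string(),
        "http://localhost:11434".to_string(),
    );
    // Delegates to ollama-rs and returns the raw embedding vector for one input.
    instance.generate_embedding(text.to_string()).await
}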

core/src/types.rs

Lines changed: 3 additions & 0 deletions
@@ -1,4 +1,5 @@
 use chrono::serde::ts_seconds_option::deserialize as from_tsopt;
+
 use serde::{Deserialize, Serialize};
 use sqlx::types::chrono::Utc;
 use sqlx::FromRow;
@@ -168,10 +169,12 @@ pub enum ModelError {
 impl Model {
     pub fn new(input: &str) -> Result<Self, ModelError> {
         let mut parts: Vec<&str> = input.split('/').collect();
+
         let missing_source = parts.len() < 2;
         if parts.len() > 3 {
             return Err(ModelError::InvalidFormat(input.to_string()));
         }
+
         if missing_source && parts[0] == "text-embedding-ada-002" {
             // for backwards compatibility, prepend "openai" to text-embedding-ada-2
             parts.insert(0, "openai");

extension/Makefile

Lines changed: 1 addition & 2 deletions
@@ -73,8 +73,7 @@ install-pgvector:
 
 install-pgmq:
 	git clone https://github.com/tembo-io/pgmq.git && \
-	cd pgmq && \
-	PG_CONFIG=${PGRX_PG_CONFIG} make clean && \
+	cd pgmq/pgmq-extension && \
 	PG_CONFIG=${PGRX_PG_CONFIG} make && \
 	PG_CONFIG=${PGRX_PG_CONFIG} make install && \
 	cd .. && rm -rf pgmq

extension/src/search.rs

Lines changed: 19 additions & 1 deletion
@@ -8,6 +8,7 @@ use crate::util;
 
 use anyhow::{Context, Result};
 use pgrx::prelude::*;
+use vectorize_core::transformers::ollama::check_model_host;
 use vectorize_core::types::{self, Model, ModelSource, TableMethod, VectorizeMeta};
 
 #[allow(clippy::too_many_arguments)]
@@ -69,9 +70,26 @@ pub fn init_table(
             sync_get_model_info(&transformer.fullname, api_key.clone())
                 .context("transformer does not exist")?;
         }
-        ModelSource::Ollama | ModelSource::Tembo => {
+        ModelSource::Tembo => {
             error!("Ollama/Tembo not implemented for search yet");
         }
+        ModelSource::Ollama => {
+            let url = match guc::get_guc(guc::VectorizeGuc::OllamaServiceUrl) {
+                Some(k) => k,
+                None => {
+                    error!("failed to get Ollama url from GUC");
+                }
+            };
+            let res = check_model_host(&url);
+            match res {
+                Ok(_) => {
+                    info!("Model host active!")
+                }
+                Err(e) => {
+                    error!("Error with model host: {:?}", e)
+                }
+            }
+        }
     }
 
     let valid_params = types::JobParams {
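
As a rough illustration of the reachability check that init_table now performs for Ollama models, here is a standalone sketch; the URL is an assumed default local Ollama address rather than the GUC value the extension actually reads.

// Sketch only: the same host check, outside of the extension.
use vectorize_core::transformers::ollama::check_model_host;

fn main() {
    match check_model_host("http://localhost:11434") {
        Ok(msg) => println!("Model host active! {msg}"),
        Err(e) => eprintln!("Error with model host: {e:?}"),
    }
}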

extension/src/transformers/mod.rs

Lines changed: 32 additions & 3 deletions
@@ -7,6 +7,7 @@ use generic::get_env_interpolated_guc;
 use pgrx::prelude::*;
 
 use vectorize_core::transformers::http_handler::openai_embedding_request;
+use vectorize_core::transformers::ollama::generate_embeddings;
 use vectorize_core::transformers::openai::OPENAI_BASE_URL;
 use vectorize_core::transformers::types::{EmbeddingPayload, EmbeddingRequest};
 use vectorize_core::types::{Model, ModelSource};
@@ -61,14 +62,38 @@ pub fn transform(input: &str, transformer: &Model, api_key: Option<String>) -> V
                 api_key: api_key.map(|s| s.to_string()),
             }
         }
-        ModelSource::Ollama => error!("Ollama transformer not implemented yet"),
+        ModelSource::Ollama => {
+            let url = match guc::get_guc(guc::VectorizeGuc::OllamaServiceUrl) {
+                Some(k) => k,
+                None => {
+                    error!("failed to get Ollama url from GUC");
+                }
+            };
+
+            let embedding_request = EmbeddingPayload {
+                input: vec![input.to_string()],
+                model: transformer.name.to_string(),
+            };
+
+            EmbeddingRequest {
+                url,
+                payload: embedding_request,
+                api_key: None,
+            }
+        }
     };
     let timeout = EMBEDDING_REQ_TIMEOUT_SEC.get();
 
     match transformer.source {
-        ModelSource::Ollama | ModelSource::Tembo => {
-            error!("Ollama/Tembo transformer not implemented yet")
+        ModelSource::Ollama => {
+            // Call the embeddings generation function
+            let embeddings = generate_embeddings(embedding_request);
+            match embeddings {
+                Ok(k) => k,
+                Err(e) => error!("error getting embeddings: {}", e),
+            }
         }
+
         ModelSource::OpenAI | ModelSource::SentenceTransformers => {
            match runtime
                 .block_on(async { openai_embedding_request(embedding_request, timeout).await })
@@ -79,5 +104,9 @@ pub fn transform(input: &str, transformer: &Model, api_key: Option<String>) -> V
                 }
             }
         }
+
+        ModelSource::Tembo => {
+            error!("Embeddings support not added for Tembo yet!")
+        }
     }
 }
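
Condensed into a standalone function, the new Ollama path through transform looks roughly like the sketch below. This is illustrative only: the service_url parameter stands in for the OllamaServiceUrl GUC that the extension reads, and the struct fields mirror the diff above.

// Sketch only: the Ollama branch of transform() without the pgrx plumbing.
use anyhow::Result;
use vectorize_core::transformers::ollama::generate_embeddings;
use vectorize_core::transformers::types::{EmbeddingPayload, EmbeddingRequest};

fn ollama_transform(input: &str, model_name: &str, service_url: &str) -> Result<Vec<Vec<f64>>> {
    let request = EmbeddingRequest {
        url: service_url.to_string(),
        payload: EmbeddingPayload {
            input: vec![input.to_string()],
            model: model_name.to_string(),
        },
        api_key: None, // a local Ollama server needs no API key
    };
    // generate_embeddings builds its own current-thread tokio runtime internally,
    // which is why the synchronous extension code can call it directly.
    generate_embeddings(request)
}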
