1- use crate :: { query:: check_input, types, types:: TableMethod , types:: Transformer } ;
1+ use crate :: {
2+ query:: check_input,
3+ transformers:: { http_handler:: sync_get_model_info, types:: TransformerMetadata } ,
4+ types,
5+ types:: TableMethod ,
6+ } ;
27use pgrx:: prelude:: * ;
3- use std:: collections:: HashMap ;
48
59use anyhow:: { Context , Result } ;
6- use lazy_static:: lazy_static;
710
8- lazy_static ! {
9- // each model has its own job queue
10- // maintain the mapping of transformer to queue name here
11- pub static ref QUEUE_MAPPING : HashMap <Transformer , & ' static str > = {
12- let mut m = HashMap :: new( ) ;
13- m. insert( Transformer :: text_embedding_ada_002, "v_openai" ) ;
14- m. insert( Transformer :: all_MiniLM_L12_v2, "v_all_MiniLM_L12_v2" ) ;
15- m
16- } ;
17- }
11+ pub static VECTORIZE_QUEUE : & str = "vectorize_jobs" ;
1812
19- pub fn init_pgmq ( transformer : & Transformer ) -> Result < ( ) > {
20- let qname = QUEUE_MAPPING . get ( transformer) . expect ( "invalid transformer" ) ;
13+ pub fn init_pgmq ( ) -> Result < ( ) > {
2114 // check if queue already created:
2215 let queue_exists: bool = Spi :: get_one ( & format ! (
23- "SELECT EXISTS (SELECT 1 FROM pgmq.meta WHERE queue_name = '{qname }');" ,
16+ "SELECT EXISTS (SELECT 1 FROM pgmq.meta WHERE queue_name = '{VECTORIZE_QUEUE }');" ,
2417 ) ) ?
2518 . context ( "error checking if queue exists" ) ?;
2619 if queue_exists {
20+ info ! ( "queue already exists" ) ;
2721 return Ok ( ( ) ) ;
2822 } else {
23+ info ! ( "creating queue;" ) ;
2924 let ran: Result < _ , spi:: Error > = Spi :: connect ( |mut c| {
30- let _r = c. update ( & format ! ( "SELECT pgmq.create('{qname}');" ) , None , None ) ?;
25+ let _r = c. update (
26+ & format ! ( "SELECT pgmq.create('{VECTORIZE_QUEUE}');" ) ,
27+ None ,
28+ None ,
29+ ) ?;
3130 Ok ( ( ) )
3231 } ) ;
3332 if let Err ( e) = ran {
@@ -69,38 +68,30 @@ pub fn init_embedding_table_query(
6968 job_name : & str ,
7069 schema : & str ,
7170 table : & str ,
72- transformer : & types:: Transformer ,
73- search_alg : & types:: SimilarityAlg ,
71+ transformer : & str ,
7472 transform_method : & TableMethod ,
7573) -> Vec < String > {
76- // TODO: when adding support for other models, add the output dimension to the transformer attributes
77- // so that they can be read here, not hard-coded here below
78- // currently only supports the text-embedding-ada-002 embedding model - output dim 1536
79- // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
80-
8174 check_input ( job_name) . expect ( "invalid job name" ) ;
82- let col_type = match ( transformer, search_alg) {
83- // TODO: when adding support for other models, add the output dimension to the transformer attributes
84- // so that they can be read here, not hard-coded here below
85- // currently only supports the text-embedding-ada-002 embedding model - output dim 1536
75+ let col_type = match transformer {
8676 // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
87- (
88- types:: Transformer :: text_embedding_ada_002,
89- types:: SimilarityAlg :: pgv_cosine_similarity,
90- ) => "vector(1536)" ,
91- ( types:: Transformer :: all_MiniLM_L12_v2, types:: SimilarityAlg :: pgv_cosine_similarity) => {
92- "vector(384)"
77+ // for anything but OpenAI, first call info endpoint to get the embedding dim of the model
78+ "text-embedding-ada-002" => "vector(1536)" . to_owned ( ) ,
79+ _ => {
80+ let model_info: TransformerMetadata = sync_get_model_info ( transformer)
81+ . expect ( "failed to call vectorize.embedding_service_url" ) ;
82+ let dim = model_info. embedding_dimension ;
83+ format ! ( "vector({dim})" )
9384 }
9485 } ;
9586 match transform_method {
9687 TableMethod :: append => {
9788 vec ! [
98- append_embedding_column( job_name, schema, table, col_type) ,
89+ append_embedding_column( job_name, schema, table, & col_type) ,
9990 create_hnsw_cosine_index( job_name, schema, table) ,
10091 ]
10192 }
10293 TableMethod :: join => {
103- vec ! [ create_embedding_table( job_name, col_type) ]
94+ vec ! [ create_embedding_table( job_name, & col_type) ]
10495 }
10596 }
10697}
@@ -125,11 +116,6 @@ fn create_hnsw_cosine_index(job_name: &str, schema: &str, table: &str) -> String
125116}
126117
127118fn append_embedding_column ( job_name : & str , schema : & str , table : & str , col_type : & str ) -> String {
128- // TODO: when adding support for other models, add the output dimension to the transformer attributes
129- // so that they can be read here, not hard-coded here below
130- // currently only supports the text-embedding-ada-002 embedding model - output dim 1536
131- // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
132-
133119 check_input ( job_name) . expect ( "invalid job name" ) ;
134120 format ! (
135121 "
0 commit comments