Skip to content

Commit 019fa00

Browse files
authored
Merge pull request #17 from tembo-io/fix/missingEmbeds
add index
2 parents 0f3727c + e62418a commit 019fa00

5 files changed

Lines changed: 23 additions & 6 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "vectorize"
3-
version = "0.1.2"
3+
version = "0.2.0"
44
edition = "2021"
55
publish = false
66

Trunk.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ description = "The simplest implementation of LLM-backed vector search on Postgr
66
homepage = "https://github.com/tembo-io/pg_vectorize"
77
documentation = "https://github.com/tembo-io/pg_vectorize"
88
categories = ["orchestration", "machine_learning"]
9-
version = "0.1.2"
9+
version = "0.2.0"
1010

1111
[build]
1212
postgres_version = "15"

src/api.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,9 @@ fn table(
9797
);
9898

9999
let ran: Result<_, spi::Error> = Spi::connect(|mut c| {
100-
let _r = c.update(&init_embed_q, None, None)?;
100+
for q in init_embed_q {
101+
let _r = c.update(&q, None, None)?;
102+
}
101103
Ok(())
102104
});
103105
if let Err(e) = ran {

src/init.rs

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ pub fn init_embedding_table_query(
6565
transformer: &types::Transformer,
6666
search_alg: &types::SimilarityAlg,
6767
transform_method: &TableMethod,
68-
) -> String {
68+
) -> Vec<String> {
6969
// TODO: when adding support for other models, add the output dimension to the transformer attributes
7070
// so that they can be read here, not hard-coded here below
7171
// currently only supports the text-embedding-ada-002 embedding model - output dim 1536
@@ -80,8 +80,15 @@ pub fn init_embedding_table_query(
8080
(types::Transformer::openai, types::SimilarityAlg::pgv_cosine_similarity) => "vector(1536)",
8181
};
8282
match transform_method {
83-
TableMethod::append => append_embedding_column(job_name, schema, table, col_type),
84-
TableMethod::join => create_embedding_table(job_name, col_type),
83+
TableMethod::append => {
84+
vec![
85+
append_embedding_column(job_name, schema, table, col_type),
86+
create_hnsw_cosine_index(job_name, schema, table),
87+
]
88+
}
89+
TableMethod::join => {
90+
vec![create_embedding_table(job_name, col_type)]
91+
}
8592
}
8693
}
8794

@@ -97,6 +104,13 @@ fn create_embedding_table(job_name: &str, col_type: &str) -> String {
97104
)
98105
}
99106

107+
fn create_hnsw_cosine_index(job_name: &str, schema: &str, table: &str) -> String {
108+
format!(
109+
"CREATE INDEX IF NOT EXISTS {job_name}_idx ON {schema}.{table} USING hnsw ({job_name}_embeddings vector_cosine_ops);
110+
",
111+
)
112+
}
113+
100114
fn append_embedding_column(job_name: &str, schema: &str, table: &str, col_type: &str) -> String {
101115
// TODO: when adding support for other models, add the output dimension to the transformer attributes
102116
// so that they can be read here, not hard-coded here below

src/search.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ pub fn cosine_similarity_search(
1515
1 - ({project}_embeddings <=> '{emb}'::vector) AS cosine_similarity,
1616
*
1717
FROM {schema}.{table}
18+
WHERE {project}_updated_at is NOT NULL
1819
ORDER BY cosine_similarity DESC
1920
LIMIT {num_results};
2021
"

0 commit comments

Comments
 (0)