Skip to content

Commit 6b4956b

Browse files
committed
improve search query
1 parent f8a8bd8 commit 6b4956b

3 files changed

Lines changed: 21 additions & 28 deletions

File tree

core/src/query.rs

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -731,8 +731,8 @@ pub fn hybrid_search_query(
731731
}
732732

733733
format!(
734-
"
735-
SELECT to_jsonb(t) as results
734+
"
735+
SELECT to_jsonb(t) as results
736736
FROM (
737737
SELECT {cols}, t.rrf_score, t.semantic_rank, t.fts_rank, t.similarity_score
738738
FROM (
@@ -742,45 +742,32 @@ pub fn hybrid_search_query(
742742
s.similarity_score,
743743
f.fts_rank,
744744
(
745-
CASE
746-
WHEN s.semantic_rank IS NOT NULL THEN {semantic_weight}::float/({rrf_k} + s.semantic_rank)
747-
ELSE 0
748-
END +
749-
CASE
750-
WHEN f.fts_rank IS NOT NULL THEN {fts_weight}::float/({rrf_k} + f.fts_rank)
751-
ELSE 0
752-
END
745+
COALESCE({semantic_weight}::float / ({rrf_k} + s.semantic_rank), 0) +
746+
COALESCE({fts_weight}::float / ({rrf_k} + f.fts_rank), 0)
753747
) as rrf_score
754748
FROM (
755749
SELECT
756750
{join_key},
757-
distance,
758-
ROW_NUMBER() OVER (ORDER BY distance) as semantic_rank,
759-
COUNT(*) OVER () as max_semantic_rank,
760-
1 - distance as similarity_score
761-
FROM (
762-
SELECT
763-
{join_key},
764-
embeddings <=> $1::vector as distance
765-
FROM vectorize._embeddings_{job_name}
766-
) sub
767-
ORDER BY distance
751+
embeddings <=> $1::vector as distance,
752+
ROW_NUMBER() OVER (ORDER BY embeddings <=> $1::vector) as semantic_rank,
753+
1 - (embeddings <=> $1::vector) as similarity_score
754+
FROM vectorize._embeddings_{job_name}
755+
ORDER BY embeddings <=> $1::vector
768756
LIMIT {window_size}
769757
) s
770758
FULL OUTER JOIN (
771759
SELECT
772760
{join_key},
773-
ROW_NUMBER() OVER (ORDER BY ts_rank_cd(search_tokens, query) DESC) as fts_rank,
774-
COUNT(*) OVER () as max_fts_rank
775-
FROM vectorize._search_tokens_{job_name},
776-
to_tsquery('english',
761+
ROW_NUMBER() OVER (ORDER BY ts_rank_cd(search_tokens, query) DESC) as fts_rank
762+
FROM vectorize._search_tokens_{job_name},
763+
to_tsquery('english',
777764
NULLIF(
778765
replace(plainto_tsquery('english', $2)::text, ' & ', ' | '),
779766
''
780767
)
781768
) as query
782769
WHERE search_tokens @@ query
783-
ORDER BY ts_rank_cd(search_tokens, query) DESC
770+
ORDER BY ts_rank_cd(search_tokens, query) DESC
784771
LIMIT {window_size}
785772
) f ON s.{join_key} = f.{join_key}
786773
) t
@@ -789,7 +776,7 @@ pub fn hybrid_search_query(
789776
ORDER BY t.rrf_score DESC
790777
LIMIT {limit}
791778
) t"
792-
)
779+
)
793780
}
794781
#[cfg(test)]
795782
mod tests {

server/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ version = "0.1.0"
44
edition = "2024"
55
publish = false
66

7+
[[bin]]
8+
name = "vectorize-worker"
9+
path = "src/bin/worker.rs"
10+
711
[lib]
812
name = "vectorize_server"
913
path = "src/lib.rs"

server/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ COPY Cargo.toml Cargo.lock ./
1515

1616
ENV SQLX_OFFLINE=1
1717
RUN cargo build --bin vectorize-server --release
18-
18+
RUN cargo build --bin vectorize-worker --release
19+
1920
FROM rust:1.90.0-slim-bookworm
2021

2122
RUN apt-get update && \
2223
apt-get install -y postgresql-client && apt-get clean && \
2324
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
2425

2526
COPY --from=builder /build/target/release/vectorize-server /usr/local/bin/vectorize-server
27+
COPY --from=builder /build/target/release/vectorize-worker /usr/local/bin/vectorize-worker
2628

2729
CMD ["vectorize-server"]

0 commit comments

Comments
 (0)