Skip to content

Commit 9dcc44c

Browse files
main: Exposed API for similarity searches with fuzzy + nmslib.
1 parent 3f67c29 commit 9dcc44c

10 files changed

+120
-0
lines changed

main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33

44
from ingest import ingest_data_router
55
from utils import utils_router
6+
from search import search_router
67

78

89
app = FastAPI()
910
app.include_router(ingest_data_router)
1011
app.include_router(utils_router)
12+
app.include_router(search_router)
1113

1214
if __name__ == '__main__':
1315
uvicorn.run("main:app", host='0.0.0.0', port=8080, workers=2)

req/KnnSearchIndex.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from pydantic import BaseModel
2+
from req import KnnSearchVectorFields
3+
4+
5+
class KnnSearchIndex(BaseModel):
    """Wrapper placed under the ``knn`` key of a k-NN search query.

    Its single attribute name, ``dedupe_vector_nmslib``, becomes a key in the
    serialized query (presumably the name of the indexed vector field --
    confirm against the index mapping).
    """

    # Query vector and neighbour count for the ``dedupe_vector_nmslib`` key.
    dedupe_vector_nmslib: KnnSearchVectorFields
7+

req/KnnSearchQuery.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from pydantic import BaseModel
2+
from req import KnnSearchIndex
3+
4+
5+
class KnnSearchQuery(BaseModel):
    """The ``query`` section of a k-NN search request body."""

    # k-NN clause of the query; the annotation also admits ``None``.
    knn: KnnSearchIndex | None

req/KnnSearchV1.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from pydantic import BaseModel
2+
from req import KnnSearchQuery
3+
4+
5+
class KnnSearchV1(BaseModel):
    """Top-level body of a k-NN search request: a ``size`` plus a ``query``."""

    # Serialized as the top-level ``size`` key of the search body.
    size: int
    # Serialized as the top-level ``query`` key of the search body.
    query: KnnSearchQuery
8+

req/KnnSearchVectorFields.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from pydantic import BaseModel
2+
3+
4+
class KnnSearchVectorFields(BaseModel):
    """Innermost part of a k-NN clause: the query vector and ``k``."""

    # Query vector; element type is not constrained by the annotation.
    vector: list
    # The ``k`` value of the k-NN clause.
    k: int

req/KnnSimilaritySearch.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from pydantic import BaseModel
2+
3+
4+
class KnnSimilaritySearch(BaseModel):
    """Request body for the k-NN similarity search endpoint."""

    # Text to search for. The explicit ``None`` default keeps the field
    # optional on every pydantic version, so a request without ``text`` reaches
    # the endpoint's own "missing text" handling instead of failing validation.
    text: str | None = None
    # Forwarded as the ``size`` of the search request.
    size: int
    # Forwarded as the ``k`` of the k-NN clause.
    k: int

req/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
from .OpensearchVectorDocumentV1 import OpensearchVectorDocumentV1 as OpensearchVectorDocumentV1
2+
from .KnnSearchVectorFields import KnnSearchVectorFields
3+
from .KnnSearchIndex import KnnSearchIndex
4+
from .KnnSearchQuery import KnnSearchQuery
5+
from .KnnSearchV1 import KnnSearchV1
6+
from .KnnSimilaritySearch import KnnSimilaritySearch

search/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .search_similarity import get_similar_knn
2+
from .search_router import router as search_router
3+

search/search_router.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import io
2+
import logging
3+
from logging.config import dictConfig
4+
from config import opensearch_config
5+
from starlette import status
6+
7+
from fastapi.encoders import jsonable_encoder
8+
from starlette.responses import JSONResponse
9+
from resp import DocumentInsertResponse
10+
11+
from logger import DedupeKnnLogger
12+
from req import OpensearchVectorDocumentV1
13+
from clients import LoadOpenSearchClient
14+
from fastapi import APIRouter
15+
import json
16+
from req import KnnSimilaritySearch, KnnSearchIndex, KnnSearchVectorFields, \
17+
KnnSearchV1, KnnSearchQuery
18+
19+
from utils.fast_text_utils import generate_sentence_vector
20+
from search import get_similar_knn
21+
22+
# Router that main.py mounts on the FastAPI application.
router = APIRouter()

# Shared OpenSearch client and configuration, created once at import time.
opensearch_client = LoadOpenSearchClient().get_opensearch_client()
config = opensearch_config.OpensearchConfig().get_config()

# The logger is fetched before the logging configuration is applied; keep this
# order, since dictConfig may treat already-existing loggers differently.
logger = logging.getLogger("dedupeknn")
dictConfig(DedupeKnnLogger().dict())
28+
29+
30+
@router.post("/api/v1/similarity/knn/search")
async def get_similar_records_from_opensearch(req: KnnSimilaritySearch):
    """Handle a k-NN similarity search request.

    The request text is turned into a vector with ``generate_sentence_vector``,
    searched with ``get_similar_knn``, and the result is reshaped by
    ``construct_similarity_response``.

    Args:
        req: Request body carrying the query ``text``, ``size`` and ``k``.

    Returns:
        A 400 ``JSONResponse`` when ``text`` is ``None`` or empty, otherwise a
        200 ``JSONResponse`` with the reshaped search result.
    """
    request_payload = jsonable_encoder(req)
    logger.info("Got request for similarity match: {}".format(request_payload))

    # Guard clause: nothing to embed without text.
    if not req.text:
        return JSONResponse(
            content="Please give valid text.",
            status_code=status.HTTP_400_BAD_REQUEST,
        )

    query_vector = generate_sentence_vector(req.text).tolist()
    raw_result = await get_similar_knn(vector=query_vector, k=req.k, size=req.size)
    payload = construct_similarity_response(opensearch_response=raw_result)
    return JSONResponse(content=payload, status_code=status.HTTP_200_OK)
40+
41+
42+
def construct_similarity_response(opensearch_response):
    """Flatten a raw search response into the payload returned by the API.

    Args:
        opensearch_response: Decoded search response (a mapping). Only its
            ``timed_out`` flag and ``hits`` section are read.

    Returns:
        A dict that always contains ``hits``, a list of
        ``{'id', 'text', 'score'}`` entries. ``total`` is added only when the
        search completed (``timed_out`` is ``False``) and reported at least one
        match. A timed-out or incomplete response yields ``{'hits': []}``.
    """
    # Only a response that explicitly reports ``timed_out: False`` is used.
    if 'timed_out' not in opensearch_response or opensearch_response['timed_out'] is not False:
        return {'hits': []}

    hits_section = opensearch_response.get('hits') or {}

    # ``total`` is normally ``{'value': n, ...}`` but is a bare integer when
    # the search was issued with ``rest_total_hits_as_int``; accept both.
    total = hits_section.get('total') or 0
    if isinstance(total, dict):
        total = total.get('value', 0)

    if total <= 0:
        return {'hits': []}

    successful_response = {'total': total}
    successful_response['hits'] = [
        {
            'id': hit['_id'],
            # A hit may carry no ``_source`` (or no ``input_string``); report
            # the text as None rather than failing the whole response.
            'text': (hit.get('_source') or {}).get('input_string'),
            'score': hit['_score'],
        }
        for hit in hits_section.get('hits') or []
    ]
    return successful_response

search/search_similarity.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from req import KnnSearchV1
2+
from req import KnnSearchVectorFields
3+
from req import KnnSearchIndex
4+
from req import KnnSearchQuery
5+
from req import KnnSearchV1
6+
from clients import LoadOpenSearchClient
7+
from config import OpensearchConfig
8+
import json
9+
10+
# Instantiate before calling, as search/search_router.py does for the same
# helpers: an instance call works whether these are instance, class or static
# methods, whereas a call on the class itself fails for a plain instance method.
opensearch_client = LoadOpenSearchClient().get_opensearch_client()
config = OpensearchConfig().get_config()
12+
13+
14+
async def get_similar_knn(vector: list, size: int, k: int):
    """Run a k-NN search for ``vector`` and return the client's raw response.

    Args:
        vector: Query vector placed in the k-NN clause.
        size: Value of the request's top-level ``size`` field.
        k: Value of the ``k`` field in the k-NN clause.

    Returns:
        The result of awaiting ``opensearch_client.search``.
    """
    # Build the request models from the innermost clause outwards.
    search_request = KnnSearchV1(
        size=size,
        query=KnnSearchQuery(
            knn=KnnSearchIndex(
                dedupe_vector_nmslib=KnnSearchVectorFields(vector=vector, k=k),
            ),
        ),
    )

    # Round-trip through JSON so the body handed to the client is made of
    # plain dicts and lists rather than model instances.
    request_body = json.loads(search_request.json())
    target_index = config['INDEX_NAME'][0]
    return await opensearch_client.search(body=request_body, index=target_index)

0 commit comments

Comments
 (0)