
Commit

Merge pull request #101 from dod-advana/hotfix/updating_bert_tokens
Changes from testing out BERT models
rha930 committed Mar 7, 2022
2 parents 5a5b9ca + e2f4175 commit 458cf95
Showing 10 changed files with 405 additions and 165 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -234,7 +234,7 @@ mlruns/
 # Transformer models
 models/transformers
 # Corpus files
-corpus
+corpus/*
 
 # Don't ignore .gitkeep files
 !**/.gitkeep
1 change: 1 addition & 0 deletions gamechangerml/api/.dockerignore
@@ -6,6 +6,7 @@
 ../models
 ../transformer_cache.zip
 ../src/DrQA/data/
+../corpus
 dataPipelines
 out
 common
18 changes: 12 additions & 6 deletions gamechangerml/api/fastapi/routers/search.py
@@ -84,7 +84,11 @@ async def textExtract_infer(body: dict, extractType: str, response: Response) ->
 
 @router.post("/transSentenceSearch", status_code=200)
 async def trans_sentence_infer(
-    body: dict, response: Response, num_results: int = 10, externalSim: bool = False
+    body: dict,
+    response: Response,
+    num_results: int = 10,
+    process: bool = True,
+    externalSim: bool = False,
 ) -> dict:
     """trans_sentence_infer - endpoint for sentence transformer inference
     Args:
@@ -99,7 +103,7 @@ async def trans_sentence_infer(
     try:
         query_text = body["text"]
         results = MODELS.sentence_searcher.search(
-            query_text, num_results, externalSim=False
+            query_text, num_results, process=process, externalSim=False
        )
         logger.info(results)
     except Exception:
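
Note: a minimal sketch of how a client might exercise the new process flag on this endpoint. The host/port, example query text, and result count are assumptions; the "text" body key and the num_results/process query parameters come from the signature above.

import requests

# Assumed local dev host/port; adjust to wherever the gamechangerml API is served.
URL = "http://localhost:5000/transSentenceSearch"

payload = {"text": "environmental policy"}    # endpoint reads body["text"]
params = {"num_results": 5, "process": True}  # "process" is the flag added in this commit

resp = requests.post(URL, json=payload, params=params)
resp.raise_for_status()
print(resp.json())
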
@@ -160,7 +164,8 @@ async def post_expand_query_terms(body: dict, response: Response) -> dict:
     logger.info(f"Expanding: {body}")
     query_expander = (
         MODELS.query_expander
-        if body.get("qe_model", "gc_core") != "jbook" or MODELS.query_expander_jbook==None
+        if body.get("qe_model", "gc_core") != "jbook"
+        or MODELS.query_expander_jbook == None
         else MODELS.query_expander_jbook
     )
     try:
@@ -172,13 +177,14 @@ async def post_expand_query_terms(body: dict, response: Response) -> dict:
         # Removes original word from the return terms unless it is combined with another word
         logger.info(f"original expanded terms: {expansion_list}")
         finalTerms = remove_original_kw(expansion_list, terms_string)
-        expansion_dict[terms_string] = ['"{}"'.format(exp) for exp in finalTerms]
+        expansion_dict[terms_string] = [
+            '"{}"'.format(exp) for exp in finalTerms]
         logger.info(f"-- Expanded {terms_string} to \n {finalTerms}")
         # Perform word similarity
         logger.info(f"Finding similiar words for: {terms_string}")
         sim_words_dict = MODELS.word_sim.most_similiar_tokens(terms_string)
         logger.info(f"-- Expanded {terms_string} to \n {sim_words_dict}")
-        ## Construct return payload
+        # Construct return payload
         expanded_words = {}
         expanded_words["qexp"] = expansion_dict
         expanded_words["wordsim"] = sim_words_dict
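
Note: the expander selection above keys off "qe_model" in the request body. A hedged sketch of a payload that would route to the jbook expander when that model is loaded; only the "qe_model" key is visible in this diff, so the other key shown here is an assumption.

body = {
    "termsList": ["artificial intelligence"],  # assumed key carrying the query terms
    "qe_model": "jbook",  # picks MODELS.query_expander_jbook when it is loaded
}
# With "qe_model" omitted, set to "gc_core", or when MODELS.query_expander_jbook
# is None, the default MODELS.query_expander is used instead.
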
@@ -222,7 +228,7 @@ async def post_recommender(body: dict, response: Response) -> dict:
         logger.info(f"Recommending similar documents to {filenames}")
         results = MODELS.recommender.get_recs(
             filenames=filenames, sample=sample)
-        if results['results'] != []:
+        if results["results"] != []:
             logger.info(f"Found similar docs: \n {str(results)}")
         else:
             logger.info("Did not find any similar docs")
4 changes: 2 additions & 2 deletions gamechangerml/configs/config.py
@@ -74,7 +74,7 @@ class QAConfig:
 class EmbedderConfig:
     BASE_MODEL = "msmarco-distilbert-base-v2"
     MODEL_ARGS = {
-        "min_token_len": 10,
+        "min_token_len": 25,
         "verbose": True,  # for creating LocalCorpus
         "return_id": True,  # for creating LocalCorpus
     }
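
Note: the min_token_len bump from 10 to 25 tightens which paragraphs the sentence embedder will index. A minimal sketch of reading the updated value; the length check is an assumed illustration of how LocalCorpus might apply it, not code from this commit.

from gamechangerml.configs.config import EmbedderConfig

min_len = EmbedderConfig.MODEL_ARGS["min_token_len"]  # 25 after this commit

# Assumed illustration: paragraphs with fewer tokens than min_len would be
# skipped when building the local corpus for embedding.
paragraph = "Short boilerplate heading"
if len(paragraph.split()) < min_len:
    print("skip: too short to embed")
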
@@ -104,7 +104,7 @@ class QexpConfig:
             "num_keywords": 2,
             "ngram": (1, 3),
             "abbrv_file": None,
-            "merge_word_sim": True
+            "merge_word_sim": True,
         },
     }

