Skip to content

Commit

Permalink
Merge pull request #70 from dod-advana/feature/UOT-122261
Browse files Browse the repository at this point in the history
Feature/uot 122261
  • Loading branch information
rha930 authored Dec 3, 2021
2 parents 1794206 + eb9b586 commit ad8da24
Show file tree
Hide file tree
Showing 12 changed files with 386 additions and 80,672 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,14 @@
6. `docker-compose up`
7. visit `localhost:5000/docs`

## HELPFUL FLAGS FOR API
- export CONTAINER_RELOAD=True to reload the container on code changes for development
- export DOWNLOAD_DEP=True to get models and other deps from s3
- export MODEL_LOAD=False to not load models on API start (only for development needs)

## FAQ
- I get an error with redis on API start
- export ENV_TYPE=DEV
- Do I need to train models to use the API?
- No, you can use the pretrained models within the dependencies.
- The API is crashing when trying to load the models.
Expand All @@ -146,3 +153,4 @@
- No, but it will make training or inferring faster.
- What if I can't download the dependencies since I am external?
- We are working on making the models publicly available. However, you can download pretrained transformers from HuggingFace and include them in the models/transformers directory, which will enable some functionality of the API. Even without any models, some functionality, such as text extraction, is still available.

1 change: 1 addition & 0 deletions gamechangerml/api/.env
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ DOWNLOAD_DEP=${DOWNLOAD_DEP}
DOCKER_BUILDKIT=1
DOCKER=true
LD_LIBRARY_PATH=/usr/local/cuda/lib64/
MODEL_LOAD=${MODEL_LOAD}

128 changes: 67 additions & 61 deletions gamechangerml/api/fastapi/model_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
SentenceEncoder,
)
from gamechangerml.src.search.embed_reader import sparse
from gamechangerml.src.search.ranking import ltr
from gamechangerml.api.fastapi.settings import *
from gamechangerml.src.featurization.word_sim import WordSim
from gamechangerml.src.featurization.topic_modeling import Topics
Expand All @@ -28,16 +27,14 @@
class ModelLoader:
# private model variables
def __init__(self):
self.ltr_model = ltr.LTR()

__qa_model = None
__sentence_searcher = None
__sentence_encoder = None
__query_expander = None
__query_expander_jbook = None
__word_sim = None
__sparse_reader = None
__topic_model = None
__qa_model = None
__sentence_searcher = None
__sentence_encoder = None
__query_expander = None
__query_expander_jbook = None
__word_sim = None
__sparse_reader = None
__topic_model = None

# Get methods for the models. If they don't exist try initializing them.
def getQA(self):
Expand Down Expand Up @@ -120,16 +117,17 @@ def initQA():
Returns:
"""
try:
logger.info("Starting QA pipeline")
ModelLoader.__qa_model = QAReader(
transformer_path=LOCAL_TRANSFORMERS_DIR.value,
use_gpu=True,
model_name=QAConfig.BASE_MODEL,
**QAConfig.MODEL_ARGS,
)
# set cache variable defined in settings.py
latest_qa_model.value = ModelLoader.__qa_model.READER_PATH
logger.info("Finished loading QA Reader")
if MODEL_LOAD_FLAG:
logger.info("Starting QA pipeline")
ModelLoader.__qa_model = QAReader(
transformer_path=LOCAL_TRANSFORMERS_DIR.value,
use_gpu=True,
model_name=QAConfig.BASE_MODEL,
**QAConfig.MODEL_ARGS,
)
# set cache variable defined in settings.py
latest_qa_model.value = ModelLoader.__qa_model.READER_PATH
logger.info("Finished loading QA Reader")
except OSError:
logger.error(f"Could not load Question Answer Model")

Expand All @@ -141,10 +139,11 @@ def initQE(qexp_model_path=QEXP_MODEL_NAME.value):
"""
logger.info(f"Loading Pretrained Vector from {qexp_model_path}")
try:
ModelLoader.__query_expander = qe.QE(
qexp_model_path, **QexpConfig.MODEL_ARGS["init"]
)
logger.info("** Loaded Query Expansion Model")
if MODEL_LOAD_FLAG:
ModelLoader.__query_expander = qe.QE(
qexp_model_path, **QexpConfig.MODEL_ARGS["init"]
)
logger.info("** Loaded Query Expansion Model")
except Exception as e:
logger.warning("** Could not load QE model")
logger.warning(e)
Expand All @@ -157,10 +156,11 @@ def initQEJBook(qexp_jbook_model_path=QEXP_JBOOK_MODEL_NAME.value):
"""
logger.info(f"Loading Pretrained Vector from {qexp_jbook_model_path}")
try:
ModelLoader.__query_expander_jbook = qe.QE(
qexp_jbook_model_path, **QexpConfig.MODEL_ARGS["init"]
)
logger.info("** Loaded JBOOK Query Expansion Model")
if MODEL_LOAD_FLAG:
ModelLoader.__query_expander_jbook = qe.QE(
qexp_jbook_model_path, **QexpConfig.MODEL_ARGS["init"]
)
logger.info("** Loaded JBOOK Query Expansion Model")
except Exception as e:
logger.warning("** Could not load JBOOK QE model")
logger.warning(e)
Expand All @@ -173,8 +173,9 @@ def initWordSim(model_path=WORD_SIM_MODEL.value):
"""
logger.info(f"Loading Query Expansion Model from {model_path}")
try:
ModelLoader.__word_sim = WordSim(model_path)
logger.info("** Loaded Word Sim Model")
if MODEL_LOAD_FLAG:
ModelLoader.__word_sim = WordSim(model_path)
logger.info("** Loaded Word Sim Model")
except Exception as e:
logger.warning("** Could not load Word Sim model")
logger.warning(e)
Expand All @@ -188,20 +189,22 @@ def initSentenceSearcher(
Args:
Returns:
"""
logger.info(f"Loading Sentence Searcher with sent index path: {index_path}")
logger.info(
f"Loading Sentence Searcher with sent index path: {index_path}")
try:
ModelLoader.__sentence_searcher = SentenceSearcher(
sim_model_name=SimilarityConfig.BASE_MODEL,
index_path=index_path,
transformer_path=transformer_path,
)
if MODEL_LOAD_FLAG:
ModelLoader.__sentence_searcher = SentenceSearcher(
sim_model_name=SimilarityConfig.BASE_MODEL,
index_path=index_path,
transformer_path=transformer_path,
)

sim_model = ModelLoader.__sentence_searcher.similarity
# set cache variable defined in settings.py
latest_intel_model_sim.value = sim_model.sim_model
logger.info(
f"** Loaded Similarity Model from {sim_model.sim_model} and sent index from {index_path}"
)
sim_model = ModelLoader.__sentence_searcher.similarity
# set cache variable defined in settings.py
latest_intel_model_sim.value = sim_model.sim_model
logger.info(
f"** Loaded Similarity Model from {sim_model.sim_model} and sent index from {index_path}"
)

except Exception as e:
logger.warning("** Could not load Similarity model")
Expand All @@ -216,15 +219,16 @@ def initSentenceEncoder(transformer_path=LOCAL_TRANSFORMERS_DIR.value):
"""
logger.info(f"Loading encoder model")
try:
ModelLoader.__sentence_encoder = SentenceEncoder(
encoder_model_name=EmbedderConfig.BASE_MODEL,
transformer_path=transformer_path,
**EmbedderConfig.MODEL_ARGS,
)
encoder_model = ModelLoader.__sentence_encoder.encoder_model
# set cache variable defined in settings.py
latest_intel_model_encoder.value = encoder_model
logger.info(f"** Loaded Encoder Model from {encoder_model}")
if MODEL_LOAD_FLAG:
ModelLoader.__sentence_encoder = SentenceEncoder(
encoder_model_name=EmbedderConfig.BASE_MODEL,
transformer_path=transformer_path,
**EmbedderConfig.MODEL_ARGS,
)
encoder_model = ModelLoader.__sentence_encoder.encoder_model
# set cache variable defined in settings.py
latest_intel_model_encoder.value = encoder_model
logger.info(f"** Loaded Encoder Model from {encoder_model}")

except Exception as e:
logger.warning("** Could not load Encoder model")
Expand All @@ -233,8 +237,10 @@ def initSentenceEncoder(transformer_path=LOCAL_TRANSFORMERS_DIR.value):
@staticmethod
def initSparse(model_name=latest_intel_model_trans.value):
try:
ModelLoader.__sparse_reader = sparse.SparseReader(model_name=model_name)
logger.info(f"Sparse Reader: {model_name} loaded")
if MODEL_LOAD_FLAG:
ModelLoader.__sparse_reader = sparse.SparseReader(
model_name=model_name)
logger.info(f"Sparse Reader: {model_name} loaded")
except Exception as e:
logger.warning("** Could not load Sparse Reader")
logger.warning(e)
Expand All @@ -246,13 +252,13 @@ def initTopics() -> None:
Returns:
"""
try:
logger.info("Starting Topic pipeline")
logger.info(TopicsConfig.DATA_ARGS)
ModelLoader.__topic_model = Topics(
TopicsConfig.DATA_ARGS["LOCAL_MODEL_DIR"]
)
logger.info("Finished loading Topic Model")
if MODEL_LOAD_FLAG:
logger.info("Starting Topic pipeline")
logger.info(TopicsConfig.DATA_ARGS)
ModelLoader.__topic_model = Topics(
TopicsConfig.DATA_ARGS["LOCAL_MODEL_DIR"]
)
logger.info("Finished loading Topic Model")
except Exception as e:
logger.warning("** Could not load Topic model")
logger.warning(e)

35 changes: 5 additions & 30 deletions gamechangerml/api/fastapi/routers/controls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from gamechangerml.api.fastapi.routers.startup import *
from gamechangerml.api.utils.threaddriver import MlThread
from gamechangerml.train.pipeline import Pipeline
from gamechangerml.src.search.ranking.ltr import LTR
from gamechangerml.api.utils import processmanager
from gamechangerml.api.fastapi.model_loader import ModelLoader
from gamechangerml.src.utilities.test_utils import (
Expand Down Expand Up @@ -39,9 +38,10 @@

router = APIRouter()
MODELS = ModelLoader()
ltr = MODELS.ltr_model
## Get Methods ##

pipeline = Pipeline()


@router.get("/")
async def api_information():
Expand Down Expand Up @@ -142,15 +142,10 @@ async def initLTR(response: Response):
number_files = 0
resp = None
try:

logger.info("Attempting to initialize LTR")
resp = ltr.post_init_ltr()
logger.info(resp)
logger.info("Attempting to post features to LTR")
resp = ltr.post_features()
logger.info(resp)
pipeline.init_ltr()
except Exception as e:
logger.warning("Could not init LTR")
return resp


@router.get("/LTR/createModel", status_code=200)
Expand All @@ -165,23 +160,7 @@ async def create_LTR_model(response: Response):
model = []

def ltr_process():
logger.info("Attempting to create judgement list")
judgements = ltr.generate_judgement(ltr.mappings)
logger.info("Attempting to get features")

fts = ltr.generate_ft_txt_file(judgements)
logger.info("Attempting to read in data")
ltr.data = ltr.read_xg_data()
logger.info("Attempting to train LTR model")
bst, model = ltr.train()
logger.info("Created LTR model")
with open("gamechangerml/models/ltr/xgb-model.json") as f:
model = json.load(f)
logger.info("removing old LTR")
resp = ltr.delete_ltr("ltr_model")
logger.info(resp)
resp = ltr.post_model(model, model_name="ltr_model")
logger.info("Posted LTR model")
pipeline.create_ltr()

ltr_thread = MlThread(ltr_process)

Expand Down Expand Up @@ -364,7 +343,6 @@ async def train_model(model_dict: dict, response: Response):
# Methods for all the different models we can train
def finetune_sentence(model_dict=model_dict):
logger.info("Attempting to finetune the sentence transformer")
pipeline = Pipeline()
try:
testing_only = model_dict["testing_only"]
except:
Expand All @@ -383,7 +361,6 @@ def finetune_sentence(model_dict=model_dict):

def train_sentence(model_dict=model_dict):
logger.info("Attempting to start sentence pipeline")
pipeline = Pipeline()
try:
corpus_dir = model_dict["corpus_dir"]
except:
Expand All @@ -407,7 +384,6 @@ def train_sentence(model_dict=model_dict):

def train_qexp(model_dict=model_dict):
logger.info("Attempting to start qexp pipeline")
pipeline = Pipeline()
args = {
"model_id": model_dict["model_id"],
"validate": bool(model_dict["validate"]),
Expand All @@ -422,7 +398,6 @@ def train_qexp(model_dict=model_dict):

def run_evals(model_dict=model_dict):
logger.info("Attempting to run evaluation")
pipeline = Pipeline()
args = {
"model_name": model_dict["model_name"],
"eval_type": model_dict["eval_type"],
Expand Down
13 changes: 10 additions & 3 deletions gamechangerml/api/fastapi/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

# get environ vars
# Host for the ML API; fall back to localhost when unset or empty.
GC_ML_HOST = os.environ.get("GC_ML_HOST", default="localhost")

# MODEL_LOAD=False disables loading models on API start (development aid).
# NOTE: environment values always arrive as strings, so normalize the case
# before comparing; any value other than an explicit false/0 keeps model
# loading enabled (the safe default for production).
MODEL_LOAD_FLAG = os.environ.get("MODEL_LOAD", default=True)
if str(MODEL_LOAD_FLAG).strip().lower() in ("false", "0"):
    MODEL_LOAD_FLAG = False
else:
    MODEL_LOAD_FLAG = True

if GC_ML_HOST == "":
    GC_ML_HOST = "localhost"
# Filesystem artifacts to skip when scanning model directories.
ignore_files = ["._.DS_Store", ".DS_Store", "index"]
Expand All @@ -14,7 +21,9 @@

# Redis Cache Variables
latest_intel_model_sent = CacheVariable("latest_intel_model_sent", True)
latest_intel_model_sim = CacheVariable("latest sentence searcher (similarity model + sent index)", True)
latest_intel_model_sim = CacheVariable(
"latest sentence searcher (similarity model + sent index)", True
)
latest_intel_model_encoder = CacheVariable("latest encoder model", True)
latest_qa_model = CacheVariable("latest_qa_model")
latest_intel_model_trans = CacheVariable("latest_intel_model_trans")
Expand All @@ -24,15 +33,13 @@
QEXP_MODEL_NAME = CacheVariable("QEXP_MODEL_NAME")
QEXP_JBOOK_MODEL_NAME = CacheVariable("QEXP_JBOOK_MODEL_NAME")
WORD_SIM_MODEL = CacheVariable("WORD_SIM_MODEL")
# LTR_MODEL = CacheVariable("LTR_MODEL")

model_path_dict = get_model_paths()
LOCAL_TRANSFORMERS_DIR.value = model_path_dict["transformers"]
SENT_INDEX_PATH.value = model_path_dict["sentence"]
QEXP_MODEL_NAME.value = model_path_dict["qexp"]
QEXP_JBOOK_MODEL_NAME.value = model_path_dict["qexp_jbook"]
WORD_SIM_MODEL.value = model_path_dict["word_sim"]
# LTR_MODEL.value = model_path_dict["ltr_model"]

t_list = []
try:
Expand Down
9 changes: 8 additions & 1 deletion gamechangerml/api/utils/processmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
loading_corpus = "training: load_corpus"
training = "training: train_model"
reloading = "models: reloading_models"
ltr_creation = "models: ltr_creation"

# the dictionary that holds all the progress values
try:
Expand All @@ -21,6 +22,7 @@
training: False,
loading_corpus: False,
reloading: False,
ltr_creation: False,
}

PROCESS_STATUS.value = {"flags": default_flags}
Expand All @@ -35,7 +37,12 @@ def update_status(key, progress=0, total=100, message="", failed=False):
if progress == total or failed:
date = datetime.now()
date_string = date.strftime("%Y-%m-%d %H:%M:%S")
completed = {"process": key, "total": total, "message":message, "date": date_string}
completed = {
"process": key,
"total": total,
"message": message,
"date": date_string,
}
with thread_lock:
if key in PROCESS_STATUS.value:
temp = PROCESS_STATUS.value
Expand Down
Loading

0 comments on commit ad8da24

Please sign in to comment.