Skip to content

Commit

Permalink
Merge pull request #103 from dod-advana/task/UOT-132217
Browse files: browse the repository at this point in the history
Task/UOT-132217 - more changes to finetuning data
  • Loading branch information
rha930 committed Mar 17, 2022
2 parents c694c29 + 539528d commit 0dbf690
Show file tree
Hide file tree
Showing 26 changed files with 1,345 additions and 767 deletions.
29 changes: 26 additions & 3 deletions gamechangerml/api/fastapi/routers/controls.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,18 +664,32 @@ def update_metadata(model_dict):
try:
index_path = model_dict["index_path"]
except:
index_path = os.path.join(MODEL_PATH, "sent_index_20210715")
index_path = os.path.join(
Config.LOCAL_PACKAGED_MODELS_DIR, model_dict["sentence"]
)
try:
update_eval_data = model_dict['update_eval_data']
except:
update_eval_data = False
try:
testing_only = model_dict["testing_only"]
except:
testing_only = False
try:
upload = model_dict["upload"]
except:
upload = True

logger.info(f"Testing only is set to: {testing_only}")

args = {
"meta_steps": meta_steps,
"corpus_dir": corpus_dir,
"retriever": retriever,
"index_path": index_path,
"update_eval_data": update_eval_data
"update_eval_data": update_eval_data,
"testing_only": testing_only,
"upload": upload
}

pipeline.run(
Expand All @@ -695,13 +709,18 @@ def finetune_sentence(model_dict):
remake_train_data = model_dict["remake_train_data"]
except:
remake_train_data = False
try:
model = model_dict["model"]
except:
model = None
args = {
"batch_size": 8,
"epochs": int(model_dict["epochs"]),
"warmup_steps": int(model_dict["warmup_steps"]),
"testing_only": bool(testing_only),
"remake_train_data": bool(remake_train_data),
"retriever": MODELS.sentence_searcher,
"model": model
}
pipeline.run(
build_type="sent_finetune",
Expand Down Expand Up @@ -749,10 +768,14 @@ def train_qexp(model_dict):

def run_evals(model_dict):
logger.info("Attempting to run evaluation")
try:
sample_limit = int(model_dict["sample_limit"])
except:
sample_limit = 15000
args = {
"model_name": model_dict["model_name"],
"eval_type": model_dict["eval_type"],
"sample_limit": int(model_dict["sample_limit"]),
"sample_limit": sample_limit,
"validation_data": model_dict["validation_data"],
}
pipeline.run(
Expand Down
13 changes: 1 addition & 12 deletions gamechangerml/api/tests/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from http.client import HTTPConnection # py3

from gamechangerml.src.search.query_expansion.utils import remove_original_kw
# from gamechangerml import DATA_PATH
#from gamechangerml import DATA_PATH

from .test_examples import TestSet

Expand Down Expand Up @@ -301,17 +301,6 @@ def test_qa_outside_scope():
# resp = http.post(API_URL + "/trainModel", json=model_dict)
# assert resp.ok == True

# def test_trainModel_sent_finetune():
# model_dict = {
# "build_type": "sent_finetune",
# "batch_size": 32,
# "epochs": 1,
# "warmup_steps": 100,
# "testing_only": True
# }
# resp = http.post(API_URL + "/trainModel", json=model_dict)
# assert resp.ok == True

# def test_trainModel_eval_squad():
# model_dict = {
# "build_type": "eval",
Expand Down
1 change: 0 additions & 1 deletion gamechangerml/configs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ class ValidationConfig:
"validation_dir": os.path.join(DATA_PATH, "validation"),
"evaluation_dir": os.path.join(DATA_PATH, "evaluation"),
"user_dir": os.path.join(DATA_PATH, "user_data"),
# location with smaller set of corpus JSONs
"test_corpus_dir": "gamechangerml/test_corpus",
"squad": {
"dev": "original/squad2.0/dev-v2.0.json",
Expand Down
31 changes: 31 additions & 0 deletions gamechangerml/data/test_data/MatamoFeedback_TEST.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
,event_name,createdAt,user_id,value_1,value_2,value_3,value_4,value_7,value_5
39,intelligent_search_thumbs_up,2021-08-31T17:14:00.347Z,12345,search_text: dcma,title_returned: Memo Joint Officer Handbook - Staffing and Action Guide (2011),,,,
2,intelligent_search_thumbs_up,2021-04-05T18:19:15.673Z,12345,title_returned: DoDD 5110.04 Washington Headquarters Services (WHS),search_text: plain language policy,,,,
16,qa_thumbs_down,2021-05-25T15:30:03.401Z,12345,question: who is the president?,QA answer: Russell T. Voughr,,,,
38,intelligent_search_thumbs_up,2021-08-27T16:39:26.347Z,12345,search_text: senior accountable official,title_returned: DoDI 5010.40 Managers' Internal Control Program Procedures,,,,
8,intelligent_search_thumbs_up,2021-04-22T12:30:23.340Z,12345,title_returned: AFI 11-235 SPECIALIZED REFUELING OPERATIONS,search_text: wet wing,,,,
34,intelligent_search_thumbs_up,2021-08-13T00:56:55.410Z,12345,search_text: security cooperation,title_returned: Memo 2015 - CNAS - Security Cooperation and Assistance,,,,
41,intelligent_search_thumbs_up,2021-09-23T14:41:50.557Z,12345,search_text: telework,title_returned: AFI 36-816 Civilian Telework Program,,,,
40,intelligent_search_thumbs_up,2021-09-08T23:54:40.935Z,12345,search_text: military,title_returned: DoDI 1332.45 Retention Determinations for Non-Deployable Service Members,,,,
20,qa_thumbs_down,2021-05-27T16:23:01.079Z,12345,question: who is the secretary of defense?,QA answer: David L. Norquist,,,,
1,intelligent_search_thumbs_up,2021-04-05T18:16:55.146Z,12345,title_returned: CJCSM 3500.03E Joint Training Manual for the Armed Forces of the United States,search_text: cyber range,,,,
23,intelligent_search_thumbs_up,2021-06-24T19:45:57.054Z,12345,title_returned: DoDI 1000.01 Identification (ID) Cards Required by the Geneva Conventions,search_text: who is sergeant major of the army,,,,
15,intelligent_search_thumbs_up,2021-05-21T16:25:13.292Z,12345,title_returned: DoDD 5105.60 National Geospatial-Intelligence Agency (NGA),search_text: geospatial,,,,
27,intelligent_search_thumbs_up,2021-07-21T13:48:49.176Z,12345,"search_text: ""use of alcohol"" and events",title_returned: AFI 34-219 ALCOHOLIC BEVERAGE PROGRAM,,,,
42,intelligent_search_thumbs_up,2021-09-23T16:11:31.850Z,12345,search_text: physical fitness,title_returned: MCO 1700.39 MARINE CORPS RECREATION PROGRAMS,,,,
35,intelligent_search_thumbs_up,2021-08-13T03:16:11.860Z,12345,search_text: pizza,title_returned: MISC PUBS GREECE,,,,
31,intelligent_search_thumbs_up,2021-08-03T19:02:06.140Z,12345,search_text: navy,title_returned: OPNAVINST 4650.17 UNUSUALLY ARDUOUS SEA DUTY FOR TRAVEL AND TRANSPORTATION ENTITLEMENTS,,,,
5,intelligent_search_thumbs_up,2021-04-15T11:42:48.684Z,12345,title_returned: DoDD 3000.06 Combat Support Agencies (CSAs),search_text: CSA,,,,
10,intelligent_search_thumbs_up,2021-05-11T19:38:12.825Z,12345,title_returned: SECNAVINST 5100.10K DEPARTMENT OF THE NAVY SAFETY PROGRAM,search_text: safety and occupational health,,,,
37,intelligent_search_thumbs_up,2021-08-25T18:08:27.541Z,12345,search_text: control system automation,title_returned: CIM 11000.7 FACILITIES ENERGY MANUAL,,,,
25,qa_thumbs_up,2021-07-15T14:36:45.517Z,12345,question: what is the mission of the national institute of health?,"QA answer: increase research in the field of viral disease causes, prevention, and treatment",,,,
30,intelligent_search_thumbs_up,2021-08-03T19:00:42.759Z,12345,search_text: telework,title_returned: AFI 36-816 Civilian Telework Program,,,,
4,intelligent_search_thumbs_up,2021-04-14T18:18:26.068Z,12345,title_returned: DoDD 5105.77 National Guard Bureau (NGB),"search_text: ""National Guard"" and NGB",,,,
29,intelligent_search_thumbs_up,2021-08-03T12:50:35.464Z,12345,search_text: International Cooperative Administrative Support Services (ICASS),title_returned: DoDI 7060.06 International Cooperative Administrative Support Services (ICASS),,,,
0,intelligent_search_thumbs_up,2021-03-30T13:13:04.319Z,12345,title_returned: CJCSI 3207.01C Department of Defense Support to Humanitarian Mine Action,search_text: USAR,,,,
13,intelligent_search_thumbs_up,2021-05-18T15:35:06.388Z,12345,title_returned: CJCSI 5123.01H Charter of the Joint Requirements Oversight Council (JROC) and the Implementation of the Joint Capabilities Integration and Development System,search_text: interoperability,,,,
14,qa_thumbs_up,2021-05-21T00:09:46.558Z,12345,question: who is the sergeant major of the army?,QA answer: Sergeant Major of the Army will serve as the senior enlisted assistant and advisor to the Chief of Staff,,,,
19,qa_thumbs_up,2021-05-26T15:15:29.828Z,12345,question: what is jadc2?,QA answer: MDC2 is renamed Joint All Domain Command and Control ( JADC2 ). 2. ( U ) The JROC acknowledges that the campaign plan is a living document that will evolve as experiments and exercises shape common understanding of JADC2,,,,
17,qa_thumbs_down,2021-05-25T16:53:38.305Z,12345,question: what is the mission of dcma?,"QA answer: The mission of Headquarters, US Army Western Command is to serve as the Army component to CINCPAC for the Pacific Command",,,,
32,intelligent_search_thumbs_up,2021-08-09T14:02:18.534Z,12345,"search_text: ""synchronizer"" and ""intelligence""",title_returned: DoDD 5143.01 Under Secretary of Defense for Intelligence and Security (USD(I&S)),,,,
7,intelligent_search_thumbs_up,2021-04-21T13:13:18.303Z,12345,title_returned: SECNAVINST 1752.4C SEXUAL ASSAULT PREVENTION AND RESPONSE PROGRAM PROCEDURES,search_text: sexual assault prevention,,,,
Loading

0 comments on commit 0dbf690

Please sign in to comment.