
Commit 20eab95

Authored by Hamish Ivison (hamishivi) with Costa Huang (vwxyzjn)
Upload metadata along with model weights (#320)
* uploading metadata
* lint and bugfix
* Update eval/utils.py

Co-authored-by: Hamish Ivison <[email protected]>
Co-authored-by: Costa Huang <[email protected]>
Parent: 28f0f54 · Commit: 20eab95

File tree

16 files changed: +139 −49 lines changed (10 of the 16 changed files are shown below)

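The substantive change: every eval script imports a new helper, check_and_upload_model_metadata, from eval/utils.py and calls it immediately after upload_results_to_hf, so that metadata about the evaluated model is uploaded alongside its results. The eval/utils.py hunk that defines the helper is among the six changed files not shown here, so the sketch below is a hypothetical reconstruction inferred only from the call sites: the function name and argument order come from the diffs, while the metadata file layout, its contents, and the skip-if-present check are assumptions.

# Hypothetical sketch only: the real implementation lives in eval/utils.py and
# is not part of this excerpt. The function name and call signature match the
# call sites below; everything else is an assumption.
import json
import os
import tempfile

from huggingface_hub import HfApi


def check_and_upload_model_metadata(model_name_or_path, hf_dataset_id, hf_upload_name, hf_revision=None):
    """Upload a small JSON record of which model produced the eval results."""
    api = HfApi()
    # Assumed layout: one metadata file per model inside the results dataset.
    path_in_repo = f"results/{hf_upload_name}/metadata.json"
    # The "check": skip the upload if metadata for this model already exists.
    if api.file_exists(hf_dataset_id, path_in_repo, repo_type="dataset"):
        return
    metadata = {
        "model_name_or_path": model_name_or_path,
        "hf_revision": hf_revision,
        "is_local_model": os.path.isdir(model_name_or_path),
    }
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(metadata, f, indent=2)
        tmp_path = f.name
    api.upload_file(
        path_or_fileobj=tmp_path,
        path_in_repo=path_in_repo,
        repo_id=hf_dataset_id,
        repo_type="dataset",
    )
    os.unlink(tmp_path)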

eval/MATH/run_eval.py

Lines changed: 5 additions & 1 deletion
@@ -11,7 +11,8 @@
     query_openai_chat_model,
     dynamic_import_function,
     load_hf_tokenizer,
-    upload_results_to_hf
+    upload_results_to_hf,
+    check_and_upload_model_metadata
 )
 from eval.MATH.examplars import EXAMPLARS as MATH_EXAMPLARS
 from eval.MATH.utilities import last_boxed_only_string, remove_boxed
@@ -214,6 +215,9 @@ def apply_chat_format(example, demonstrations, tokenizer):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/alpaca_farm/run_eval.py

Lines changed: 4 additions & 1 deletion
@@ -8,7 +8,7 @@
 import datasets
 import vllm
 from alpaca_eval import evaluate as alpaca_farm_evaluate
-from eval.utils import query_openai_chat_model, query_openai_model, generate_completions, dynamic_import_function, load_hf_lm, load_hf_tokenizer, upload_results_to_hf
+from eval.utils import query_openai_chat_model, query_openai_model, generate_completions, dynamic_import_function, load_hf_lm, load_hf_tokenizer, upload_results_to_hf, check_and_upload_model_metadata

 def main(args):
     random.seed(42)
@@ -159,6 +159,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/bbh/run_eval.py

Lines changed: 5 additions & 1 deletion
@@ -14,7 +14,8 @@
     query_openai_chat_model,
     dynamic_import_function,
     load_hf_tokenizer,
-    upload_results_to_hf
+    upload_results_to_hf,
+    check_and_upload_model_metadata
 )


@@ -196,6 +197,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/codex_humaneval/run_eval.py

Lines changed: 4 additions & 0 deletions
@@ -11,6 +11,7 @@
     dynamic_import_function,
     load_hf_tokenizer,
     upload_results_to_hf,
+    check_and_upload_model_metadata,
 )
 from eval.codex_humaneval.data import write_jsonl, read_problems
 from eval.codex_humaneval.evaluation import evaluate_functional_correctness
@@ -195,6 +196,9 @@ def apply_chat_format(tokenizer, inst, suffix):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/gsm/run_eval.py

Lines changed: 5 additions & 1 deletion
@@ -13,7 +13,8 @@
     query_openai_chat_model,
     dynamic_import_function,
     load_hf_tokenizer,
-    upload_results_to_hf
+    upload_results_to_hf,
+    check_and_upload_model_metadata
 )
 from eval.gsm.examplars import EXAMPLARS as GSM_EXAMPLARS

@@ -199,6 +200,9 @@ def apply_chat_format(example, tokenizer):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/ifeval/run_eval.py

Lines changed: 5 additions & 1 deletion
@@ -20,7 +20,8 @@
     query_openai_chat_model,
     dynamic_import_function,
     load_hf_tokenizer,
-    upload_results_to_hf
+    upload_results_to_hf,
+    check_and_upload_model_metadata
 )
 from eval.ifeval import instructions_registry

@@ -351,6 +352,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/mbpp/run_eval.py

Lines changed: 4 additions & 0 deletions
@@ -12,6 +12,7 @@
     dynamic_import_function,
     load_hf_tokenizer,
     upload_results_to_hf,
+    check_and_upload_model_metadata,
 )
 from eval.codex_humaneval.data import write_jsonl
 from eval.mbpp.evaluation import compute_code_eval
@@ -207,6 +208,9 @@ def apply_chat_format(tokenizer, inst, suffix):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/mmlu/run_eval.py

Lines changed: 4 additions & 1 deletion
@@ -6,7 +6,7 @@
 import json
 from tqdm import tqdm
 from eval.mmlu.categories import subcategories, categories
-from eval.utils import get_next_word_predictions, load_hf_tokenizer, load_hf_lm, query_openai_chat_model, dynamic_import_function, upload_results_to_hf
+from eval.utils import get_next_word_predictions, load_hf_tokenizer, load_hf_lm, query_openai_chat_model, dynamic_import_function, upload_results_to_hf, check_and_upload_model_metadata


 choices = ["A", "B", "C", "D"]
@@ -270,6 +270,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/toxigen/run_eval.py

Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,7 @@
     query_openai_chat_model,
     load_hf_tokenizer,
     upload_results_to_hf,
+    check_and_upload_model_metadata,
 )
 from eval.utils import dynamic_import_function

@@ -198,6 +199,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == "__main__":

eval/truthfulqa/run_eval.py

Lines changed: 4 additions & 0 deletions
@@ -15,6 +15,7 @@
     score_completions,
     dynamic_import_function,
     upload_results_to_hf,
+    check_and_upload_model_metadata,
 )
 from eval.truthfulqa.utilities import (
     format_prompt,
@@ -408,6 +409,9 @@ def main(args):
             primary_score=primary_score,
             prepend_timestamp=True,
         )
+        check_and_upload_model_metadata(
+            args.model_name_or_path, args.upload_to_hf, args.hf_upload_name, hf_revision=args.hf_revision
+        )


 if __name__ == '__main__':
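Every call site above passes the same four values. For reference, an illustrative invocation follows; the model path, dataset id, and upload name are placeholders, not values from this commit:

# Placeholder values shown only to document the argument roles.
check_and_upload_model_metadata(
    "output/my_finetuned_model",   # args.model_name_or_path: local directory or Hub model id
    "my-org/eval-results",         # args.upload_to_hf: Hub dataset that receives the eval results
    "my_finetuned_model",          # args.hf_upload_name: name under which this model's runs are stored
    hf_revision=None,              # args.hf_revision: optional model revision on the Hub
)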
