Skip to content

Commit 543617f

Browse files
Bump version to v0.4.4 ; Fixes to TMMLUplus (#2280)
1 parent 7a1614e commit 543617f

File tree

77 files changed

+195
-220
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

77 files changed

+195
-220
lines changed

lm_eval/__main__.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -299,13 +299,6 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
299299
"When `fewshot_as_multiturn` is selected, `apply_chat_template` must be set (either to `True` or to the chosen template name)."
300300
)
301301

302-
if (
303-
args.num_fewshot is None or args.num_fewshot == 0
304-
) and args.fewshot_as_multiturn:
305-
raise ValueError(
306-
"If fewshot_as_multiturn is set, num_fewshot must be greater than 0."
307-
)
308-
309302
if args.include_path is not None:
310303
eval_logger.info(f"Including path: {args.include_path}")
311304
task_manager = TaskManager(args.verbosity, include_path=args.include_path)

lm_eval/tasks/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -489,10 +489,12 @@ def _get_task_and_group(self, task_dir: str):
489489
if attr in config:
490490
if attr == "group" and print_info:
491491
self.logger.info(
492-
"`group` and `group_alias` keys in tasks' configs will no longer be used in the next release of lm-eval. "
493-
"`tag` will be used to allow to call a collection of tasks just like `group`. "
494-
"`group` will be removed in order to not cause confusion with the new ConfigurableGroup "
495-
"which will be the official way to create groups with addition of group-wide configurations."
492+
"`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
493+
"The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
494+
"`group`s which aggregate across subtasks must be only defined in a separate group config file, "
495+
"which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
496+
"Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
497+
"for more information."
496498
)
497499
print_info = False
498500
# attr = "tag"
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
group: tmmluplus
2+
task:
3+
- tmmluplus_other
4+
- tmmluplus_social_sciences
5+
- tmmluplus_humanities
6+
- tmmluplus_STEM
7+
aggregate_metric_list:
8+
- metric: acc
9+
weight_by_size: True
10+
- metric: acc_norm
11+
weight_by_size: True
12+
metadata:
13+
version: 2.0
Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
group: tmmluplus_STEM
2+
task:
3+
- tmmluplus_STEM_tasks
4+
aggregate_metric_list:
5+
- metric: acc
6+
weight_by_size: True
7+
- metric: acc_norm
8+
weight_by_size: True
9+
metadata:
10+
version: 2.0
Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
group: tmmluplus_humanities
2+
task:
3+
- tmmluplus_humanities_tasks
4+
aggregate_metric_list:
5+
- metric: acc
6+
weight_by_size: True
7+
- metric: acc_norm
8+
weight_by_size: True
9+
metadata:
10+
version: 2.0
Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
group: tmmluplus_other
2+
task:
3+
- tmmluplus_other_tasks
4+
aggregate_metric_list:
5+
- metric: acc
6+
weight_by_size: True
7+
- metric: acc_norm
8+
weight_by_size: True
9+
metadata:
10+
version: 2.0
Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
group: tmmluplus_social_sciences
2+
task:
3+
- tmmluplus_social_sciences_tasks
4+
aggregate_metric_list:
5+
- metric: acc
6+
weight_by_size: True
7+
- metric: acc_norm
8+
weight_by_size: True
9+
metadata:
10+
version: 2.0

lm_eval/tasks/tmmluplus/default/_default_template_yaml renamed to lm_eval/tasks/tmmluplus/default/_tmmluplus_template_yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,4 @@ metric_list:
1616
aggregation: mean
1717
higher_is_better: true
1818
metadata:
19-
version: 1.0
19+
version: 2.0

lm_eval/tasks/tmmluplus/default/tmmluplus.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
"dataset_name": "accounting"
22
"description": "以下為會計學的單選題,請提供正確答案的選項。\n\n"
3-
"group": "tmmluplus_other"
4-
"group_alias": "other"
5-
"include": "_default_template_yaml"
3+
"tag": "tmmluplus_other_tasks"
4+
"include": "_tmmluplus_template_yaml"
65
"task": "tmmluplus_accounting"
76
"task_alias": "accounting"

0 commit comments

Comments
 (0)