From ea27157cad485c0cba742d4b2dbf658bde6e176e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 26 Feb 2024 13:56:23 +0100 Subject: [PATCH] init --- src/lighteval/tasks/lighteval_task.py | 2 -- .../custom_tasks/custom_evaluation_tasks.py | 17 +++++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index 1b0f153c6..1875ec126 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -83,8 +83,6 @@ def __post_init__(self): self.hf_avail_splits = ["train", "validation", "test"] if self.evaluation_splits is None: self.evaluation_splits = ["validation"] - if self.stop_sequence is None: - self.stop_sequence = ["\n"] # Convert list to tuple for hashing self.metric = tuple(self.metric) diff --git a/tasks_examples/custom_tasks/custom_evaluation_tasks.py b/tasks_examples/custom_tasks/custom_evaluation_tasks.py index 0ed928e59..a7a7a5af0 100644 --- a/tasks_examples/custom_tasks/custom_evaluation_tasks.py +++ b/tasks_examples/custom_tasks/custom_evaluation_tasks.py @@ -25,6 +25,7 @@ hf_repo="hellaswag", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="winogrande", @@ -32,6 +33,7 @@ hf_repo="winogrande", hf_subset="winogrande_xl", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="piqa", @@ -39,6 +41,7 @@ hf_repo="piqa", hf_subset="plain_text", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="siqa", @@ -47,6 +50,7 @@ hf_subset="default", hf_avail_splits=["train", "validation"], metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="openbookqa", @@ -54,6 +58,7 @@ hf_repo="openbookqa", hf_subset="main", metric=["loglikelihood_acc", 
"loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="arc:easy", @@ -63,6 +68,7 @@ evaluation_splits=["test"], generation_size=1, metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="arc:challenge", @@ -72,6 +78,7 @@ evaluation_splits=["test"], generation_size=1, metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="commonsense_qa", @@ -79,6 +86,7 @@ hf_repo="commonsense_qa", hf_subset="default", metric=["loglikelihood_acc", "loglikelihood_acc_norm_nospace"], + stop_sequence=["\n"], ), ] @@ -176,6 +184,7 @@ def natural_questions_prompt(line, task_name: str = None): hf_repo="super_glue", hf_subset="boolq", metric=["target_perplexity"], + stop_sequence=["\n"], ), LightevalTaskConfig( name="quac", @@ -236,9 +245,9 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, - stop_sequence=stop_sequence, + stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, ) @@ -303,7 +312,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, - stop_sequence=stop_sequence, + stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, ) @@ -445,7 +454,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, - stop_sequence=stop_sequence, + stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, ) @@ -537,7 +546,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, - stop_sequence=stop_sequence, + stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), output_regex=output_regex, frozen=frozen, )