diff --git a/community_tasks/french_evals.py b/community_tasks/french_evals.py
index 4f58ea7ef..607d28eb8 100644
--- a/community_tasks/french_evals.py
+++ b/community_tasks/french_evals.py
@@ -46,6 +46,7 @@
 from lighteval.tasks.extended.ifeval.main import ifeval_metrics
 from lighteval.tasks.lighteval_task import LightevalTaskConfig
 from lighteval.tasks.requests import Doc
+from lighteval.utils.utils import as_list


 # Ifeval-fr prompt function
@@ -80,6 +81,22 @@ def prompt_gpqa_fr(line, task_name: str = None):
     )


+# BAC-fr prompt function
+def prompt_bac_fr(line, task_name: str = None):
+    prompt = f"Enoncé: {line['enonce']}\n{line['instruction']}\n"
+    if line["choix"] is not None:  # Multichoice evaluation
+        # prompt += "\n".join([f"{LETTER_INDICES[ix]}.{choix}" for ix, choix in enumerate(line["choix"])])
+        return Doc(
+            task_name=task_name,
+            query=prompt,
+            choices=as_list(line["choix"]),
+            gold_index=line["choix"].index(line["choix correct"]),
+            instruction="",
+        )
+    else:
+        return Doc(task_name=task_name, query=prompt, choices=[line["reponse"]], gold_index=0, instruction="")
+
+
 # IFEVal-fr task
@@ -117,5 +134,23 @@ def prompt_gpqa_fr(line, task_name: str = None):
     version=0,
 )

+# BAC-fr task
+bac_fr_task = LightevalTaskConfig(
+    name="bac-fr",
+    suite=["community"],
+    prompt_function=prompt_bac_fr,
+    hf_repo="fr-gouv-coordination-ia/bac-fr",
+    hf_subset="default",
+    hf_avail_splits=["train"],
+    evaluation_splits=["train"],
+    few_shots_split=None,
+    few_shots_select="random_sampling",
+    generation_size=1,
+    metric=[Metrics.quasi_exact_match_math, Metrics.exact_match],
+    stop_sequence=["\n"],
+    trust_dataset=True,
+    version=0,
+)
+
 # STORE YOUR EVALS
-TASKS_TABLE = [ifeval_fr_task, gpqa_fr_task]
+TASKS_TABLE = [ifeval_fr_task, gpqa_fr_task, bac_fr_task]
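
For reference, a minimal sketch of how the new prompt_bac_fr maps a dataset row onto a Doc, assuming the patched community_tasks/french_evals.py is importable. The rows below are invented for illustration and only mirror the column names the diff reads (enonce, instruction, choix, "choix correct", reponse); the real fr-gouv-coordination-ia/bac-fr schema may differ.

    # Hypothetical rows shaped after the columns accessed in prompt_bac_fr;
    # values are invented, not taken from the bac-fr dataset.
    multichoice_row = {
        "enonce": "Soit f(x) = x^2.",
        "instruction": "Quelle est la dérivée de f ?",
        "choix": ["x", "2x", "x^2"],
        "choix correct": "2x",
    }
    open_row = {
        "enonce": "Soit f(x) = x^2.",
        "instruction": "Donnez la dérivée de f.",
        "choix": None,
        "reponse": "2x",
    }

    # Multichoice branch: choices come from "choix", and gold_index points at
    # the position of the correct answer within them.
    doc = prompt_bac_fr(multichoice_row, task_name="bac-fr")
    assert doc.choices == ["x", "2x", "x^2"] and doc.gold_index == 1

    # Open-ended branch: the single reference answer is the only choice.
    doc = prompt_bac_fr(open_row, task_name="bac-fr")
    assert doc.choices == ["2x"] and doc.gold_index == 0

Once the file is passed via --custom-tasks community_tasks/french_evals.py, the task should be selectable as community|bac-fr|0|0 (suite|task|few-shot|truncation), though the exact CLI invocation depends on the lighteval version in use.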