Skip to content

Commit 499b013

Browse files
committed
Merge branch 'add_swiss_legal_evals' into dev
2 parents b7259d8 + f6b50b4 commit 499b013

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

community_tasks/swiss_legal_evals.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -914,27 +914,45 @@ def prompt_fn(line: dict, task_name: str = None):
914914
# "claude-3-5-sonnet": "anthropic/claude-3-5-sonnet-20241022",
915915
}
916916

917-
METRICS_TO_USE = [
917+
LEXICAL_METRICS = [
918918
"bleu",
919919
"chrf",
920920
"bleu_sentence",
921921
"chrf_sentence",
922922
"ter_sentence",
923923
"meteor",
924+
]
925+
GPU_METRICS = [
924926
"bert_score",
925927
"bleurt_large",
926928
"xcomet_xxl",
929+
]
930+
API_METRICS = [
927931
"gemba_mqm_gpt_4o",
928932
"slt_judge_gpt_4o",
929933
]
930-
METRICS_TO_USE.extend(
931-
[
932-
f"slt_judge_{judge_model}-{system_style}-{few_shot_style}".replace("-", "_")
933-
for judge_model in JUDGE_MODELS
934-
for system_style in ["basic", "detailed"]
935-
for few_shot_style in ["diverse", "single"]
936-
]
937-
)
934+
JUDGE_METRICS = [
935+
f"slt_judge_{judge_model}-{system_style}-{few_shot_style}".replace("-", "_")
936+
for judge_model in JUDGE_MODELS
937+
for system_style in ["basic", "detailed"]
938+
for few_shot_style in ["diverse", "single"]
939+
]
940+
941+
# Names of the metric groups to compute. Recognised group names are
# "lexical", "gpu", "api" and "judge"; several may be listed at once.
# The exact value ["debug"] is special-cased below to select only "bleu".
metrics_to_evaluate = ["judge"]

# Group name -> metric names. Insertion order fixes the order in which the
# groups are concatenated into METRICS_TO_USE.
_METRIC_GROUPS = {
    "lexical": LEXICAL_METRICS,
    "gpu": GPU_METRICS,
    "api": API_METRICS,
    "judge": JUDGE_METRICS,
}

# Every known group that was asked for. The previous if/elif chain stopped at
# the first match, so listing more than one group silently dropped the rest.
_requested_groups = [group for group in _METRIC_GROUPS if group in metrics_to_evaluate]

if metrics_to_evaluate == ["debug"]:
    METRICS_TO_USE = ["bleu"]
elif _requested_groups:
    # Build a fresh list so the per-group constants are never mutated.
    METRICS_TO_USE = [
        metric for group in _requested_groups for metric in _METRIC_GROUPS[group]
    ]
else:
    # No recognised group requested: fall back to everything except the
    # judge metrics.
    METRICS_TO_USE = LEXICAL_METRICS + GPU_METRICS + API_METRICS
938956
logger.info(f"Available metrics: {METRICS_TO_USE}")
939957

940958
METRICS = {}

0 commit comments

Comments
 (0)