From 5bea2f6d5954586fe394cae11c155b9a4e318e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 27 Jan 2025 09:54:50 +0100 Subject: [PATCH] added defaults --- src/lighteval/metrics/metrics.py | 22 +++++++++++++++++++--- src/lighteval/metrics/metrics_sample.py | 8 ++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/lighteval/metrics/metrics.py b/src/lighteval/metrics/metrics.py index 306ae603..8fa5496e 100644 --- a/src/lighteval/metrics/metrics.py +++ b/src/lighteval/metrics/metrics.py @@ -365,9 +365,25 @@ class Metrics(Enum): corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3).compute, higher_is_better=True, ) - pass_at_k_32 = SampleLevelMetric( - metric_name="pass@k:32", - sample_level_fn=PassAtK(k=32, strip_strings=True).compute, + pass_at_1 = SampleLevelMetric( + metric_name="pass@1:32_samples", + sample_level_fn=PassAtK(k=1, n=32, strip_strings=True).compute, + category=MetricCategory.GENERATIVE_SAMPLING, + use_case=MetricUseCase.REASONING, + corpus_level_fn=np.mean, + higher_is_better=True, + ) + pass_at_10 = SampleLevelMetric( + metric_name="pass@10:32_samples", + sample_level_fn=PassAtK(k=10, n=32, strip_strings=True).compute, + category=MetricCategory.GENERATIVE_SAMPLING, + use_case=MetricUseCase.REASONING, + corpus_level_fn=np.mean, + higher_is_better=True, + ) + pass_at_100 = SampleLevelMetric( + metric_name="pass@100:32_samples", + sample_level_fn=PassAtK(k=100, n=32, strip_strings=True).compute, category=MetricCategory.GENERATIVE_SAMPLING, use_case=MetricUseCase.REASONING, corpus_level_fn=np.mean, diff --git a/src/lighteval/metrics/metrics_sample.py b/src/lighteval/metrics/metrics_sample.py index 760d2bef..0720d089 100644 --- a/src/lighteval/metrics/metrics_sample.py +++ b/src/lighteval/metrics/metrics_sample.py @@ -1067,10 +1067,10 @@ def __init__( strip_strings (bool, optional): Whether to strip both reference and predictions. Defaults to False. 
sample_scoring_function (callable or str, optional): Function to use to score each sample. Either pass the full function (should take a string prediction and a string gold, and return a score between 0 and 1) - or a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want. Defaults to "full". - `prefix` checks if the prediction starts with the gold, - `suffix` if the prediction ends with the gold, - `full` if the prediction and gold are equal + a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want, or nothing to default to "full". + `prefix` checks if the prediction starts with the gold, + `suffix` if the prediction ends with the gold, + `full` if the prediction and gold are equal """ self.k = k self.n = n