
Commit

added defaults
clefourrier committed Jan 27, 2025
1 parent 93d108e commit 5bea2f6
Showing 2 changed files with 23 additions and 7 deletions.
22 changes: 19 additions & 3 deletions src/lighteval/metrics/metrics.py
@@ -365,9 +365,25 @@ class Metrics(Enum):
         corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3).compute,
         higher_is_better=True,
     )
-    pass_at_k_32 = SampleLevelMetric(
-        metric_name="pass@k:32",
-        sample_level_fn=PassAtK(k=32, strip_strings=True).compute,
+    pass_at_1 = SampleLevelMetric(
+        metric_name="pass@1:32_samples",
+        sample_level_fn=PassAtK(k=1, n=32, strip_strings=True).compute,
+        category=MetricCategory.GENERATIVE_SAMPLING,
+        use_case=MetricUseCase.REASONING,
+        corpus_level_fn=np.mean,
+        higher_is_better=True,
+    )
+    pass_at_10 = SampleLevelMetric(
+        metric_name="pass@10:32_samples",
+        sample_level_fn=PassAtK(k=10, n=32, strip_strings=True).compute,
+        category=MetricCategory.GENERATIVE_SAMPLING,
+        use_case=MetricUseCase.REASONING,
+        corpus_level_fn=np.mean,
+        higher_is_better=True,
+    )
+    pass_at_100 = SampleLevelMetric(
+        metric_name="pass@100:32_samples",
+        sample_level_fn=PassAtK(k=100, n=32, strip_strings=True).compute,
         category=MetricCategory.GENERATIVE_SAMPLING,
         use_case=MetricUseCase.REASONING,
         corpus_level_fn=np.mean,
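Note on the three new defaults: each metric is computed from the same n=32 generations per sample, only the k in pass@k changes, and the corpus-level score is simply the mean of the per-sample estimates (corpus_level_fn=np.mean above). A minimal sketch of the standard unbiased pass@k estimator (the Chen et al., 2021 formulation; it is assumed, not confirmed by this diff, that PassAtK follows it — the helper below is illustrative, not lighteval's code):

from math import comb

def estimate_pass_at_k(n: int, c: int, k: int) -> float:
    # Unbiased pass@k estimate for one problem: n generations, c of them correct.
    # Assumes the standard formula pass@k = 1 - C(n - c, k) / C(n, k);
    # illustrative only, not lifted from lighteval's PassAtK.
    if n - c < k:
        return 1.0  # every possible draw of k samples contains at least one correct one
    return 1.0 - comb(n - c, k) / comb(n, k)

# With 32 generations per prompt, 5 of which are correct:
estimate_pass_at_k(n=32, c=5, k=1)   # ~0.16
estimate_pass_at_k(n=32, c=5, k=10)  # ~0.87
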
8 changes: 4 additions & 4 deletions src/lighteval/metrics/metrics_sample.py
@@ -1067,10 +1067,10 @@ def __init__(
             strip_strings (bool, optional): Whether to strip both reference and predictions. Defaults to False.
             sample_scoring_function (callable or str, optional): Function to use to score each sample.
                 Either pass the full function (should take a string prediction and a string gold, and return a score between 0 and 1)
-                or a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want. Defaults to "full".
-                `prefix` checks if the prediction starts with the gold,
-                `suffix` if the prediction ends with the gold,
-                `full` if the prediction and gold are equal
+                or a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want, or nothing to default to "full".
+                `prefix` checks if the prediction starts with the gold,
+                `suffix` if the prediction ends with the gold,
+                `full` if the prediction and gold are equal
         """
         self.k = k
         self.n = n
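As a quick illustration of the three string options documented above (a sketch only — score_sample is a hypothetical helper, not lighteval's implementation; only the prefix/suffix/full semantics come from the docstring):

def score_sample(prediction: str, gold: str, mode: str = "full") -> int:
    # Hypothetical helper mirroring the documented modes; stripping shown inline
    # to echo strip_strings=True in the metric definitions.
    prediction, gold = prediction.strip(), gold.strip()
    if mode == "prefix":
        return int(prediction.startswith(gold))  # prediction starts with the gold
    if mode == "suffix":
        return int(prediction.endswith(gold))    # prediction ends with the gold
    return int(prediction == gold)               # "full" (the default): exact equality
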
