
Commit

added defaults
clefourrier committed Jan 27, 2025
1 parent 93d108e commit 5bea2f6
Showing 2 changed files with 23 additions and 7 deletions.
22 changes: 19 additions & 3 deletions src/lighteval/metrics/metrics.py
@@ -365,9 +365,25 @@ class Metrics(Enum):
         corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3).compute,
         higher_is_better=True,
     )
-    pass_at_k_32 = SampleLevelMetric(
-        metric_name="pass@k:32",
-        sample_level_fn=PassAtK(k=32, strip_strings=True).compute,
+    pass_at_1 = SampleLevelMetric(
+        metric_name="pass@1:32_samples",
+        sample_level_fn=PassAtK(k=1, n=32, strip_strings=True).compute,
+        category=MetricCategory.GENERATIVE_SAMPLING,
+        use_case=MetricUseCase.REASONING,
+        corpus_level_fn=np.mean,
+        higher_is_better=True,
+    )
+    pass_at_10 = SampleLevelMetric(
+        metric_name="pass@10:32_samples",
+        sample_level_fn=PassAtK(k=10, n=32, strip_strings=True).compute,
+        category=MetricCategory.GENERATIVE_SAMPLING,
+        use_case=MetricUseCase.REASONING,
+        corpus_level_fn=np.mean,
+        higher_is_better=True,
+    )
+    pass_at_100 = SampleLevelMetric(
+        metric_name="pass@100:32_samples",
+        sample_level_fn=PassAtK(k=100, n=32, strip_strings=True).compute,
         category=MetricCategory.GENERATIVE_SAMPLING,
         use_case=MetricUseCase.REASONING,
         corpus_level_fn=np.mean,
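Note on the three new defaults: each metric is computed from the same n=32 generations per sample, only the k in pass@k changes, and the corpus-level score is simply the mean of the per-sample estimates (corpus_level_fn=np.mean above). A minimal sketch of the standard unbiased pass@k estimator (the Chen et al., 2021 formulation; it is assumed, not confirmed by this diff, that PassAtK follows it — the helper below is illustrative, not lighteval's code):

from math import comb

def estimate_pass_at_k(n: int, c: int, k: int) -> float:
    # Unbiased pass@k estimate for one problem: n generations, c of them correct.
    # Assumes the standard formula pass@k = 1 - C(n - c, k) / C(n, k);
    # illustrative only, not lifted from lighteval's PassAtK.
    if n - c < k:
        return 1.0  # every possible draw of k samples contains at least one correct one
    return 1.0 - comb(n - c, k) / comb(n, k)

# With 32 generations per prompt, 5 of which are correct:
estimate_pass_at_k(n=32, c=5, k=1)   # ~0.16
estimate_pass_at_k(n=32, c=5, k=10)  # ~0.87
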
8 changes: 4 additions & 4 deletions src/lighteval/metrics/metrics_sample.py
@@ -1067,10 +1067,10 @@ def __init__(
             strip_strings (bool, optional): Whether to strip both reference and predictions. Defaults to False.
             sample_scoring_function (callable or str, optional): Function to use to score each sample.
                 Either pass the full function (should take a string prediction and a string gold, and return a score between 0 and 1)
-                or a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want. Defaults to "full".
-                `prefix` checks if the prediction starts with the gold,
-                `suffix` if the prediction ends with the gold,
-                `full` if the prediction and gold are equal
+                or a string (any of `prefix`, `suffix` or `full`) to define the type of exact match that you want, or nothing to default to "full".
+                `prefix` checks if the prediction starts with the gold,
+                `suffix` if the prediction ends with the gold,
+                `full` if the prediction and gold are equal
         """
         self.k = k
         self.n = n
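As a quick illustration of the three string options documented above (a sketch only — score_sample is a hypothetical helper, not lighteval's implementation; only the prefix/suffix/full semantics come from the docstring):

def score_sample(prediction: str, gold: str, mode: str = "full") -> int:
    # Hypothetical helper mirroring the documented modes; stripping shown inline
    # to echo strip_strings=True in the metric definitions.
    prediction, gold = prediction.strip(), gold.strip()
    if mode == "prefix":
        return int(prediction.startswith(gold))  # prediction starts with the gold
    if mode == "suffix":
        return int(prediction.endswith(gold))    # prediction ends with the gold
    return int(prediction == gold)               # "full" (the default): exact equality
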
