Skip to content

Commit c2cb488

Browse files
committed
update extraction match to reflect newest math-verify
1 parent d7a1f11 commit c2cb488

File tree

5 files changed

+534
-121
lines changed

5 files changed

+534
-121
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ multilingual = [
109109
"jieba", # for chinese tokenizer
110110
"pyvi", # for vietnamese tokenizer
111111
]
112-
math = ["latex2sympy2_extended>=0.9.3"]
112+
math = ["latex2sympy2_extended>=1.0.2"]
113113

114114
[project.urls]
115115
Homepage = "https://github.com/huggingface/lighteval"

src/lighteval/metrics/utils/extractive_match_utils.py

Lines changed: 168 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
# SOFTWARE.
2222

2323
import re
24-
from dataclasses import dataclass, field
24+
from dataclasses import dataclass, field, replace
2525
from functools import lru_cache
2626
from itertools import groupby
2727
from typing import Any, Literal, Sequence
2828

2929
import sympy
30-
from sympy import Basic, MatrixBase, Number
30+
from sympy import Basic, FiniteSet, MatrixBase, Number
3131
from sympy.parsing import parse_expr
3232

3333
from lighteval.metrics.utils.math_comparison import should_treat_as_complex
@@ -48,7 +48,7 @@ def latex_normalization_config_default_factory():
4848
units=True,
4949
malformed_operators=True,
5050
nits=True,
51-
boxed=True,
51+
boxed="all",
5252
equations=True,
5353
)
5454

@@ -159,37 +159,91 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
159159
return [(re.compile(pattern), priority) for pattern, priority in regexes]
160160

161161

162-
@lru_cache(maxsize=1)
163-
def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) -> list[tuple[re.Pattern[str], int]]:
164-
# Only LaTeX expressions between delimiters
165-
percent_re_group = r"(?P<percent>\s*(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"
166-
latex_envs_re = (
167-
r"("
168-
r"(?<!\\)\$\$(?P<latexDisplayDollar>[\s\S]+?)(?<!\\)\$\$|" # $$...$$ (display math, can be multiline)
169-
r"(?<!\\)\\\[(?P<latexDisplayBracket>[\s\S]+?)(?<!\\)\\\]|" # \[...\] (display math, can be multiline)
170-
r"(?<!\\|\d)\$(?P<latexInlineDollar>(?:\\[$]|[^\n$])+?)(?<!\\)\$|" # $...$ (inline math, single line, allows escaped $), we make sure it's not preceded by a digit to minimize false positives containing dollar as a unit
171-
r"(?<!\\)\\\((?P<latexInlineParenthesis>[^\n]+?)(?<!\\)\\\)|" # \(...\) (inline math, single line)
172-
r"(?<!\\)\[(?P<latexInlineBracket>[^\n$]+?)(?<!\\)\]" # [....] While this is not a valid display, math LLMs like to generate it. We allow it
173-
rf"){percent_re_group}?"
174-
)
162+
def _wrap_around_boxed(unit: str) -> str:
    # Lazily allow any amount of `unit` before and after a mandatory \boxed{...} whose body is made of `unit`.
    return rf"{unit}*?\\boxed{{{unit}+?}}{unit}*?"


def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"] = "plain") -> str:
    r"""Creates a LaTeX environment pattern with uniquely prefixed group names.

    The returned (uncompiled) regex is an alternation of the supported math environments
    ($$...$$, \[...\], $...$, \(...\) and whitespace-delimited [...]), optionally followed by
    whitespace and a percent marker. Every named group is prefixed with `prefix`, so several
    instances of the pattern can be concatenated into one regex without group name clashes.

    Args:
        prefix (str): Prefix to add to group names to make them unique
        context (Literal["boxed", "plain"]): Type of content to match inside the environments
            - "boxed": Match environments containing \boxed{...}; a bare \boxed{...} outside
              of any environment is matched too (group `<prefix>latexBoxed`)
            - "plain": Match any LaTeX content; a bare \frac{a}{b} of two simple numbers is
              matched too (group `<prefix>latexFraction`)

    Returns:
        str: Regex pattern for matching LaTeX environments with percent suffix
    """
    percent_re_group = rf"(?P<{prefix}percent>(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"

    # Single-character "units" allowed inside each environment
    # Anything but $, or a $ that is not followed by another $
    display_dollar_unit = r"(?:[^$]|\$(?!\$))"
    # Either \ not followed by ] or everything but \
    display_bracket_unit = r"(?:[^\\]|\\(?!\]))"
    # Escaped $ or anything but newline/$
    inline_dollar_unit = r"(?:\\[$]|[^\n$])"
    # Either \ not followed by ) or everything but \ and newline
    inline_parenthesis_unit = r"(?:[^\\\n]|\\(?!\)))"
    # Anything but newline and square brackets
    inline_bracket_unit = r"[^\n\]\[]"

    units = (
        display_dollar_unit,
        display_bracket_unit,
        inline_dollar_unit,
        inline_parenthesis_unit,
        inline_bracket_unit,
    )
    if context == "boxed":
        # The environment content must contain a \boxed{...}
        contents = [_wrap_around_boxed(unit) for unit in units]
    else:
        # Any non-empty content, matched lazily
        contents = [rf"{unit}+?" for unit in units]
    (
        display_dollar_content,
        display_content_bracket,
        inline_dollar_content,
        inline_content_parenthesis,
        inline_content_bracket,
    ) = contents

    # Build list of regex patterns
    patterns = [
        # Display math environments (allow multiline)
        rf"(?<!\\)\$\$(?P<{prefix}latexDisplayDollar>{display_dollar_content})(?<!\\)\$\$",
        rf"(?<!\\)\\\[(?P<{prefix}latexDisplayBracket>{display_content_bracket})(?<!\\)\\\]",
        # Inline math environments (single line only)
        # The opening $ must not follow a digit, to limit false positives where $ is a currency unit
        rf"(?<!\\|\d)\$(?P<{prefix}latexInlineDollar>{inline_dollar_content})(?<!\\)\$",
        rf"(?<!\\)\\\((?P<{prefix}latexInlineParenthesis>{inline_content_parenthesis})(?<!\\)\\\)",
        rf"\s\[(?P<{prefix}latexInlineBracket>{inline_content_bracket})\]\s",
    ]
    if context == "boxed":
        # allow also matching plain boxed
        patterns.append(rf"(?P<{prefix}latexBoxed>\\boxed{{.+}})")
    elif context == "plain":
        simple_number = r"-?\d+(?:[.,]\d+)?"
        patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")

    # Join patterns with | and wrap in a non-capturing group, followed by the optional percent marker
    alternatives = "|".join(patterns)
    return rf"(?:(?:{alternatives})\s*{percent_re_group}?)"
175219

176-
# Match latex without environments
177-
latex_boxed = rf"(?P<latexBoxed>\\boxed{{.+}})\$?{percent_re_group}?" # Boxed number, it's fine to be as greedy as possible as we will find the correct end afterwards
178-
simple_number = r"-?\d+(?:[.,]\d+)?"
179-
latex_fraction = rf"(?P<latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})\$?{percent_re_group}?"
180220

221+
@lru_cache(maxsize=1)
222+
def lazy_latex_regex(
223+
latex_config: LatexExtractionConfig,
224+
language: Language
225+
) -> list[tuple[re.Pattern[str], int]]:
181226
translation_literal = TRANSLATION_LITERALS[language]
227+
# Pattern for multiple latex environments connected by and/or
228+
# Create patterns for up to 5 connected expressions
229+
first_latex_group = make_latex_env_pattern('first_')
230+
and_word = translation_literal.and_word
231+
or_word = translation_literal.or_word
232+
next_groups = ''.join([rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_')})?" for i in range(1, 6)])
233+
234+
latex_envs_re = rf"(?:{first_latex_group}{next_groups})"
182235
colon_re = rf"[{re.escape(translation_literal.colon)}\:]"
183-
184236
answer_prefix_re = rf"(?i:{translation_literal.answer})"
185237

186238
# We first match boxed env, for some reason that's the most common case of output
187239
# Then we match the latex with environments, then we try to match the fraction
188240
regexes: list[tuple[str, int]] = []
189-
for latex_re in [latex_envs_re, latex_fraction]:
241+
for latex_re in [latex_envs_re]:
190242
if language == Language.ENGLISH:
191243
final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{latex_re}\.?\s?I hope"
192-
final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{latex_re}"
244+
final_answer_prefixed_just_is = (
245+
rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{latex_re}"
246+
)
193247
regexes.append((final_answer_prefixed_re, 0))
194248
regexes.append((final_answer_prefixed_just_is, 50))
195249

@@ -203,8 +257,15 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
203257
if latex_config.try_extract_without_anchor:
204258
regexes.append((latex_re, 300))
205259

260+
# This ensures that boxed is matched right after the final answer xxxx
206261
if latex_config.boxed_match_priority >= 0:
207-
regexes.append((latex_boxed, latex_config.boxed_match_priority))
262+
latex_re_boxed = make_latex_env_pattern(prefix='first_', context='boxed')
263+
next_groups = ''.join([rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_', context='boxed')})?" for i in range(1, 6)])
264+
latex_re_boxed = rf"{latex_re_boxed}{next_groups}"
265+
regexes.append((latex_re_boxed, latex_config.boxed_match_priority))
266+
# Match plain boxed. The issue with plain boxed is that it's impossible to know where it stops, so we match greedily
267+
# till the last }. We do the actual extraction in the normalization step.
268+
regexes.append((rf"(?P<first_latexBoxed>\\boxed{{.+}})", latex_config.boxed_match_priority))
208269

209270
return [(re.compile(pattern, re.DOTALL), priority) for pattern, priority in regexes]
210271

@@ -268,7 +329,9 @@ def lazy_indices_regex(
268329

269330

270331
def get_extraction_regexes(
271-
formatted_doc: Doc, target_types: Sequence[ExtractionTarget], language: Language
332+
formatted_doc: Doc,
333+
target_types: Sequence[ExtractionTarget],
334+
language: Language
272335
) -> list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]]:
273336
extraction_regexes: list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]] = [
274337
(lazy_latex_regex(target_type, language), target_type)
@@ -296,21 +359,21 @@ def get_target_type_order(target_type: ExtractionTarget) -> int:
296359

297360
# Small cache, to catch repeated calls with invalid parsing
298361
@lru_cache(maxsize=20)
@requires_latex2sympy2_extended
def parse_latex_with_timeout(latex: str, timeout_seconds: int):
    """Parses a LaTeX string with `latex2sympy`, wrapped in the `timeout` helper.

    Results are memoised by `lru_cache` on the `(latex, timeout_seconds)` pair.

    Args:
        latex (str): LaTeX source to parse. It is passed with `normalization_config=None`.
        timeout_seconds (int): Value handed to `timeout`
            (presumably the maximum parsing time in seconds - confirm against the `timeout` helper).

    Returns:
        Whatever `latex2sympy` returns for the given input.
    """
    from latex2sympy2_extended.latex2sympy2 import latex2sympy

    guarded_latex2sympy = timeout(timeout_seconds)(latex2sympy)
    treat_as_real = not should_treat_as_complex(latex)
    return guarded_latex2sympy(
        latex, is_real=treat_as_real, convert_degrees=False, normalization_config=None
    )
305369

306370

307371
@lru_cache(maxsize=20)
def parse_expr_with_timeout(expr: str, timeout_seconds: int):
    """Parses a plain expression string with sympy's `parse_expr`, wrapped in the `timeout` helper.

    Results are memoised by `lru_cache` on the `(expr, timeout_seconds)` pair.

    Args:
        expr (str): Expression source to parse; it is parsed with `evaluate=False`.
        timeout_seconds (int): Value handed to `timeout`
            (presumably the maximum parsing time in seconds - confirm against the `timeout` helper).

    Returns:
        Whatever `parse_expr` returns for the given input.
    """
    guarded_parse_expr = timeout(timeout_seconds)(parse_expr)
    return guarded_parse_expr(expr, evaluate=False)
311374

312375

313-
def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
376+
def extract_expr(match: re.Match, timeout_seconds: int) -> tuple[str | sympy.Expr | None, str]:
314377
# First combine the number
315378
groups = match.groupdict()
316379
# Expr group will always exist because every regex has it
@@ -338,7 +401,7 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
338401
# Remove new lines and spaces
339402
if expr:
340403
try:
341-
return parse_expr_with_timeout(expr.replace("\n", " ").replace("^", "**")), expr
404+
return parse_expr_with_timeout(expr.replace("\n", " ").replace("^", "**"), timeout_seconds), expr
342405
except: # noqa: E722
343406
pass
344407
return None, expr
@@ -348,52 +411,90 @@ def convert_to_pct(number: Number):
348411
return sympy.Mul(number, sympy.Rational(1, 100), evaluate=False)
349412

350413

351-
@requires_latex2sympy2_extended
def extract_latex(
    match: re.Match, latex_config: LatexExtractionConfig, timeout_seconds: int
) -> tuple[sympy.Expr | str | None, str]:
    """Extracts and parses the LaTeX expression(s) captured by a latex regex match.

    The match may carry one `first_latex*` group and up to five `next{i}_latex*` groups
    (expressions joined by and/or). Each captured expression is normalized, parsed and,
    when its `<prefix>_percent` group matched, converted to a percentage.

    Note: this function is intentionally not wrapped in `lru_cache`. `re.Match` objects hash
    by identity, so such a cache could only hit when the very same match object is passed
    again, while keeping the cached match objects alive.

    Args:
        match (re.Match): Regex match containing the prefixed latex groups
        latex_config (LatexExtractionConfig): Its `normalization_config` is used for normalization
        timeout_seconds (int): Forwarded to `parse_latex_with_timeout`

    Returns:
        tuple[sympy.Expr | str | None, str]: A tuple containing:
            - `None` and `""` when no latex group matched
            - a `FiniteSet` of all parsed expressions (nested `FiniteSet`s are flattened) and
              the normalized strings joined by " and ", when several expressions were
              captured and all of them parsed
            - otherwise the first parsed expression (`None` if its parsing failed) and its
              normalized string
    """
    from latex2sympy2_extended.latex2sympy2 import normalize_latex

    groups = match.groupdict()
    # Group name prefixes in the order they appear in the regex: first, next1, ..., next5
    slot_prefixes = ["first"] + [f"next{i}" for i in range(1, 6)]

    latex_exprs = []
    latex_strs = []
    for slot in slot_prefixes:
        # First non-empty latex group captured for this slot, if any
        captured = next(
            ((name, val) for name, val in groups.items() if name.startswith(f"{slot}_latex") and val),
            None,
        )
        if captured is None:
            continue

        name, latex = captured
        _, _, group_name = name.partition("_")
        is_percentage = bool(groups.get(f"{slot}_percent"))

        # A plain \boxed{...} is matched greedily by the regex, so only the last box is extracted
        config = latex_config.normalization_config
        if group_name == "latexBoxed":
            config = replace(config, boxed="last")  # Use replace to modify single field

        normalized_latex = normalize_latex(
            latex,
            config=config,
        )
        latex_strs.append(normalized_latex)

        try:
            parsed_latex = parse_latex_with_timeout(normalized_latex, timeout_seconds=timeout_seconds)
            if is_percentage:
                parsed_latex = convert_to_pct(parsed_latex)
        except:  # noqa: E722
            # Deliberately best-effort: any parsing failure (including whatever the timeout
            # helper raises) is recorded as None so the string fallback can still be used
            parsed_latex = None
        latex_exprs.append(parsed_latex)

    if not latex_exprs:
        return None, ""

    # If we have multiple expressions and all of them are parsed, wrap them in a FiniteSet
    if len(latex_exprs) > 1 and all(expr is not None for expr in latex_exprs):
        # To handle solution is: 1,2 and 3
        all_elements = []
        for expr in latex_exprs:
            if isinstance(expr, FiniteSet):
                all_elements.extend(expr.args)
            else:
                all_elements.append(expr)
        return FiniteSet(*all_elements), " and ".join(latex_strs)

    # Otherwise return the single expression
    return latex_exprs[0], latex_strs[0]
479+
480+
481+
def extract_match(
    match: re.Match, target_type: ExtractionTarget, timeout_seconds: int
) -> tuple[Basic | MatrixBase | str | None, str]:
    """Dispatches a regex match to the extractor that corresponds to its target type.

    Args:
        match (re.Match): The regex match object containing the extracted text
        target_type (ExtractionTarget): The extraction config the match was produced for
            (latex, expression, or indices)
        timeout_seconds (int): Forwarded to the latex and expression extractors

    Returns:
        tuple[Basic | MatrixBase | str | None, str]: A tuple containing:
            - The extracted and parsed value (if successful) or None (if parsing failed)
            - The string representation of the extracted text
    """
    if isinstance(target_type, LatexExtractionConfig):
        return extract_latex(match, target_type, timeout_seconds=timeout_seconds)

    if isinstance(target_type, ExprExtractionConfig):
        return extract_expr(match, timeout_seconds=timeout_seconds)

    if isinstance(target_type, IndicesExtractionConfig):
        # Indices are not parsed: the raw group is both the value and its string form
        indices = match.group("indices")
        return indices, indices
    # Any other target type falls through and implicitly returns None
399500

@@ -403,6 +504,7 @@ def extract_target_from_pred(
403504
target_res: list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]],
404505
fallback_mode: Literal["no_fallback", "first_match"] = "no_fallback",
405506
extraction_mode: Literal["first_match", "any_match"] = "any_match",
507+
timeout_seconds: int = 5,
406508
):
407509
"""Extracts targets from a prediction string using regex patterns.
408510
Returns first successfully extracted match.
@@ -416,6 +518,7 @@ def extract_target_from_pred(
416518
extraction_mode (Literal["first_match", "any_match"], optional): How to handle extraction failures. Defaults to "any_match".
417519
- "first_match": Only tries to extract the first match
418520
- "any_match": Tries to extract any match
521+
timeout_seconds (int, optional): Maximum time in seconds to spend parsing each expression. Defaults to 5.
419522
420523
Returns:
421524
list: List of extracted predictions, with first fallback string appended if fallback_mode is "first_match"
@@ -445,7 +548,7 @@ def extract_target_from_pred(
445548

446549
# Try to extract from each match, starting from rightmost
447550
for match, _, _, target_type in matches_with_pos:
448-
extracted_match, str_fallback = extract_match(match, target_type)
551+
extracted_match, str_fallback = extract_match(match, target_type, timeout_seconds)
449552
match_found = True
450553

451554
if str_fallback:

0 commit comments

Comments
 (0)