2121# SOFTWARE.
2222
2323import re
24- from dataclasses import dataclass , field
24+ from dataclasses import dataclass , field , replace
2525from functools import lru_cache
2626from itertools import groupby
2727from typing import Any , Literal , Sequence
2828
2929import sympy
30- from sympy import Basic , MatrixBase , Number
30+ from sympy import Basic , FiniteSet , MatrixBase , Number
3131from sympy .parsing import parse_expr
3232
3333from lighteval .metrics .utils .math_comparison import should_treat_as_complex
@@ -48,7 +48,7 @@ def latex_normalization_config_default_factory():
4848 units = True ,
4949 malformed_operators = True ,
5050 nits = True ,
51- boxed = True ,
51+ boxed = "all" ,
5252 equations = True ,
5353 )
5454
@@ -159,37 +159,91 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
159159 return [(re .compile (pattern ), priority ) for pattern , priority in regexes ]
160160
161161
162- @lru_cache (maxsize = 1 )
163- def lazy_latex_regex (latex_config : LatexExtractionConfig , language : Language ) -> list [tuple [re .Pattern [str ], int ]]:
164- # Only LaTeX expressions between delimiters
165- percent_re_group = r"(?P<percent>\s*(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"
166- latex_envs_re = (
167- r"("
168- r"(?<!\\)\$\$(?P<latexDisplayDollar>[\s\S]+?)(?<!\\)\$\$|" # $$...$$ (display math, can be multiline)
169- r"(?<!\\)\\\[(?P<latexDisplayBracket>[\s\S]+?)(?<!\\)\\\]|" # \[...\] (display math, can be multiline)
170- r"(?<!\\|\d)\$(?P<latexInlineDollar>(?:\\[$]|[^\n$])+?)(?<!\\)\$|" # $...$ (inline math, single line, allows escaped $), we make sure it's not preceded by a digit to minimize false positives containing dollar as a unit
171- r"(?<!\\)\\\((?P<latexInlineParenthesis>[^\n]+?)(?<!\\)\\\)|" # \(...\) (inline math, single line)
172- r"(?<!\\)\[(?P<latexInlineBracket>[^\n$]+?)(?<!\\)\]" # [....] While this is not a valid display, math LLMs like to generate it. We allow it
173- rf"){ percent_re_group } ?"
174- )
def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"] = "plain") -> str:
    r"""Creates a LaTeX environment pattern with uniquely prefixed group names.

    The docstring is a raw string on purpose: in a regular string literal the
    sequence ``\b`` in ``\boxed`` is the backspace escape and corrupts the text.

    Args:
        prefix (str): Prefix to add to group names to make them unique, so that
            several copies of the pattern can be combined into one regex.
        context (Literal["boxed", "plain"]): Type of content to match inside the environments
            - "boxed": Match environments containing \boxed{...}; a bare
              \boxed{...} outside of any environment is matched as well.
            - "plain": Match any LaTeX content; a bare \frac{a}{b} of two simple
              numbers outside of any environment is matched as well.

    Returns:
        str: Regex pattern (non-capturing at the top level) for matching LaTeX
            environments, optionally followed by whitespace and a percent suffix.
    """
    percent_re_group = rf"(?P<{prefix}percent>(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"

    def repeat(unit: str) -> str:
        # Turn a single-character class into the body of an environment.
        # "boxed" requires a \boxed{...} somewhere inside; any other context
        # simply takes one or more characters, lazily.
        if context == "boxed":
            return rf"{unit}*?\\boxed{{{unit}+?}}{unit}*?"
        return rf"{unit}+?"

    # Single-character building blocks for each environment type.
    # Anything but $, or a $ that does not start a closing $$
    display_dollar_content = repeat(r"(?:[^$]|\$(?!\$))")
    # Either \ not followed by ] or everything but \
    display_content_bracket = repeat(r"(?:[^\\]|\\(?!\]))")
    # Escaped $ or anything but newline and $
    inline_dollar_content = repeat(r"(?:\\[$]|[^\n$])")
    # Either \ not followed by ) or everything but \ and newline
    inline_content_parenthesis = repeat(r"(?:[^\\\n]|\\(?!\)))")
    # Anything but newline and square brackets
    inline_content_bracket = repeat(r"[^\n\]\[]")

    patterns = [
        # Display math environments (allow multiline)
        rf"(?<!\\)\$\$(?P<{prefix}latexDisplayDollar>{display_dollar_content})(?<!\\)\$\$",
        rf"(?<!\\)\\\[(?P<{prefix}latexDisplayBracket>{display_content_bracket})(?<!\\)\\\]",
        # Inline math environments (single line only)
        rf"(?<!\\|\d)\$(?P<{prefix}latexInlineDollar>{inline_dollar_content})(?<!\\)\$",
        rf"(?<!\\)\\\((?P<{prefix}latexInlineParenthesis>{inline_content_parenthesis})(?<!\\)\\\)",
        rf"\s\[(?P<{prefix}latexInlineBracket>{inline_content_bracket})\]\s",
    ]
    if context == "boxed":
        # allow also matching plain boxed; greedy on purpose, it runs up to the last }
        patterns.append(rf"(?P<{prefix}latexBoxed>\\boxed{{.+}})")
    elif context == "plain":
        simple_number = r"-?\d+(?:[.,]\d+)?"
        patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")

    # Join the alternatives with | and allow an optional percent suffix after them
    alternatives = "|".join(patterns)
    return rf"(?:(?:{alternatives})\s*{percent_re_group}?)"
175219
176- # Match latex without environments
177- latex_boxed = rf"(?P<latexBoxed>\\boxed{{.+}})\$?{ percent_re_group } ?" # Boxed number, it's fine to be as greedy as possible as we will find the correct end afterwards
178- simple_number = r"-?\d+(?:[.,]\d+)?"
179- latex_fraction = rf"(?P<latexFraction>-?\\frac{{{ simple_number } }}{{{ simple_number } }})\$?{ percent_re_group } ?"
180220
221+ @lru_cache (maxsize = 1 )
222+ def lazy_latex_regex (
223+ latex_config : LatexExtractionConfig ,
224+ language : Language
225+ ) -> list [tuple [re .Pattern [str ], int ]]:
181226 translation_literal = TRANSLATION_LITERALS [language ]
227+ # Pattern for multiple latex environments connected by and/or
228+ # Create patterns for the first expression plus up to 5 further connected expressions
229+ first_latex_group = make_latex_env_pattern ('first_' )
230+ and_word = translation_literal .and_word
231+ or_word = translation_literal .or_word
232+ next_groups = '' .join ([rf"(?:\s*(?:{ and_word } |{ or_word } )\s*{ make_latex_env_pattern (f'next{ i } _' )} )?" for i in range (1 , 6 )])
233+
234+ latex_envs_re = rf"(?:{ first_latex_group } { next_groups } )"
182235 colon_re = rf"[{ re .escape (translation_literal .colon )} \:]"
183-
184236 answer_prefix_re = rf"(?i:{ translation_literal .answer } )"
185237
186238 # We first match boxed env, for some reason that's the most common case of output
187239 # Then we match the latex with environments, then we try to match the fraction
188240 regexes : list [tuple [str , int ]] = []
189- for latex_re in [latex_envs_re , latex_fraction ]:
241+ for latex_re in [latex_envs_re ]:
190242 if language == Language .ENGLISH :
191243 final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{ latex_re } \.?\s?I hope"
192- final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{ latex_re } "
244+ final_answer_prefixed_just_is = (
245+ rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{ latex_re } "
246+ )
193247 regexes .append ((final_answer_prefixed_re , 0 ))
194248 regexes .append ((final_answer_prefixed_just_is , 50 ))
195249
@@ -203,8 +257,15 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
203257 if latex_config .try_extract_without_anchor :
204258 regexes .append ((latex_re , 300 ))
205259
260+ # This ensures that boxed is matched right after the final answer xxxx
206261 if latex_config .boxed_match_priority >= 0 :
207- regexes .append ((latex_boxed , latex_config .boxed_match_priority ))
262+ latex_re_boxed = make_latex_env_pattern (prefix = 'first_' , context = 'boxed' )
263+ next_groups = '' .join ([rf"(?:\s*(?:{ and_word } |{ or_word } )\s*{ make_latex_env_pattern (f'next{ i } _' , context = 'boxed' )} )?" for i in range (1 , 6 )])
264+ latex_re_boxed = rf"{ latex_re_boxed } { next_groups } "
265+ regexes .append ((latex_re_boxed , latex_config .boxed_match_priority ))
266+ # Match plain boxed, the issue with plain boxed is that it's impossible to know where it stops, so we match greedily
267+ # till the last }. We do the actual extraction in the normalization step.
268+ regexes .append ((rf"(?P<first_latexBoxed>\\boxed{{.+}})" , latex_config .boxed_match_priority ))
208269
209270 return [(re .compile (pattern , re .DOTALL ), priority ) for pattern , priority in regexes ]
210271
@@ -268,7 +329,9 @@ def lazy_indices_regex(
268329
269330
270331def get_extraction_regexes (
271- formatted_doc : Doc , target_types : Sequence [ExtractionTarget ], language : Language
332+ formatted_doc : Doc ,
333+ target_types : Sequence [ExtractionTarget ],
334+ language : Language
272335) -> list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]]:
273336 extraction_regexes : list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]] = [
274337 (lazy_latex_regex (target_type , language ), target_type )
@@ -296,21 +359,21 @@ def get_target_type_order(target_type: ExtractionTarget) -> int:
296359
# Small cache, so that repeated calls with the same (latex, timeout_seconds) pair are not parsed again
@lru_cache(maxsize=20)
@requires_latex2sympy2_extended
def parse_latex_with_timeout(latex: str, timeout_seconds: int):
    """Parse a LaTeX string with ``latex2sympy`` wrapped in the ``timeout`` helper.

    Args:
        latex (str): LaTeX source handed to ``latex2sympy`` unchanged.
        timeout_seconds (int): Value passed to ``timeout``; presumably the
            maximum number of seconds the parse may take (helper is defined
            outside this block, confirm its semantics there).

    Returns:
        Whatever ``latex2sympy`` returns for the input.
    """
    from latex2sympy2_extended.latex2sympy2 import latex2sympy

    # Wrap first, then call: the wrapper is rebuilt on every cache miss so the
    # limit can differ from call to call.
    bounded_latex2sympy = timeout(timeout_seconds)(latex2sympy)
    return bounded_latex2sympy(
        latex,
        is_real=not should_treat_as_complex(latex),
        convert_degrees=False,
        normalization_config=None,
    )
305369
306370
@lru_cache(maxsize=20)
def parse_expr_with_timeout(expr: str, timeout_seconds: int):
    """Parse a plain expression string with ``parse_expr`` wrapped in the ``timeout`` helper.

    Args:
        expr (str): Expression text handed to ``parse_expr``.
        timeout_seconds (int): Value passed to ``timeout``; presumably the
            maximum number of seconds the parse may take (helper is defined
            outside this block, confirm its semantics there).

    Returns:
        The result of ``parse_expr(expr, evaluate=False)``.
    """
    bounded_parse_expr = timeout(timeout_seconds)(parse_expr)
    return bounded_parse_expr(expr, evaluate=False)
311374
312375
313- def extract_expr (match : re .Match ) -> tuple [str | sympy .Expr | None , str ]:
376+ def extract_expr (match : re .Match , timeout_seconds : int ) -> tuple [str | sympy .Expr | None , str ]:
314377 # First combine the number
315378 groups = match .groupdict ()
316379 # Expr group will always exist because every regex has it
@@ -338,7 +401,7 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
338401 # Remove new lines and spaces
339402 if expr :
340403 try :
341- return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" )), expr
404+ return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" ), timeout_seconds ), expr
342405 except : # noqa: E722
343406 pass
344407 return None , expr
@@ -348,52 +411,90 @@ def convert_to_pct(number: Number):
348411 return sympy .Mul (number , sympy .Rational (1 , 100 ), evaluate = False )
349412
350413
@requires_latex2sympy2_extended
@lru_cache(maxsize=20)
def extract_latex(match: re.Match, latex_config: LatexExtractionConfig, timeout_seconds: int) -> tuple[sympy.Expr | str | None, str]:
    """Normalize and parse every LaTeX capture of a regex match.

    The match is expected to expose named groups prefixed with ``first_`` and
    ``next1_`` .. ``next5_``; for each prefix the first non-empty group whose
    name continues with ``latex`` is used.

    Args:
        match (re.Match): Regex match holding the prefixed LaTeX groups.
        latex_config (LatexExtractionConfig): Supplies ``normalization_config``
            for ``normalize_latex``. For a ``latexBoxed`` capture a copy of it
            with ``boxed="last"`` is used instead.
        timeout_seconds (int): Forwarded to ``parse_latex_with_timeout``.

    Returns:
        tuple: ``(parsed, normalized_string)``.
            - No LaTeX group matched: ``(None, "")``.
            - Several captures and all of them parsed: a ``FiniteSet`` of all
              results (members of parsed ``FiniteSet`` values are merged in)
              and the normalized strings joined by ``" and "``.
            - Otherwise: the first capture's parse result (``None`` if it
              failed to parse) and its normalized string.
    """
    from latex2sympy2_extended.latex2sympy2 import normalize_latex

    captured = match.groupdict()

    def first_nonempty(group_prefix: str):
        # First (value, group name) pair with a truthy value for this prefix, or None
        for group_name, text in captured.items():
            if group_name.startswith(group_prefix) and text:
                return text, group_name
        return None

    # One candidate for the first expression, then one per nextN_ slot
    candidates = [first_nonempty("first_latex")]
    candidates.extend(first_nonempty(f"next{i}_latex") for i in range(1, 6))

    parsed_values = []
    normalized_strs = []
    for candidate in candidates:
        if candidate is None:
            continue
        text, group_name = candidate

        name_parts = group_name.split("_")
        slot = name_parts[0]
        env_kind = name_parts[1] if len(name_parts) > 1 else None
        has_percent = bool(captured.get(f"{slot}_percent"))

        # Plain boxed captures get a config copy with boxed="last"
        normalization = latex_config.normalization_config
        if env_kind == "latexBoxed":
            normalization = replace(normalization, boxed="last")

        normalized = normalize_latex(
            text,
            config=normalization,
        )
        normalized_strs.append(normalized)

        # Best effort: any failure while parsing is recorded as None
        try:
            value = parse_latex_with_timeout(normalized, timeout_seconds=timeout_seconds)
            if has_percent:
                value = convert_to_pct(value)
        except:  # noqa: E722
            value = None
        parsed_values.append(value)

    if not parsed_values:
        return None, ""

    # Several fully parsed expressions are merged into one set,
    # e.g. to handle "solution is: 1,2 and 3"
    if len(parsed_values) > 1 and not any(value is None for value in parsed_values):
        members = []
        for value in parsed_values:
            if isinstance(value, FiniteSet):
                members.extend(value.args)
            else:
                members.append(value)
        return FiniteSet(*members), " and ".join(normalized_strs)

    # Otherwise only the first expression is reported
    return parsed_values[0], normalized_strs[0]
479+
480+
def extract_match(match: re.Match, target_type: ExtractionTarget, timeout_seconds: int) -> tuple[Basic | MatrixBase | str | None, str]:
    """Dispatch a regex match to the extractor for its target type.

    Args:
        match (re.Match): The regex match object containing the extracted text
        target_type (ExtractionTarget): Extraction config; its class selects the extractor (latex, expression, or indices)
        timeout_seconds (int): Forwarded to the latex and expression extractors

    Returns:
        tuple[Basic | MatrixBase | str | None, str]: A tuple containing:
            - The value produced by the selected extractor (for indices, the raw "indices" group)
            - The string representation of the extracted text
        If target_type is none of the three known config classes, None is returned.
    """
    if isinstance(target_type, LatexExtractionConfig):
        return extract_latex(match, target_type, timeout_seconds=timeout_seconds)

    if isinstance(target_type, ExprExtractionConfig):
        return extract_expr(match, timeout_seconds=timeout_seconds)

    if isinstance(target_type, IndicesExtractionConfig):
        # Indices need no parsing: the captured text is both the value and its string form
        indices = match.group("indices")
        return indices, indices

    # Unknown target type: same result as falling off the end of the function
    return None
399500
@@ -403,6 +504,7 @@ def extract_target_from_pred(
403504 target_res : list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]],
404505 fallback_mode : Literal ["no_fallback" , "first_match" ] = "no_fallback" ,
405506 extraction_mode : Literal ["first_match" , "any_match" ] = "any_match" ,
507+ timeout_seconds : int = 5 ,
406508):
407509 """Extracts targets from a prediction string using regex patterns.
408510 Returns the first successfully extracted match.
@@ -416,6 +518,7 @@ def extract_target_from_pred(
416518 extraction_mode (Literal["first_match", "any_match"], optional): How to handle extraction failures. Defaults to "any_match".
417519 - "first_match": Only tries to extract the first match
418520 - "any_match": Tries to extract any match
521+ timeout_seconds (int, optional): Maximum time in seconds to spend parsing each expression. Defaults to 5.
419522
420523 Returns:
421524 list: List of extracted predictions, with first fallback string appended if fallback_mode is "first_match"
@@ -445,7 +548,7 @@ def extract_target_from_pred(
445548
446549 # Try to extract from each match, starting from rightmost
447550 for match , _ , _ , target_type in matches_with_pos :
448- extracted_match , str_fallback = extract_match (match , target_type )
551+ extracted_match , str_fallback = extract_match (match , target_type , timeout_seconds )
449552 match_found = True
450553
451554 if str_fallback :
0 commit comments