21
21
# SOFTWARE.
22
22
23
23
import re
24
- from dataclasses import dataclass , field
24
+ from dataclasses import dataclass , field , replace
25
25
from functools import lru_cache
26
26
from itertools import groupby
27
27
from typing import Any , Literal , Sequence
28
28
29
29
import sympy
30
- from sympy import Basic , MatrixBase , Number
30
+ from sympy import Basic , FiniteSet , MatrixBase , Number
31
31
from sympy .parsing import parse_expr
32
32
33
33
from lighteval .metrics .utils .math_comparison import should_treat_as_complex
@@ -48,7 +48,7 @@ def latex_normalization_config_default_factory():
48
48
units = True ,
49
49
malformed_operators = True ,
50
50
nits = True ,
51
- boxed = True ,
51
+ boxed = "all" ,
52
52
equations = True ,
53
53
)
54
54
@@ -159,37 +159,91 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
159
159
return [(re .compile (pattern ), priority ) for pattern , priority in regexes ]
160
160
161
161
162
- @lru_cache (maxsize = 1 )
163
- def lazy_latex_regex (latex_config : LatexExtractionConfig , language : Language ) -> list [tuple [re .Pattern [str ], int ]]:
164
- # Only LaTeX expressions between delimiters
165
- percent_re_group = r"(?P<percent>\s*(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"
166
- latex_envs_re = (
167
- r"("
168
- r"(?<!\\)\$\$(?P<latexDisplayDollar>[\s\S]+?)(?<!\\)\$\$|" # $$...$$ (display math, can be multiline)
169
- r"(?<!\\)\\\[(?P<latexDisplayBracket>[\s\S]+?)(?<!\\)\\\]|" # \[...\] (display math, can be multiline)
170
- r"(?<!\\|\d)\$(?P<latexInlineDollar>(?:\\[$]|[^\n$])+?)(?<!\\)\$|" # $...$ (inline math, single line, allows escaped $), we make sure it's not preceded by a digit to minimize false positives containing dollar as a unit
171
- r"(?<!\\)\\\((?P<latexInlineParenthesis>[^\n]+?)(?<!\\)\\\)|" # \(...\) (inline math, single line)
172
- r"(?<!\\)\[(?P<latexInlineBracket>[^\n$]+?)(?<!\\)\]" # [....] While this is not a valid display, math LLMs like to generate it. We allow it
173
- rf"){ percent_re_group } ?"
174
- )
162
def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"] = "plain") -> str:
    r"""Create a LaTeX environment pattern with uniquely prefixed group names.

    Args:
        prefix (str): Prefix added to every named group so that several copies of
            the pattern can be concatenated into one regex without name clashes.
        context (Literal["boxed", "plain"]): Type of content to match inside the environments
            - "boxed": Match environments containing \boxed{...}; a bare \boxed{...}
              (group ``latexBoxed``) is accepted as well
            - "plain": Match any LaTeX content; a bare \frac{a}{b} of two simple
              numbers (group ``latexFraction``) is accepted as well

    Returns:
        str: Regex pattern for matching one LaTeX environment, optionally followed
        by whitespace and a percent suffix (group ``percent``).
    """
    percent_re_group = rf"(?P<{prefix}percent>(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"

    # One row per environment:
    # (group name, opening delimiter, pattern for ONE content token, closing delimiter)
    environments = [
        # Display math environments (allow multiline)
        ("latexDisplayDollar", r"(?<!\\)\$\$", r"(?:[^$]|\$(?!\$))", r"(?<!\\)\$\$"),
        # Content is either a \ not followed by ] or anything but \
        ("latexDisplayBracket", r"(?<!\\)\\\[", r"(?:[^\\]|\\(?!\]))", r"(?<!\\)\\\]"),
        # Inline math environments (single line only)
        ("latexInlineDollar", r"(?<!\\|\d)\$", r"(?:\\[$]|[^\n$])", r"(?<!\\)\$"),
        ("latexInlineParenthesis", r"(?<!\\)\\\(", r"(?:[^\\\n]|\\(?!\)))", r"(?<!\\)\\\)"),
        ("latexInlineBracket", r"\s\[", r"[^\n\]\[]", r"\]\s"),
    ]

    def _content(token: str) -> str:
        """Turn a single-token pattern into the full (lazy) content pattern."""
        if context == "boxed":
            # The environment must contain a \boxed{...} somewhere inside it
            return rf"{token}*?\\boxed{{{token}+?}}{token}*?"
        return rf"{token}+?"

    patterns = [
        rf"{opening}(?P<{prefix}{group}>{_content(token)}){closing}"
        for group, opening, token, closing in environments
    ]

    if context == "boxed":
        # Also allow a plain \boxed{...} without any surrounding environment.
        # Greedy on purpose: the pattern alone cannot find the matching brace.
        patterns.append(rf"(?P<{prefix}latexBoxed>\\boxed{{.+}})")
    elif context == "plain":
        simple_number = r"-?\d+(?:[.,]\d+)?"
        patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")

    # Join the alternatives with | and attach the optional percent suffix
    return rf"(?:(?:{'|'.join(patterns)})\s*{percent_re_group}?)"
175
219
176
- # Match latex without environments
177
- latex_boxed = rf"(?P<latexBoxed>\\boxed{{.+}})\$?{ percent_re_group } ?" # Boxed number, it's fine to be as greedy as possible as we will find the correct end afterwards
178
- simple_number = r"-?\d+(?:[.,]\d+)?"
179
- latex_fraction = rf"(?P<latexFraction>-?\\frac{{{ simple_number } }}{{{ simple_number } }})\$?{ percent_re_group } ?"
180
220
221
+ @lru_cache (maxsize = 1 )
222
+ def lazy_latex_regex (
223
+ latex_config : LatexExtractionConfig ,
224
+ language : Language
225
+ ) -> list [tuple [re .Pattern [str ], int ]]:
181
226
translation_literal = TRANSLATION_LITERALS [language ]
227
+ # Pattern for multiple latex environments connected by and/or
228
+ # Create patterns for up to 5 connected expressions
229
+ first_latex_group = make_latex_env_pattern ('first_' )
230
+ and_word = translation_literal .and_word
231
+ or_word = translation_literal .or_word
232
+ next_groups = '' .join ([rf"(?:\s*(?:{ and_word } |{ or_word } )\s*{ make_latex_env_pattern (f'next{ i } _' )} )?" for i in range (1 , 6 )])
233
+
234
+ latex_envs_re = rf"(?:{ first_latex_group } { next_groups } )"
182
235
colon_re = rf"[{ re .escape (translation_literal .colon )} \:]"
183
-
184
236
answer_prefix_re = rf"(?i:{ translation_literal .answer } )"
185
237
186
238
# We first match boxed env, for some reason that's the most common case of output
187
239
# Then we match the latex with environments, then we try to match the fraction
188
240
regexes : list [tuple [str , int ]] = []
189
- for latex_re in [latex_envs_re , latex_fraction ]:
241
+ for latex_re in [latex_envs_re ]:
190
242
if language == Language .ENGLISH :
191
243
final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{ latex_re } \.?\s?I hope"
192
- final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{ latex_re } "
244
+ final_answer_prefixed_just_is = (
245
+ rf"(?i:final answer.{{0,100}}?)\s+is\:?\s*{ latex_re } "
246
+ )
193
247
regexes .append ((final_answer_prefixed_re , 0 ))
194
248
regexes .append ((final_answer_prefixed_just_is , 50 ))
195
249
@@ -203,8 +257,15 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
203
257
if latex_config .try_extract_without_anchor :
204
258
regexes .append ((latex_re , 300 ))
205
259
260
+ # This ensures that boxed is matched right after the final answer xxxx
206
261
if latex_config .boxed_match_priority >= 0 :
207
- regexes .append ((latex_boxed , latex_config .boxed_match_priority ))
262
+ latex_re_boxed = make_latex_env_pattern (prefix = 'first_' , context = 'boxed' )
263
+ next_groups = '' .join ([rf"(?:\s*(?:{ and_word } |{ or_word } )\s*{ make_latex_env_pattern (f'next{ i } _' , context = 'boxed' )} )?" for i in range (1 , 6 )])
264
+ latex_re_boxed = rf"{ latex_re_boxed } { next_groups } "
265
+ regexes .append ((latex_re_boxed , latex_config .boxed_match_priority ))
266
+ # Match plain boxed. The issue with plain boxed is that it's impossible to know where it stops, so we greedily match
267
+ # up to the last }. The actual extraction is done in the normalization step.
268
+ regexes .append ((rf"(?P<first_latexBoxed>\\boxed{{.+}})" , latex_config .boxed_match_priority ))
208
269
209
270
return [(re .compile (pattern , re .DOTALL ), priority ) for pattern , priority in regexes ]
210
271
@@ -268,7 +329,9 @@ def lazy_indices_regex(
268
329
269
330
270
331
def get_extraction_regexes (
271
- formatted_doc : Doc , target_types : Sequence [ExtractionTarget ], language : Language
332
+ formatted_doc : Doc ,
333
+ target_types : Sequence [ExtractionTarget ],
334
+ language : Language
272
335
) -> list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]]:
273
336
extraction_regexes : list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]] = [
274
337
(lazy_latex_regex (target_type , language ), target_type )
@@ -296,21 +359,21 @@ def get_target_type_order(target_type: ExtractionTarget) -> int:
296
359
297
360
# Small cache, to cache repeated calls with invalid parsing
298
361
@lru_cache(maxsize=20)
@requires_latex2sympy2_extended
def parse_latex_with_timeout(latex: str, timeout_seconds: int):
    """Parse a LaTeX string with ``latex2sympy`` wrapped in the ``timeout`` helper.

    Args:
        latex (str): LaTeX source to parse.
        timeout_seconds (int): Value handed to ``timeout``; presumably the
            maximum parse time in seconds (confirm against the helper).

    Returns:
        Whatever ``latex2sympy`` returns for ``latex``.
    """
    from latex2sympy2_extended.latex2sympy2 import latex2sympy

    # Build the guarded parser first, then evaluate the arguments
    guarded_parse = timeout(timeout_seconds)(latex2sympy)
    treat_as_real = not should_treat_as_complex(latex)
    return guarded_parse(
        latex,
        is_real=treat_as_real,
        convert_degrees=False,
        normalization_config=None,
    )
305
369
306
370
307
371
@lru_cache(maxsize=20)
def parse_expr_with_timeout(expr: str, timeout_seconds: int):
    """Parse ``expr`` with ``parse_expr`` (unevaluated) wrapped in the ``timeout`` helper.

    Args:
        expr (str): Expression text to parse.
        timeout_seconds (int): Value handed to ``timeout``; presumably the
            maximum parse time in seconds (confirm against the helper).

    Returns:
        The result of ``parse_expr(expr, evaluate=False)``.
    """
    guarded_parse = timeout(timeout_seconds)(parse_expr)
    return guarded_parse(expr, evaluate=False)
311
374
312
375
313
- def extract_expr (match : re .Match ) -> tuple [str | sympy .Expr | None , str ]:
376
+ def extract_expr (match : re .Match , timeout_seconds : int ) -> tuple [str | sympy .Expr | None , str ]:
314
377
# First combine the number
315
378
groups = match .groupdict ()
316
379
# Expr group will always exist because every regex has it
@@ -338,7 +401,7 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
338
401
# Remove new lines and spaces
339
402
if expr :
340
403
try :
341
- return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" )), expr
404
+ return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" ), timeout_seconds ), expr
342
405
except : # noqa: E722
343
406
pass
344
407
return None , expr
@@ -348,52 +411,90 @@ def convert_to_pct(number: Number):
348
411
return sympy .Mul (number , sympy .Rational (1 , 100 ), evaluate = False )
349
412
350
413
351
@requires_latex2sympy2_extended
@lru_cache(maxsize=20)
def extract_latex(match: re.Match, latex_config: LatexExtractionConfig, timeout_seconds: int) -> tuple[sympy.Expr | str | None, str]:
    """Normalize and parse the LaTeX captured by a regex match.

    The match may hold one ``first_``-prefixed LaTeX group followed by up to
    five ``nextN_``-prefixed ones (expressions joined by and/or).

    Args:
        match (re.Match): Match whose named groups start with ``first_latex`` /
            ``nextN_latex``, with optional ``<prefix>_percent`` groups.
        latex_config (LatexExtractionConfig): Supplies ``normalization_config``
            for ``normalize_latex``.
        timeout_seconds (int): Forwarded to ``parse_latex_with_timeout``.

    Returns:
        tuple: ``(parsed, normalized_string)``.
            - No LaTeX group matched: ``(None, "")``.
            - Several expressions, all parsed: a ``FiniteSet`` of them (nested
              ``FiniteSet`` results are flattened) and the normalized strings
              joined with ``" and "``.
            - Otherwise: the first expression's parse result (``None`` if it
              failed to parse) and its normalized string.
    """
    from latex2sympy2_extended.latex2sympy2 import normalize_latex

    groups = match.groupdict()

    def _first_non_empty(name_prefix: str):
        """Return (value, name) of the first non-empty group starting with name_prefix."""
        return next(
            ((val, name) for name, val in groups.items() if name.startswith(name_prefix) and val),
            None,
        )

    # The first_ group, then the next1_ .. next5_ groups, in order
    candidates = [_first_non_empty("first_latex")]
    candidates.extend(_first_non_empty(f"next{i}_latex") for i in range(1, 6))
    all_latex = [found for found in candidates if found is not None]

    latex_exprs = []
    latex_strs = []
    for latex, name in all_latex:
        name_parts = name.split("_")
        name_without_prefix = name_parts[0]
        group_name = name_parts[1] if len(name_parts) > 1 else None
        is_percentage = bool(groups.get(f"{name_without_prefix}_percent"))

        # A bare boxed match is greedy, so switch its normalization to boxed="last"
        config = latex_config.normalization_config
        if group_name == "latexBoxed":
            config = replace(config, boxed="last")

        normalized_latex = normalize_latex(latex, config=config)
        latex_strs.append(normalized_latex)

        try:
            parsed_latex = parse_latex_with_timeout(normalized_latex, timeout_seconds=timeout_seconds)
            if is_percentage:
                parsed_latex = convert_to_pct(parsed_latex)
        except Exception:
            # Best effort: a failed or timed-out parse is recorded as None.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            parsed_latex = None
        latex_exprs.append(parsed_latex)

    if not latex_exprs:
        return None, ""

    # If we have multiple expressions and all of them parsed, merge them into one set
    if len(latex_exprs) > 1 and all(expr is not None for expr in latex_exprs):
        # To handle "solution is: 1,2 and 3"
        all_elements = []
        for expr in latex_exprs:
            if isinstance(expr, FiniteSet):
                all_elements.extend(expr.args)
            else:
                all_elements.append(expr)
        return FiniteSet(*all_elements), " and ".join(latex_strs)

    # Otherwise return the first expression only
    return latex_exprs[0], latex_strs[0]
479
+
480
+
481
def extract_match(match: re.Match, target_type: ExtractionTarget, timeout_seconds: int) -> tuple[Basic | MatrixBase | str | None, str]:
    """Dispatch a regex match to the extractor for its target type.

    Args:
        match (re.Match): The regex match object containing the extracted text
        target_type (ExtractionTarget): The extraction config (latex, expression, or indices)
        timeout_seconds (int): Forwarded to the latex and expression extractors

    Returns:
        tuple[Basic | MatrixBase | str | None, str]: A tuple containing:
            - The extracted and parsed value (if successful) or None (if parsing failed)
            - The string representation of the extracted text
    """
    if isinstance(target_type, LatexExtractionConfig):
        return extract_latex(match, target_type, timeout_seconds=timeout_seconds)

    if isinstance(target_type, ExprExtractionConfig):
        return extract_expr(match, timeout_seconds=timeout_seconds)

    if isinstance(target_type, IndicesExtractionConfig):
        # Indices are not parsed: the raw group is both the value and the string
        indices = match.group("indices")
        return indices, indices
399
500
@@ -403,6 +504,7 @@ def extract_target_from_pred(
403
504
target_res : list [tuple [list [tuple [re .Pattern [str ], int ]], ExtractionTarget ]],
404
505
fallback_mode : Literal ["no_fallback" , "first_match" ] = "no_fallback" ,
405
506
extraction_mode : Literal ["first_match" , "any_match" ] = "any_match" ,
507
+ timeout_seconds : int = 5 ,
406
508
):
407
509
"""Extracts targets from a prediction string using regex patterns.
408
510
Returns the first successfully extracted match.
@@ -416,6 +518,7 @@ def extract_target_from_pred(
416
518
extraction_mode (Literal["first_match", "any_match"], optional): How to handle extraction failures. Defaults to "any_match".
417
519
- "first_match": Only tries to extract the first match
418
520
- "any_match": Tries to extract any match
521
+ timeout_seconds (int, optional): Maximum time in seconds to spend parsing each expression. Defaults to 5.
419
522
420
523
Returns:
421
524
list: List of extracted predictions, with first fallback string appended if fallback_mode is "first_match"
@@ -445,7 +548,7 @@ def extract_target_from_pred(
445
548
446
549
# Try to extract from each match, starting from rightmost
447
550
for match , _ , _ , target_type in matches_with_pos :
448
- extracted_match , str_fallback = extract_match (match , target_type )
551
+ extracted_match , str_fallback = extract_match (match , target_type , timeout_seconds )
449
552
match_found = True
450
553
451
554
if str_fallback :
0 commit comments