Skip to content

Commit 55a3726

Browse files
Improve robustness to failures; use gpt-5-mini as the judge for WebArena-Lite
1 parent 50a38f5 commit 55a3726

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

browsergym/core/src/browsergym/core/env.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import copy
22
import logging
33
import re
4+
import json
5+
import os
46
import time
57
from abc import ABC
68
from pathlib import Path
@@ -10,7 +12,7 @@
1012
import numpy as np
1113
import playwright.sync_api
1214

13-
from . import _get_global_playwright
15+
from . import _get_global_playwright, _set_global_playwright
1416
from .action.base import execute_python_code
1517
from .action.highlevel import HighLevelActionSet
1618
from .chat import Chat
@@ -256,7 +258,21 @@ def override_property(task, env, property):
256258
# use the global Playwright instance
257259
pw: playwright.sync_api.Playwright = _get_global_playwright()
258260
# important: change playwright's test id attribute from "data-testid" to "bid"
259-
pw.selectors.set_test_id_attribute(BROWSERGYM_ID_ATTRIBUTE)
261+
try:
262+
pw.selectors.set_test_id_attribute(BROWSERGYM_ID_ATTRIBUTE)
263+
except RuntimeError as exc:
264+
if "no running event loop" not in str(exc):
265+
raise
266+
logger.warning(
267+
"Playwright global instance lost its event loop; restarting Playwright."
268+
)
269+
try:
270+
pw.stop()
271+
except Exception:
272+
logger.debug("Failed to stop stale Playwright instance.", exc_info=True)
273+
pw = playwright.sync_api.sync_playwright().start()
274+
_set_global_playwright(pw)
275+
pw.selectors.set_test_id_attribute(BROWSERGYM_ID_ATTRIBUTE)
260276
args = [
261277
(
262278
f"--window-size={viewport['width']},{viewport['height']}"
@@ -281,6 +297,7 @@ def override_property(task, env, property):
281297
)
282298

283299
# create a new browser context for pages
300+
extra_http_headers = json.loads(os.getenv("EXTRA_HTTP_HEADERS", "{}"))
284301
self.context = self.browser.new_context(
285302
no_viewport=True if self.resizeable_window else None,
286303
viewport=viewport if not self.resizeable_window else None,
@@ -291,6 +308,7 @@ def override_property(task, env, property):
291308
locale=locale,
292309
timezone_id=timezone_id,
293310
ignore_https_errors=True,
311+
extra_http_headers=extra_http_headers,
294312
# will raise an Exception if above args are overridden
295313
**self.pw_context_kwargs,
296314
)

browsergym/webarenalite/src/browsergym/webarenalite/helper_functions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ def llm_fuzzy_match(pred: str, reference: str, question: str) -> float:
247247
]
248248

249249
raw_response = generate_from_openai_chat_completion(
250-
model="gpt-4-1106-preview",
250+
# model="gpt-4-1106-preview",
251+
model="gpt-5-mini",
251252
messages=messages,
252253
temperature=0,
253254
max_tokens=768,
@@ -283,7 +284,8 @@ def llm_ua_match(pred: str, reference: str, question: str) -> float:
283284
]
284285

285286
raw_response = generate_from_openai_chat_completion(
286-
model="gpt-4-1106-preview",
287+
# model="gpt-4-1106-preview",
288+
model="gpt-5-mini",
287289
messages=messages,
288290
temperature=0,
289291
max_tokens=768,

0 commit comments

Comments
 (0)