From a8fdedd82c25721022c14496b9b4311a6a48132b Mon Sep 17 00:00:00 2001 From: Leo Boisvert Date: Fri, 7 Feb 2025 17:31:28 +0000 Subject: [PATCH 1/3] add o1-mini + o3-mini configs --- add_study_to_repro_journal.py | 18 ++++++++++++++++++ src/agentlab/agents/generic_agent/__init__.py | 6 ++++-- .../agents/generic_agent/agent_configs.py | 9 +++++++++ src/agentlab/llm/llm_configs.py | 13 +++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 add_study_to_repro_journal.py diff --git a/add_study_to_repro_journal.py b/add_study_to_repro_journal.py new file mode 100644 index 00000000..4a6b9fe3 --- /dev/null +++ b/add_study_to_repro_journal.py @@ -0,0 +1,18 @@ +import os +from pathlib import Path +from agentlab.experiments.study import Study + + +base_dir = "/home/toolkit/ui_copilot_results" + +exp_paths = [ + "2025-01-31_22-08-34_genericagent-o3-mini-2025-01-31-on-workarena-l1", + # '2025-02-02_01-53-45_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1', + "2025-02-02_01-55-04_genericagent-openai-o1-mini-2024-09-12-on-workarena-l1", +] +full_paths = [os.path.join(base_dir, exp_path) for exp_path in exp_paths] + +for full_path in full_paths: + study = Study.load(Path(full_path)) + + study.append_to_journal(strict_reproducibility=False) diff --git a/src/agentlab/agents/generic_agent/__init__.py b/src/agentlab/agents/generic_agent/__init__.py index f6cbc89c..b4712cfe 100644 --- a/src/agentlab/agents/generic_agent/__init__.py +++ b/src/agentlab/agents/generic_agent/__init__.py @@ -17,8 +17,8 @@ AGENT_4o_MINI, AGENT_CLAUDE_SONNET_35, AGENT_4o_VISION, - AGENT_4o_MINI_VISION, - AGENT_CLAUDE_SONNET_35_VISION, + AGENT_o3_MINI, + AGENT_o1_MINI, ) __all__ = [ @@ -26,6 +26,8 @@ "AGENT_4o", "AGENT_4o_MINI", "AGENT_4o_VISION", + "AGENT_o3_MINI", + "AGENT_o1_MINI", "AGENT_LLAMA3_70B", "AGENT_LLAMA31_70B", "AGENT_8B", diff --git a/src/agentlab/agents/generic_agent/agent_configs.py b/src/agentlab/agents/generic_agent/agent_configs.py index 02a4c7dc..e21ada58 
100644 --- a/src/agentlab/agents/generic_agent/agent_configs.py +++ b/src/agentlab/agents/generic_agent/agent_configs.py @@ -265,6 +265,15 @@ flags=FLAGS_GPT_4o, ) +AGENT_o3_MINI = GenericAgentArgs( + chat_model_args=CHAT_MODEL_ARGS_DICT["openai/o3-mini-2025-01-31"], + flags=FLAGS_GPT_4o, +) + +AGENT_o1_MINI = GenericAgentArgs( + chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/openai/o1-mini-2024-09-12"], + flags=FLAGS_GPT_4o, +) # GPT-4o vision default config FLAGS_GPT_4o_VISION = FLAGS_GPT_4o.copy() FLAGS_GPT_4o_VISION.obs.use_screenshot = True diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 725d8a0c..4210d45e 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -63,6 +63,19 @@ max_input_tokens=16_384, max_new_tokens=4096, ), + "openai/o3-mini-2025-01-31": OpenAIModelArgs( + model_name="o3-mini-2025-01-31", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=100_000, + ), + "openai/o1-mini": OpenAIModelArgs( + model_name="openai/o1-mini", + max_total_tokens=128_000, + max_input_tokens=128_000, + max_new_tokens=64_000, + temperature=1e-1, + ), "azure/gpt-35-turbo/gpt-35-turbo": AzureModelArgs( model_name="gpt-35-turbo", deployment_name="gpt-35-turbo", From c7bc6d6b223bf36fee3fee5a47077ebfd389fff9 Mon Sep 17 00:00:00 2001 From: Leo Boisvert Date: Fri, 7 Feb 2025 17:32:57 +0000 Subject: [PATCH 2/3] Add o1-mini + o3-mini to repro journal --- reproducibility_journal.csv | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index aa38edad..991cedae 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -64,4 +64,12 @@ ThibaultLSDC,GenericAgent-gpt-4o-mini_vision,visualwebarena,0.13.3,2024-12-02_02 ThibaultLSDC,GenericAgent-gpt-4o_vision,visualwebarena,0.13.3,2024-12-02_07-17-28,7fb7eac8-4bbd-4ebe-be32-15901a7678f2,0.267,0.015,65,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 
14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None, ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta_vision,visualwebarena,0.13.3,2024-12-02_09-11-35,22f0611d-aeea-4ee9-a533-b45442b5e080,0.21,0.013,178,910/910,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None, ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,webarena,0.13.3,2024-12-02_23-18-38,fc5747bc-d998-4942-a0eb-e55a3ccc1cb3,0.184,0.014,213,811/812,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,df7bc706f3793f47a456d1bda0485b306b8cf612,,0.13.3,None, - +Leo Boisvert,GenericAgent-o3-mini-2025-01-31,workarena_l1,0.4.1,2025-01-31_22-08-33,a74cc00f-f743-43a1-9cab-59af8bffa3a2,0.482,0.028,3,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py + M: src/agentlab/agents/generic_agent/agent_configs.py + M: src/agentlab/analyze/agent_xray.py + M: src/agentlab/llm/chat_api.py + M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29," +Leo Boisvert,GenericAgent-openai_o1-mini-2024-09-12,workarena_l1,0.4.1,2025-02-02_01-55-04,f3e1fcb8-5fc5-4115-9e00-27251508e2c7,0.518,0.028,5,330/330,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.3,1.44.0,v0.3.2,73baabee6d7ac37a5b8677c80baf83914a4f4dc4," M: src/agentlab/agents/generic_agent/__init__.py + M: src/agentlab/agents/generic_agent/agent_configs.py + M: src/agentlab/analyze/agent_xray.py + M: src/agentlab/llm/llm_configs.py",0.13.3,1d2d7160e5b7ec9954ecb48988f71eb56288dd29," From d2a9edb3a67417f26427b01be073fee970a796cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Boisvert?= Date: Wed, 12 Feb 2025 17:44:51 -0500 Subject: [PATCH 3/3] remove duplicate o3-mini --- src/agentlab/llm/llm_configs.py | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 4210d45e..2958f92b 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -63,12 +63,6 @@ max_input_tokens=16_384, max_new_tokens=4096, ), - "openai/o3-mini-2025-01-31": OpenAIModelArgs( - model_name="o3-mini-2025-01-31", - max_total_tokens=200_000, - max_input_tokens=200_000, - max_new_tokens=100_000, - ), "openai/o1-mini": OpenAIModelArgs( model_name="openai/o1-mini", max_total_tokens=128_000,