diff --git a/.github/workflows/tests_mcp.yml b/.github/workflows/tests_mcp.yml index 753bb7412eba5..7b1f49ef53622 100644 --- a/.github/workflows/tests_mcp.yml +++ b/.github/workflows/tests_mcp.yml @@ -47,3 +47,5 @@ jobs: flakiness-client-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_CLIENT_ID }} flakiness-tenant-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_TENANT_ID }} flakiness-subscription-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_SUBSCRIPTION_ID }} + env: + DEBUG: "loop:*" diff --git a/packages/playwright/src/agents/agent.ts b/packages/playwright/src/agents/agent.ts index 6510ffb1342eb..ae198ac302a87 100644 --- a/packages/playwright/src/agents/agent.ts +++ b/packages/playwright/src/agents/agent.ts @@ -18,7 +18,6 @@ import { Loop } from '../mcp/sdk/bundle'; import type z from 'zod'; import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; -import type { Tool } from '@modelcontextprotocol/sdk/types.js'; import type * as tinyLoop from 'tiny-loop'; type Logger = (category: string, text: string, details?: string) => void; @@ -33,14 +32,18 @@ export type AgentSpec = { examples: string[]; }; +type LoopOptions = ConstructorParameters[1] & { + loopName: 'copilot' | 'claude' | 'openai'; +}; + export class Agent> { readonly loop: tinyLoop.Loop; readonly spec: AgentSpec; readonly clients: Map; - readonly resultSchema: Tool['inputSchema']; + readonly resultSchema: tinyLoop.Schema; - constructor(loopName: 'copilot' | 'claude' | 'openai', spec: AgentSpec, clients: Map, resultSchema: Tool['inputSchema']) { - this.loop = new Loop(loopName); + constructor(loopOptions: LoopOptions, spec: AgentSpec, clients: Map, resultSchema: tinyLoop.Schema) { + this.loop = new Loop(loopOptions.loopName, loopOptions); this.spec = spec; this.clients = clients; this.resultSchema = resultSchema; @@ -52,9 +55,8 @@ export class Agent> { try { return await this.loop.run>(`${prompt}\n\nTask:\n${task}\n\nParams:\n${JSON.stringify(params, null, 2)}`, { ...options, - // TODO: fix types in tiny-loop - tools: tools as any, - callTool: callTool as any, + tools, + callTool, resultSchema: this.resultSchema }); } finally { @@ -65,7 +67,7 @@ export class Agent> { private async _initClients() { const clients: Record = {}; const agentToolNames = new Set(this.spec.tools); - const tools: Tool[] = []; + const tools: tinyLoop.Tool[] = []; for (const [name, client] of this.clients.entries()) { const list = await client.listTools(); @@ -73,7 +75,7 @@ export class Agent> { if (!agentToolNames.has(name + '/' + tool.name)) continue; agentToolNames.delete(name + '/' + tool.name); - tools.push({ ...tool, name: name + '__' + tool.name }); + tools.push({ ...tool as tinyLoop.Tool, name: name + '__' + tool.name }); } clients[name] = client; } diff --git a/packages/playwright/src/agents/performTask.ts b/packages/playwright/src/agents/performTask.ts index d425b5508e08b..df11ef63787a5 100644 --- a/packages/playwright/src/agents/performTask.ts +++ b/packages/playwright/src/agents/performTask.ts @@ -36,9 +36,8 @@ export async function performTask(context: playwright.BrowserContext, task: stri try { return await loop.run(task, { - // TODO: fix types in tiny-loop - tools: await backend.listTools() as any, - callTool: callTool as any, + tools: await backend.listTools(), + callTool, logger, }); } finally { diff --git a/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat0.json b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat0.json new file mode 100644 index 0000000000000..91dd51eec4ded --- /dev/null +++ b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat0.json @@ -0,0 +1,601 @@ +{ + "5f8a61ad99f22c5ad550e02d6fc1a9f902d4ee92": { + "result": { + "role": "assistant", + "content": "Setting up the test page for ordering two items test with the provided seed file and test plan", + "toolCalls": [ + { + "name": "playwright-test__generator_setup_page", + "arguments": { + "plan": "1. Fill out the form to order a blue table.\n2. Confirm the order.\n3. Verify the total price and estimated delivery for the table.\n4. Make a new order.\n5. Fill out the form to order a green desk.\n6. Confirm the order.\n7. Verify the total price and estimated delivery for the desk.\n\nTest data to use:\n- Name: John Doe\n- Address: 123 Main St, Anytown, XYZ state\n- Zip Code: 12345\n- Email: john@doe.me", + "seedFile": "tests/ordering/seed.spec.ts" + }, + "id": "tooluse_3OOTy9zxRJeaZ-0xLHVF4A" + } + ] + }, + "usage": { + "input": 4661, + "output": 246 + } + }, + "080c929daf31c6cdd1805325ae860674abab210e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Step 1: Fill out the form to order a blue table - select Table as product type", + "ref": "e7", + "values": [ + "Table" + ] + }, + "id": "tooluse_8l8QZMsLRj6ZIdEcOIP5-Q" + } + ] + }, + "usage": { + "input": 5507, + "output": 133 + } + }, + "a893785d8148d3190e347d87e7f4d1551d42f2b1": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Step 1: Fill out the form to order a blue table - select Blue color", + "ref": "e10", + "values": [ + "Blue" + ] + }, + "id": "tooluse_ii-LuAu6QM651m9Gx6A_dw" + } + ] + }, + "usage": { + "input": 6259, + "output": 130 + } + }, + "48e11c3ee724b31813a9babc3705a5e0f0b511a9": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Step 1: Fill out the form - enter Full Name", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_Kj_cCra6RTGJwbY2_H_E0w" + } + ] + }, + "usage": { + "input": 6542, + "output": 125 + } + }, + "430f79ed87c0d8729a5f09a958c54ecba73c172c": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Step 1: Fill out the form - enter Email Address", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_HPIC_IVLSG2StAZ-IXyeVQ" + } + ] + }, + "usage": { + "input": 6717, + "output": 127 + } + }, + "6f64facfd5b0ebf7097e7051dff4123839f6e2da": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Step 1: Fill out the form - enter Street Address", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse_dI-_YWw0TriJeHhl2YJncw" + } + ] + }, + "usage": { + "input": 6896, + "output": 126 + } + }, + "83631c8034e368d5fef4ace108f024593b4b010a": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Step 1: Fill out the form - enter City", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_k2bP_nwbSxSmTzXLGPcFwQ" + } + ] + }, + "usage": { + "input": 7073, + "output": 123 + } + }, + "47b927d7234c48b09f9c633b7d94873e99bc40eb": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Step 1: Fill out the form - enter State/Province", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse__N9k6LU5SdSwsLCqP5_BcQ" + } + ] + }, + "usage": { + "input": 7245, + "output": 128 + } + }, + "22d39925429c57c63fc98af97d37d6b52f2ff20f": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Step 1: Fill out the form - enter ZIP/Postal Code", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_r3V42qlPQV2g8z3z5kkgNg" + } + ] + }, + "usage": { + "input": 7425, + "output": 130 + } + }, + "74d5b916d0ba4e0eaaf55358daf5641405ab8e27": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Step 2: Confirm the order - click Order button", + "ref": "e29" + }, + "id": "tooluse_XMnUow7nT567WXLeV-jzQQ" + } + ] + }, + "usage": { + "input": 7607, + "output": 104 + } + }, + "180b87a9c2c5b0859ae142108fbcb0330cc06f6e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Step 2: Confirm the order - accept the confirmation dialog for blue table" + }, + "id": "tooluse_FLHf_KFUTlStNMajx3MITg" + } + ] + }, + "usage": { + "input": 7790, + "output": 91 + } + }, + "5892d1ae343f8dc2c972204852adb5fd6f09ceae": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Step 3: Verify the total price for the table order", + "text": "$353.98" + }, + "id": "tooluse_E-P8M3LPS0qW5tP_RJ7hTQ" + } + ] + }, + "usage": { + "input": 8690, + "output": 93 + } + }, + "f4d49f067d24abf0bad63bb043d9f4d755c59e40": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Step 3: Verify the estimated delivery for the table order", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_GtxADqLzQjuisep8llDCuA" + } + ] + }, + "usage": { + "input": 8831, + "output": 99 + } + }, + "d85838997ca0227d24d29f8cce5247b1047f2600": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Place Another Order button", + "intent": "Step 4: Make a new order - click Place Another Order button", + "ref": "e58" + }, + "id": "tooluse_RtarjSH4TTaZf1hsMGxoOQ" + } + ] + }, + "usage": { + "input": 8984, + "output": 108 + } + }, + "5ab856d00699c2a8aaaa07c330849a26f8cd2ea0": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Step 5: Fill out the form to order a green desk - select Desk as product type", + "ref": "e7", + "values": [ + "Desk" + ] + }, + "id": "tooluse_-Fm07ABZSWi6RVkpqUjyOg" + } + ] + }, + "usage": { + "input": 9716, + "output": 135 + } + }, + "0a4dc1e4c3b1fc718c3825470306e9c613d9d999": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Step 5: Fill out the form to order a green desk - select Green color", + "ref": "e10", + "values": [ + "Green" + ] + }, + "id": "tooluse_wvvevgavQMeTwAbgVOO9IA" + } + ] + }, + "usage": { + "input": 10003, + "output": 130 + } + }, + "fd5f8cceb0687730401fdeeba277fc7d2bb83fb2": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Step 5: Fill out the form - enter Full Name", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_-ZV0ZkVaQ-C5EJ42_VO5wQ" + } + ] + }, + "usage": { + "input": 10286, + "output": 125 + } + }, + "498ebc8cb96b60f020b5d869964f0d3c89271257": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Step 5: Fill out the form - enter Email Address", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_DuXKwG6yQF6Qoxfyjdr6Yg" + } + ] + }, + "usage": { + "input": 10461, + "output": 127 + } + }, + "1fe8d8da48eb1649d4a79727204fa679b2a3ddcf": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Step 5: Fill out the form - enter Street Address", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse_3LdCLvf6RSO7--Uqrt3t5g" + } + ] + }, + "usage": { + "input": 10640, + "output": 126 + } + }, + "5c28ad28d94e090c0a0d4adacda035388846974f": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Step 5: Fill out the form - enter City", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_sLF0hgVOTWimkvI9otspaQ" + } + ] + }, + "usage": { + "input": 10817, + "output": 123 + } + }, + "e96e633993023d92126a4f9eb01620787e875df2": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Step 5: Fill out the form - enter State/Province", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse_dFyZ-ErtRHy78fwwsPl00A" + } + ] + }, + "usage": { + "input": 10989, + "output": 128 + } + }, + "8c738c785e2443be604f3ee9004afcf977e94134": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Step 5: Fill out the form - enter ZIP/Postal Code", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_ES03TWOqRMC97H61q_1JXA" + } + ] + }, + "usage": { + "input": 11169, + "output": 130 + } + }, + "5b140016ec0c5b86488afd33cc9157da84e00177": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Step 6: Confirm the order - click Order button", + "ref": "e29" + }, + "id": "tooluse_UGJXuPeIRHaNVkomBBsTOA" + } + ] + }, + "usage": { + "input": 11351, + "output": 104 + } + }, + "9655dca9eb0b306dd30a1d2f89f0aaa36e76d975": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Step 6: Confirm the order - accept the confirmation dialog for green desk" + }, + "id": "tooluse_7QOz5iOzRK6Xq-V7kggP5A" + } + ] + }, + "usage": { + "input": 11534, + "output": 91 + } + }, + "a58814f2d8fe45e6782283e020b2cdd1a7349464": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Step 7: Verify the total price for the desk order", + "text": "$299.98" + }, + "id": "tooluse_OldDL_0sR5iR_5BqbjzUgQ" + } + ] + }, + "usage": { + "input": 12435, + "output": 93 + } + }, + "5bafbfab44e29eebf60b9034e47df0a055e25292": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Step 7: Verify the estimated delivery for the desk order", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_KCSXDsM9TdGSe_BEjzo1Kw" + } + ] + }, + "usage": { + "input": 12576, + "output": 99 + } + }, + "53d0b0e25ff3f1e90cb997a1cbea249de3e868a3": { + "result": { + "role": "assistant", + "content": "Read the generated test log to retrieve the complete test code", + "toolCalls": [ + { + "name": "playwright-test__generator_read_log", + "arguments": {}, + "id": "tooluse_0E74RBGZR4qcfbrzz5phjA" + } + ] + }, + "usage": { + "input": 12729, + "output": 69 + } + }, + "f5a9917e6276d8ce13a3e1baf3cf578b9bf6bd16": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "report_result", + "arguments": { + "code": "import { test, expect } from '@playwright/test';\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('http://localhost:/evals/fill-form.html');\n});\n\ntest('Order two items', async ({ page }) => {\n // Fill out the form to order a blue table\n await page.getByLabel('Product Type *').selectOption(['Table']);\n await page.getByLabel('Color *').selectOption(['Blue']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n\n // Confirm the order\n page.on('dialog', dialog => dialog.accept());\n await page.getByRole('button', { name: 'Order' }).click();\n\n // Verify the total price and estimated delivery for the table\n await expect(page.getByText('$353.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n\n // Make a new order\n await page.getByRole('button', { name: 'Place Another Order' }).click();\n\n // Fill out the form to order a green desk\n await page.getByLabel('Product Type *').selectOption(['Desk']);\n await page.getByLabel('Color *').selectOption(['Green']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n\n // Confirm the order\n await page.getByRole('button', { name: 'Order' }).click();\n\n // Verify the total price and estimated delivery for the desk\n await expect(page.getByText('$299.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n});" + }, + "id": "tooluse_mmzAZJTWQiCwKiiy8glUPw" + } + ] + }, + "usage": { + "input": 14168, + "output": 784 + } + } +} \ No newline at end of file diff --git a/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat1.json b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat1.json new file mode 100644 index 0000000000000..432cf55f68293 --- /dev/null +++ b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat1.json @@ -0,0 +1,612 @@ +{ + "5f8a61ad99f22c5ad550e02d6fc1a9f902d4ee92": { + "result": { + "role": "assistant", + "content": "Setting up the test page for ordering two items test with the complete test plan", + "toolCalls": [ + { + "name": "playwright-test__generator_setup_page", + "arguments": { + "plan": "1. Fill out the form to order a blue table with Name: John Doe, Address: 123 Main St, Anytown, XYZ state, Zip Code: 12345, Email: john@doe.me\n2. Confirm the order.\n3. Verify the total price and estimated delivery for the table.\n4. Make a new order.\n5. Fill out the form to order a green desk with Name: John Doe, Address: 123 Main St, Anytown, XYZ state, Zip Code: 12345, Email: john@doe.me\n6. Confirm the order.\n7. Verify the total price and estimated delivery for the desk.", + "seedFile": "tests/ordering/seed.spec.ts" + }, + "id": "tooluse_Mhk3FnUmTNWiKZVtaN49IA" + } + ] + }, + "usage": { + "input": 4661, + "output": 269 + } + }, + "787af956a5ee8a01a7dcf8a144921f3363933979": { + "result": { + "role": "assistant", + "content": "I can see the order form page. Now I'll start filling out the form for a blue table.", + "toolCalls": [] + }, + "usage": { + "input": 5530, + "output": 148 + } + }, + "4b36e7bfacefb1e023db9623ead3319d54e064de": { + "result": { + "role": "assistant", + "content": "Step 1: Select Table as the product type", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Fill out the form to order a blue table - select product type", + "ref": "e7", + "values": [ + "Table" + ] + }, + "id": "tooluse_FVaXBgOoTti4hjwSQd16Lw" + } + ] + }, + "usage": { + "input": 5575, + "output": 153 + } + }, + "af596db0bee348f90d2776bc763690fd7d652b87": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Fill out the form to order a blue table - select color", + "ref": "e10", + "values": [ + "Blue" + ] + }, + "id": "tooluse_338AmCqITBiAd6O-wM9vZQ" + } + ] + }, + "usage": { + "input": 6332, + "output": 125 + } + }, + "e2ff677b901933ea45f2281d71272a9d96804138": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Fill out the form to order a blue table - enter name", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_NRWEH8f-Q_ymv-XJz_cwXQ" + } + ] + }, + "usage": { + "input": 6610, + "output": 125 + } + }, + "ec6013b80d6dabe99066a762d35c263879e3f996": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Fill out the form to order a blue table - enter email", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_DNanL94eTQOfmUOIZXee0Q" + } + ] + }, + "usage": { + "input": 6785, + "output": 127 + } + }, + "a1c0d60aad09e6800e7b459113b76ecfd09c0bc1": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Fill out the form to order a blue table - enter street address", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse_mu-gj3z9SlSHgC9LtO9Upg" + } + ] + }, + "usage": { + "input": 6964, + "output": 127 + } + }, + "04292df5c9fd0f93607babc12490e2eff49acc95": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Fill out the form to order a blue table - enter city", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_Ix0Y1JcqS4WzH9_J_2H_yw" + } + ] + }, + "usage": { + "input": 7142, + "output": 124 + } + }, + "52601ef741070dd02fca9493f3ebc18e26841063": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Fill out the form to order a blue table - enter state", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse_QWNUWXQ8R-2KjTOI9B_31Q" + } + ] + }, + "usage": { + "input": 7315, + "output": 127 + } + }, + "df9ae1389b168c1242589c7de89135b09e8f37b9": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Fill out the form to order a blue table - enter zip code", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_L9O7NXZwTjG4KSb1_lCMuQ" + } + ] + }, + "usage": { + "input": 7494, + "output": 128 + } + }, + "59f45779758e9aed63851108d51b89a347d14d7e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Confirm the order for blue table - click Order button", + "ref": "e29" + }, + "id": "tooluse_WhmSzO-9QIaTmNyWe6Bw2w" + } + ] + }, + "usage": { + "input": 7674, + "output": 103 + } + }, + "e39eb76eb534d3b6aec6b7eb12efc755f3289243": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Confirm the blue table order by accepting the dialog" + }, + "id": "tooluse_YGjKDq7_SaeLqbk_DxLfwg" + } + ] + }, + "usage": { + "input": 7856, + "output": 85 + } + }, + "d6d47ea26a6bc4de6c050b6bb932976fb747c4c5": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the total price for the table", + "text": "$353.98" + }, + "id": "tooluse_zT9h1iodSg634iM2XbdIuw" + } + ] + }, + "usage": { + "input": 8750, + "output": 88 + } + }, + "c3dec4cc3d043ae53d5a25abe7e520988bc38da6": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the estimated delivery for the table", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_E-8xd_UJRQKN3fsWHkvxRw" + } + ] + }, + "usage": { + "input": 8886, + "output": 94 + } + }, + "02f40c5e8ed9677e0fba1b139217c8c7e3313b45": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Place Another Order button", + "intent": "Make a new order - click Place Another Order button", + "ref": "e58" + }, + "id": "tooluse_2WgSquy4Slev6HoRZJve5A" + } + ] + }, + "usage": { + "input": 9034, + "output": 104 + } + }, + "5240e09aec5f1b1a8accb734c6288786df9067d7": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Fill out the form to order a green desk - select product type", + "ref": "e7", + "values": [ + "Desk" + ] + }, + "id": "tooluse_2j_DQ9LXS_yGbQuNUhDO4w" + } + ] + }, + "usage": { + "input": 9762, + "output": 128 + } + }, + "90b1e82619f3c5d8be64e3b2d34a4a07f39f30f6": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Fill out the form to order a green desk - select color", + "ref": "e10", + "values": [ + "Green" + ] + }, + "id": "tooluse_w-T-rnQhRDuHhDucfsKSOw" + } + ] + }, + "usage": { + "input": 10042, + "output": 125 + } + }, + "223193527013cb96d74fb33d6db5605158a28d2c": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Fill out the form to order a green desk - enter name", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_eL78vNU9TvqclY1GxxM4Pw" + } + ] + }, + "usage": { + "input": 10320, + "output": 125 + } + }, + "59c0719ddaa6eaf63e1db9125f21272da3e93218": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Fill out the form to order a green desk - enter email", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_e-mqqRLBShiKZWjNWdII_w" + } + ] + }, + "usage": { + "input": 10495, + "output": 127 + } + }, + "4d1ba88520a80d1b47ce5d80f2c9ec22b5a1e986": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Fill out the form to order a green desk - enter street address", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse_LKB1eGJ1QhSYT-5Vv2hQwg" + } + ] + }, + "usage": { + "input": 10674, + "output": 127 + } + }, + "8f3b4d0cc8bd7fb6f5b11610fcd8f1c4b3d40f78": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Fill out the form to order a green desk - enter city", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_csBdZ4bHR9q-L-X8U3wDqA" + } + ] + }, + "usage": { + "input": 10852, + "output": 124 + } + }, + "8b7c714ec3ec2d90c256a28738be7049615a0401": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Fill out the form to order a green desk - enter state", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse_xOyn4-niRtGUBdqjLdNjgg" + } + ] + }, + "usage": { + "input": 11025, + "output": 127 + } + }, + "c609fdd9046f4aa52f34a8bb343a4d9d4fb12223": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Fill out the form to order a green desk - enter zip code", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_j6uoi3D-RHGMIWXvi62qJA" + } + ] + }, + "usage": { + "input": 11204, + "output": 128 + } + }, + "65dbc7169e9118e00a86c567bde3e65f385ebe9e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Confirm the order for green desk - click Order button", + "ref": "e29" + }, + "id": "tooluse_aRrjEDcXQdKF7--Kxhvl1w" + } + ] + }, + "usage": { + "input": 11384, + "output": 103 + } + }, + "b86dc388e786efa6859ec1f4a1735b63bcdf1607": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Confirm the green desk order by accepting the dialog" + }, + "id": "tooluse_5sxgCIfUSmehhlqlCAV2Fg" + } + ] + }, + "usage": { + "input": 11566, + "output": 85 + } + }, + "9ce2152bca0948200b8bd4dc058d5b773bb68ea8": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the total price for the desk", + "text": "$299.98" + }, + "id": "tooluse_Cvp2c0-xRaWsXNmT6acyoQ" + } + ] + }, + "usage": { + "input": 12461, + "output": 88 + } + }, + "15aed2ad641ad0d86b399f34d57949cbb6de0ab7": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the estimated delivery for the desk", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_mLFTB25XR-myZD56XfIb9g" + } + ] + }, + "usage": { + "input": 12597, + "output": 94 + } + }, + "27a7e4b49cd13cf7d6212b5c64063928a1efeb2f": { + "result": { + "role": "assistant", + "content": "Get the generated test code", + "toolCalls": [ + { + "name": "playwright-test__generator_read_log", + "arguments": {}, + "id": "tooluse_viEWnJnMThOE45w9htY5Hg" + } + ] + }, + "usage": { + "input": 12745, + "output": 63 + } + }, + "7f8a91446277bb086095bbd02fb59191648aeaf2": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "report_result", + "arguments": { + "code": "import { test, expect } from '@playwright/test';\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('http://localhost:/evals/fill-form.html');\n});\n\ntest('Order two items', async ({ page }) => {\n // Fill out the form to order a blue table\n await page.getByLabel('Product Type *').selectOption(['Table']);\n await page.getByLabel('Color *').selectOption(['Blue']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n\n // Confirm the order\n page.once('dialog', dialog => dialog.accept());\n await page.getByRole('button', { name: 'Order' }).click();\n\n // Verify the total price and estimated delivery for the table\n await expect(page.getByText('$353.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n\n // Make a new order\n await page.getByRole('button', { name: 'Place Another Order' }).click();\n\n // Fill out the form to order a green desk\n await page.getByLabel('Product Type *').selectOption(['Desk']);\n await page.getByLabel('Color *').selectOption(['Green']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n\n // Confirm the order\n page.once('dialog', dialog => dialog.accept());\n await page.getByRole('button', { name: 'Order' }).click();\n\n // Verify the total price and estimated delivery for the desk\n await expect(page.getByText('$299.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n});" + }, + "id": "tooluse_v38_uTIwR7GgZNdiUpK9iw" + } + ] + }, + "usage": { + "input": 14154, + "output": 798 + } + } +} \ No newline at end of file diff --git a/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat2.json b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat2.json new file mode 100644 index 0000000000000..d250b3e03699e --- /dev/null +++ b/tests/mcp/__cache__/copilot/mcp-generator-eval-generate-order-two-items-repeat2.json @@ -0,0 +1,612 @@ +{ + "5f8a61ad99f22c5ad550e02d6fc1a9f902d4ee92": { + "result": { + "role": "assistant", + "content": "Setting up the page for testing the ordering flow with two items. Will use the provided seed file to initialize the page state.", + "toolCalls": [ + { + "name": "playwright-test__generator_setup_page", + "arguments": { + "plan": "1. Fill out the form to order a blue table with details: Name: John Doe, Address: 123 Main St, Anytown, XYZ state, Zip Code: 12345, Email: john@doe.me\n2. Confirm the order.\n3. Verify the total price and estimated delivery for the table.\n4. Make a new order.\n5. Fill out the form to order a green desk with details: Name: John Doe, Address: 123 Main St, Anytown, XYZ state, Zip Code: 12345, Email: john@doe.me\n6. Confirm the order.\n7. Verify the total price and estimated delivery for the desk.", + "seedFile": "tests/ordering/seed.spec.ts" + }, + "id": "tooluse_lXzw9uonQi2XCpHdmUI1tg" + } + ] + }, + "usage": { + "input": 4661, + "output": 282 + } + }, + "06291629cdab5a1c0948968247621f4d41622ff2": { + "result": { + "role": "assistant", + "content": "I can see the order form is ready. Now I'll start filling it out for the first order - a blue table.", + "toolCalls": [] + }, + "usage": { + "input": 5544, + "output": 153 + } + }, + "b8b163fd58ec73419eaa40e6ee713f5aa2dbdf9e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Select 'Table' as the product type for the first order", + "ref": "e7", + "values": [ + "Table" + ] + }, + "id": "tooluse_ehsbAoHVRxiizOV5oLeZMg" + } + ] + }, + "usage": { + "input": 5593, + "output": 127 + } + }, + "b6659d24635ccf1150e8757a7597cee68ea5d09e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Select 'Blue' as the color for the first order", + "ref": "e10", + "values": [ + "Blue" + ] + }, + "id": "tooluse_6JydnTb3T9ipDArn-2LinA" + } + ] + }, + "usage": { + "input": 6340, + "output": 125 + } + }, + "8e5e695053381852a30c4896480a1b6761244fd5": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Enter 'John Doe' in the Full Name field", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_aGhwRuJMReeipGoS-zEFaA" + } + ] + }, + "usage": { + "input": 6618, + "output": 125 + } + }, + "4faf924883747b52e7098abc42b878c620a5d365": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Enter 'john@doe.me' in the Email Address field", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_yv-ErNhoT-OxXIViaHfM3g" + } + ] + }, + "usage": { + "input": 6793, + "output": 129 + } + }, + "53edb1ca8fb74a58c5526debc4a02adc6f542ac1": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Enter '123 Main St' in the Street Address field", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse__he9ukwPTFum6ZzNq24xhg" + } + ] + }, + "usage": { + "input": 6974, + "output": 126 + } + }, + "aeba4509ece7abb488c4b70a7682c4d7562787e8": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Enter 'Anytown' in the City field", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_9rE9hErSTtehCKIjgH5Qfg" + } + ] + }, + "usage": { + "input": 7151, + "output": 123 + } + }, + "8fd0a771e8a33ccf6b5e468b7def995c4f1e44d4": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Enter 'XYZ state' in the State/Province field", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse_B8J2vD3lQx62iFfzVY1nIA" + } + ] + }, + "usage": { + "input": 7323, + "output": 129 + } + }, + "62ed0f704067b8d7d7800dee9c8fafafc9518277": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Enter '12345' in the ZIP/Postal Code field", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_K4ZzI6kTQJWFlBQhfHkWYw" + } + ] + }, + "usage": { + "input": 7504, + "output": 128 + } + }, + "a1bfbfcd8121dd43e10c8c73bba72538330f1428": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Click the Order button to confirm the first order (blue table)", + "ref": "e29" + }, + "id": "tooluse_Ri5WMHtqRNqfCTP-VWgi4w" + } + ] + }, + "usage": { + "input": 7684, + "output": 104 + } + }, + "992eefb4b25b1d755cc66571fbd584fc6c33db59": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Accept the confirmation dialog for the blue table order" + }, + "id": "tooluse_P_tPGR-QS5WSCfzlvZK8XQ" + } + ] + }, + "usage": { + "input": 7867, + "output": 84 + } + }, + "bc2d685a0dc753cc070cffa8a760d8016e8bf31e": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the total price for the table is $353.98", + "text": "$353.98" + }, + "id": "tooluse_NvN6LcNURjWS5OOEhUDd0Q" + } + ] + }, + "usage": { + "input": 8760, + "output": 93 + } + }, + "93ca57fe0e5026a4341d4e98bd9dd2b9aa469b3d": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the estimated delivery message for the table is visible", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_CXfeEDLjRaWH7ktKv2zYgQ" + } + ] + }, + "usage": { + "input": 8901, + "output": 97 + } + }, + "550f146d40463ab605fa4b4442b585edf88f2552": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Place Another Order button", + "intent": "Click 'Place Another Order' button to start the second order", + "ref": "e58" + }, + "id": "tooluse_r7SSTwqsSSmPgYdVzOPtiw" + } + ] + }, + "usage": { + "input": 9052, + "output": 107 + } + }, + "ee10577a3b951801dd5a9ab88cb126d90cd58c07": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Product Type dropdown", + "intent": "Select 'Desk' as the product type for the second order", + "ref": "e7", + "values": [ + "Desk" + ] + }, + "id": "tooluse_mzReMxIjTWqGHFf0H4tYeQ" + } + ] + }, + "usage": { + "input": 9783, + "output": 129 + } + }, + "74ce961d09698c8c2fbe84bbd22a1582d42c9a5d": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_select_option", + "arguments": { + "element": "Color dropdown", + "intent": "Select 'Green' as the color for the second order", + "ref": "e10", + "values": [ + "Green" + ] + }, + "id": "tooluse_FzVIg3vHQsiEoWQ3TbRCmA" + } + ] + }, + "usage": { + "input": 10064, + "output": 125 + } + }, + "30ea11494826f30190d6c52b95837366ea5631d4": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Full Name textbox", + "intent": "Enter 'John Doe' in the Full Name field for the second order", + "ref": "e13", + "text": "John Doe" + }, + "id": "tooluse_YNxCSHpBR3eIZAZH_0yIxg" + } + ] + }, + "usage": { + "input": 10342, + "output": 129 + } + }, + "ded10f32c4904e9f6eac403fb49b217b458d45d6": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Email Address textbox", + "intent": "Enter 'john@doe.me' in the Email Address field for the second order", + "ref": "e16", + "text": "john@doe.me" + }, + "id": "tooluse_c6SAg692QSuSeOzQlsqrCg" + } + ] + }, + "usage": { + "input": 10521, + "output": 133 + } + }, + "9a46381fab9620c44807d9a4e4a8c59e640519cb": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "Street Address textbox", + "intent": "Enter '123 Main St' in the Street Address field for the second order", + "ref": "e19", + "text": "123 Main St" + }, + "id": "tooluse_4oKwMN6BTZCCsh2MDk7Shw" + } + ] + }, + "usage": { + "input": 10706, + "output": 130 + } + }, + "e99009e430a287d44f5484bfc743a70c45837425": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "City textbox", + "intent": "Enter 'Anytown' in the City field for the second order", + "ref": "e22", + "text": "Anytown" + }, + "id": "tooluse_S9nmW_2nS-u7L_8B8y7qSw" + } + ] + }, + "usage": { + "input": 10887, + "output": 127 + } + }, + "491dacba9f324ae05147bbcca762ad763962c6f8": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "State/Province textbox", + "intent": "Enter 'XYZ state' in the State/Province field for the second order", + "ref": "e25", + "text": "XYZ state" + }, + "id": "tooluse_Mnvd8Xb-SQ66UL-rh9ktoQ" + } + ] + }, + "usage": { + "input": 11063, + "output": 133 + } + }, + "74734aa824cf198062b920b1828221874daaf3c9": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_type", + "arguments": { + "element": "ZIP/Postal Code textbox", + "intent": "Enter '12345' in the ZIP/Postal Code field for the second order", + "ref": "e28", + "text": "12345" + }, + "id": "tooluse_8B-AculgTxy25Cqugbe8gw" + } + ] + }, + "usage": { + "input": 11248, + "output": 132 + } + }, + "aac40b8df7734cc744bb6166856e82c6bd83c739": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_click", + "arguments": { + "element": "Order button", + "intent": "Click the Order button to confirm the second order (green desk)", + "ref": "e29" + }, + "id": "tooluse_EWnFJAOcTlq6ntbBK2O8ug" + } + ] + }, + "usage": { + "input": 11432, + "output": 104 + } + }, + "b3d74e46d79e2a0dd2e8da0821901d0a45e25ffe": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_handle_dialog", + "arguments": { + "accept": true, + "intent": "Accept the confirmation dialog for the green desk order" + }, + "id": "tooluse_pY_cduMcSUuuOvLdFbsOPA" + } + ] + }, + "usage": { + "input": 11615, + "output": 84 + } + }, + "7ff5380ae38cba75a5fee2e3079fd9d64d27a6a9": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the total price for the desk is $299.98", + "text": "$299.98" + }, + "id": "tooluse_yle_dIQBThex9ILLkL1RGg" + } + ] + }, + "usage": { + "input": 12509, + "output": 93 + } + }, + "e129b1021ab91e770d3a18a1de7fc001546b16af": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "playwright-test__browser_verify_text_visible", + "arguments": { + "intent": "Verify the estimated delivery message for the desk is visible", + "text": "Your order will arrive in approximately 3 days" + }, + "id": "tooluse_eBS2jkw6TKqDIgfB6u2ykA" + } + ] + }, + "usage": { + "input": 12650, + "output": 97 + } + }, + "567e0a8ed42daa54fa5165daea65ead41daeb523": { + "result": { + "role": "assistant", + "content": "Read the generated test code to report the final result", + "toolCalls": [ + { + "name": "playwright-test__generator_read_log", + "arguments": {}, + "id": "tooluse_28ioEmNTQruEE_2EAuMpXA" + } + ] + }, + "usage": { + "input": 12801, + "output": 68 + } + }, + "a9503c71bc36b209f0a5af6603c1b622c29224e8": { + "result": { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "name": "report_result", + "arguments": { + "code": "import { test, expect } from '@playwright/test';\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('http://localhost:/evals/fill-form.html');\n});\n\ntest('Order two items', async ({ page }) => {\n // Fill out the form to order a blue table\n await page.getByLabel('Product Type *').selectOption(['Table']);\n await page.getByLabel('Color *').selectOption(['Blue']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n \n // Confirm the order\n page.once('dialog', dialog => dialog.accept());\n await page.getByRole('button', { name: 'Order' }).click();\n \n // Verify the total price and estimated delivery for the table\n await expect(page.getByText('$353.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n \n // Make a new order\n await page.getByRole('button', { name: 'Place Another Order' }).click();\n \n // Fill out the form to order a green desk\n await page.getByLabel('Product Type *').selectOption(['Desk']);\n await page.getByLabel('Color *').selectOption(['Green']);\n await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');\n await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');\n await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');\n await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');\n await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');\n await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');\n \n // Confirm the order\n page.once('dialog', dialog => dialog.accept());\n await page.getByRole('button', { name: 'Order' }).click();\n \n // Verify the total price and estimated delivery for the desk\n await expect(page.getByText('$299.98')).toBeVisible();\n await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();\n});" + }, + "id": "tooluse_X1AaaxiJS46kLagzvLM2sA" + } + ] + }, + "usage": { + "input": 14271, + "output": 810 + } + } +} \ No newline at end of file diff --git a/tests/mcp/fixtures.ts b/tests/mcp/fixtures.ts index ba92eaa84d42c..97f32595298e8 100644 --- a/tests/mcp/fixtures.ts +++ b/tests/mcp/fixtures.ts @@ -27,12 +27,18 @@ import { ListRootsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; import { TestServer } from '../config/testserver'; import { serverFixtures } from '../config/serverFixtures'; import { parseResponse } from '../../packages/playwright/lib/mcp/browser/response'; +import { Agent } from '../../packages/playwright/lib/agents/agent'; +import { wrapInClient } from '../../packages/playwright/lib/mcp/sdk/server'; +import { parseAgentSpec } from '../../packages/playwright/lib/agents/agentParser'; import type { Config } from '../../packages/playwright/src/mcp/config'; import type { BrowserContext } from 'playwright'; import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'; import type { Stream } from 'stream'; import type { ServerFixtures, ServerWorkerOptions } from '../config/serverFixtures'; +import type { Agent as AgentType } from '../../packages/playwright/src/agents/agent'; +import type * as tinyLoop from 'tiny-loop'; +import type * as mcp from '../../packages/playwright/src/mcp/sdk/exports'; export { parseResponse }; @@ -58,6 +64,7 @@ export type StartClient = (options?: { env?: NodeJS.ProcessEnv, }) => Promise<{ client: Client, stderr: () => string }>; +type AgentFactory = (backend: mcp.ServerBackend, fileName: string, reportToolName: string, resultSchema: tinyLoop.Schema) => Promise>; type TestFixtures = { client: Client; @@ -67,7 +74,8 @@ type TestFixtures = { server: TestServer; httpsServer: TestServer; mcpHeadless: boolean; - loop: Loop; + loop: tinyLoop.Loop; + createAgent: AgentFactory; }; type WorkerFixtures = { @@ -190,27 +198,62 @@ export const test = serverTest.extend { const provider = 'copilot'; - const cacheFile = path.join(__dirname, '__cache__', provider, sanitizeFileName(test.info().titlePath.join(' ') + '-repeat' + test.info().repeatEachIndex) + '.json'); - const dataBefore = await fs.promises.readFile(cacheFile, 'utf-8').catch(() => '{}'); - let cache = {}; - try { - cache = JSON.parse(dataBefore); - } catch { - } - const caches = { before: cache, after: {} }; + const caches = await loadLoopCaches(provider); await use(new Loop(provider, { caches, secrets: { PORT: String(server.PORT) }, logger: (category, ...args) => debug(category)(...args), })); - const dataAfter = JSON.stringify(caches.after, null, 2); - if (test.info().status === 'passed' && dataBefore !== dataAfter) { - await fs.promises.mkdir(path.dirname(cacheFile), { recursive: true }); - await fs.promises.writeFile(cacheFile, dataAfter); - } + await saveLoopCaches(caches); + }, + + createAgent: async ({ server }, use) => { + const provider = 'copilot'; + let client: Client | undefined; + let caches: any; + await use(async (backend: mcp.ServerBackend, fileName: string, reportToolName: string, resultSchema: tinyLoop.Schema) => { + client = await wrapInClient(backend, { name: 'client', version: '1.0.0' }); + const spec = await parseAgentSpec(path.join(__dirname, '../../packages/playwright/src/agents', fileName)); + spec.tools = spec.tools.filter(tool => tool.startsWith('playwright-test/') && tool !== 'playwright-test/' + reportToolName); + spec.instructions = spec.instructions.replace('`' + reportToolName + '`', '`report_result`'); + caches = await loadLoopCaches(provider); + const loopOptions: ConstructorParameters[1] = { + caches, + secrets: { PORT: String(server.PORT) }, + logger: (category, ...args) => debug(category)(...args), + }; + const agent = new Agent({ ...loopOptions, loopName: provider }, spec, new Map([['playwright-test', client]]), resultSchema); + return agent as unknown as AgentType; + }); + if (caches) + await saveLoopCaches(caches); + await client?.close(); }, }); +const kEmptyCacheData = '{}'; + +async function loadLoopCaches(provider: string) { + const cacheFile = path.join(__dirname, '__cache__', provider, sanitizeFileName(test.info().titlePath.join(' ') + '-repeat' + test.info().repeatEachIndex) + '.json'); + const dataBefore = await fs.promises.readFile(cacheFile, 'utf-8').catch(() => kEmptyCacheData); + let cache = {}; + try { + cache = JSON.parse(dataBefore); + } catch { + } + return { before: cache, after: {}, dataBefore, cacheFile }; +} + +async function saveLoopCaches(caches: { before: any, after: any, dataBefore: string, cacheFile: string }) { + const dataAfter = JSON.stringify(caches.after, null, 2); + if (caches.dataBefore === dataAfter) + return; + if (test.info().status !== 'passed' && caches.dataBefore !== kEmptyCacheData) + return; + await fs.promises.mkdir(path.dirname(caches.cacheFile), { recursive: true }); + await fs.promises.writeFile(caches.cacheFile, dataAfter); +} + async function createTransport(mcpServerType: TestOptions['mcpServerType'], options: { args: string[], env: NodeJS.ProcessEnv, cwd: string }): Promise<{ transport: Transport, stderr: Stream | null, diff --git a/tests/mcp/generator.eval.ts b/tests/mcp/generator.eval.ts new file mode 100644 index 0000000000000..cf2222fcfdb46 --- /dev/null +++ b/tests/mcp/generator.eval.ts @@ -0,0 +1,117 @@ +/** + * Copyright (c) Microsoft Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import z from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; + +import { test, expect, writeFiles } from './fixtures'; +import { TestServerBackend } from '../../packages/playwright/lib/mcp/test/testBackend'; + +test.use({ mcpHeadless: async ({ headless }, use) => use(headless) }); + +test('generate order-two-items', async ({ createAgent, server, headless }) => { + await writeFiles({ + 'playwright.config.ts': ` + import { defineConfig } from '@playwright/test'; + export default defineConfig({ + }); + `, + 'tests/ordering/seed.spec.ts': ` + import { test, expect } from '@playwright/test'; + test.beforeEach(async ({ page }) => { + await page.goto('${server.PREFIX}/evals/fill-form.html'); + }); + test('seed test', async ({ page }) => { + }); + `, + }); + + const agent = await createAgent( + new TestServerBackend(test.info().outputPath('playwright.config.ts'), { headless }), + 'playwright-test-generator.agent.md', + 'generator_write_test', + zodToJsonSchema(z.object({ code: z.string() })) as any, + ); + + const { code } = await agent.runTask(` + Generate a test for the test plan item. + Ordering Page + Order two items + tests/ordering/order-two-items.spec.ts + tests/ordering/seed.spec.ts + + Use the following details for the order form: + - Name: John Doe + - Address: 123 Main St, Anytown, XYZ state + - Zip Code: 12345 + - Email: john@doe.me + + + 1. Fill out the form to order a blue table. + 2. Confirm the order. + 3. Verify the total price and estimated delivery for the table. + 4. Make a new order. + 5. Fill out the form to order a green desk. + 6. Confirm the order. + 7. Verify the total price and estimated delivery for the desk. + + `, {}); + + const expected = [ + `import { test, expect } from '@playwright/test';`, + `test.beforeEach(async ({ page }) => {`, + ` await page.goto('${server.PREFIX}/evals/fill-form.html');`, + `});`, + `test('Order two items', async ({ page }) => {`, + `Fill out the form to order a blue table`, + `await page.getByLabel('Product Type *').selectOption(['Table']);`, + `await page.getByLabel('Color *').selectOption(['Blue']);`, + `await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');`, + `await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');`, + `await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');`, + `await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');`, + `await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');`, + `await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');`, + `Confirm the order`, + `dialog.accept()`, + `await page.getByRole('button', { name: 'Order' }).click();`, + `Verify the total price and estimated delivery for the table`, + `await expect(page.getByText('$353.98')).toBeVisible();`, + `await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();`, + `Make a new order`, + `await page.getByRole('button', { name: 'Place Another Order' }).click();`, + `Fill out the form to order a green desk`, + `await page.getByLabel('Product Type *').selectOption(['Desk']);`, + `await page.getByLabel('Color *').selectOption(['Green']);`, + `await page.getByRole('textbox', { name: 'Full Name *' }).fill('John Doe');`, + `await page.getByRole('textbox', { name: 'Email Address *' }).fill('john@doe.me');`, + `await page.getByRole('textbox', { name: 'Street Address *' }).fill('123 Main St');`, + `await page.getByRole('textbox', { name: 'City *' }).fill('Anytown');`, + `await page.getByRole('textbox', { name: 'State/Province *' }).fill('XYZ state');`, + `await page.getByRole('textbox', { name: 'ZIP/Postal Code *' }).fill('12345');`, + `Confirm the order`, + `await page.getByRole('button', { name: 'Order' }).click();`, + `Verify the total price and estimated delivery for the desk`, + `await expect(page.getByText('$299.98')).toBeVisible();`, + `await expect(page.getByText('Your order will arrive in approximately 3 days')).toBeVisible();`, + ]; + + let index = 0; + for (const line of expected) { + expect(code.substring(index)).toContain(line); + index = code.indexOf(line, index) + line.length; + } +});