Skip to content

Commit aa32f9f

Browse files
authored
feat(stagehand): added stagehand cua and extract tools/block (#243)
* added stagehand execute tool/block * added stagehand agent * cleaned up model names * acknowledged PR comments
1 parent d401551 commit aa32f9f

File tree

16 files changed

+2498
-15
lines changed

16 files changed

+2498
-15
lines changed

sim/app/api/tools/stagehand/agent/route.ts

Lines changed: 1054 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
import { NextRequest, NextResponse } from 'next/server'
2+
import { Stagehand } from '@browserbasehq/stagehand'
3+
import { z } from 'zod'
4+
import { createLogger } from '@/lib/logs/console-logger'
5+
import { ensureZodObject, normalizeUrl } from '../utils'
6+
7+
// Logger scoped to this route so its entries can be told apart from other API routes.
const logger = createLogger('StagehandExtractAPI')

// Browserbase credentials, read once from the environment when the module loads.
// Either may be undefined; POST rejects the request with a 500 in that case.
const BROWSERBASE_API_KEY = process.env.BROWSERBASE_API_KEY
const BROWSERBASE_PROJECT_ID = process.env.BROWSERBASE_PROJECT_ID

/**
 * Shape of the JSON body accepted by the extraction endpoint.
 *
 * - `instruction`: what to extract, in natural language
 * - `schema`: JSON-schema-like object describing the expected result
 * - `useTextExtract`: optional flag, `false` when omitted
 * - `selector`: optional selector; may be omitted or `null`
 * - `apiKey`: key handed to the model client
 * - `url`: page to visit; must parse as a URL
 */
const requestSchema = z.object({
  instruction: z.string(),
  schema: z.record(z.any()),
  useTextExtract: z.boolean().optional().default(false),
  selector: z.string().nullish(),
  apiKey: z.string(),
  url: z.string().url(),
})
22+
23+
/**
 * Reduces a thrown value to the fields that are returned to the client.
 *
 * Only the error name and, when present, its `code` / `statusCode` are kept.
 * The stack trace and any attached `response` object are deliberately left out:
 * the caller already writes the full error to the server log, and echoing them in
 * an HTTP response may expose internal paths or upstream request data.
 */
function buildClientErrorDetails(error: unknown): Record<string, any> {
  if (!(error instanceof Error)) {
    return {}
  }

  const details: Record<string, any> = { name: error.name }
  const extra = error as any
  if (typeof extra.code !== 'undefined') {
    details.code = extra.code
  }
  if (typeof extra.statusCode !== 'undefined') {
    details.statusCode = extra.statusCode
  }
  return details
}

/**
 * Handles POST requests to the Stagehand extraction endpoint.
 *
 * Flow: validate the JSON body against `requestSchema`, check server configuration
 * and the API key prefix, start a Stagehand session on Browserbase, navigate to the
 * requested URL, convert the supplied JSON schema to Zod and run `page.extract`.
 * If the schema cannot be converted, extraction falls back to an instruction-only call.
 *
 * Responses:
 * - 200 `{ data, schema }` on success
 * - 400 for an invalid body, invalid schema or malformed API key
 * - 500 for missing environment variables or any failure while driving Stagehand
 *
 * The Stagehand instance, if one was created, is always closed in `finally`.
 */
export async function POST(request: NextRequest) {
  // Declared outside the try so the finally block can release the session.
  let stagehand: Stagehand | null = null

  try {
    // Parse and validate request body
    const body = await request.json()
    // A JSON body of `null` is valid JSON; guard the reads so it reaches validation
    // (and yields a 400) instead of throwing here.
    logger.info('Received extraction request', {
      url: body?.url,
      hasInstruction: !!body?.instruction,
      schema: body?.schema ? typeof body.schema : 'none',
    })

    const validationResult = requestSchema.safeParse(body)

    if (!validationResult.success) {
      logger.error('Invalid request body', { errors: validationResult.error.errors })
      return NextResponse.json(
        { error: 'Invalid request parameters', details: validationResult.error.errors },
        { status: 400 }
      )
    }

    const { url: rawUrl, instruction, selector, useTextExtract, apiKey, schema } =
      validationResult.data
    const url = normalizeUrl(rawUrl)

    logger.info('Starting Stagehand extraction process', {
      rawUrl,
      url,
      hasInstruction: !!instruction,
      useTextExtract: !!useTextExtract,
      schemaType: typeof schema,
    })

    // Validate schema structure
    if (!schema || typeof schema !== 'object') {
      logger.error('Invalid schema format', { schema })
      return NextResponse.json(
        { error: 'Invalid schema format. Schema must be a valid JSON object.' },
        { status: 400 }
      )
    }

    // Check for required environment variables
    if (!BROWSERBASE_API_KEY || !BROWSERBASE_PROJECT_ID) {
      logger.error('Missing required environment variables', {
        hasBrowserbaseApiKey: !!BROWSERBASE_API_KEY,
        hasBrowserbaseProjectId: !!BROWSERBASE_PROJECT_ID,
      })

      return NextResponse.json(
        { error: 'Server configuration error: Missing required environment variables' },
        { status: 500 }
      )
    }

    // Validate OpenAI API key format (the request schema already guarantees a string)
    if (!apiKey.startsWith('sk-')) {
      logger.error('Invalid OpenAI API key format')
      return NextResponse.json({ error: 'Invalid OpenAI API key format' }, { status: 400 })
    }

    try {
      // Initialize Stagehand with Browserbase
      logger.info('Initializing Stagehand with Browserbase')
      stagehand = new Stagehand({
        env: 'BROWSERBASE',
        apiKey: BROWSERBASE_API_KEY,
        projectId: BROWSERBASE_PROJECT_ID,
        verbose: 1,
        // Use a custom logger wrapper that adapts our logger to Stagehand's expected format
        logger: (msg) => logger.info(typeof msg === 'string' ? msg : JSON.stringify(msg)),
        disablePino: true,
        modelName: 'gpt-4o',
        modelClientOptions: {
          apiKey: apiKey, // User's OpenAI API key
        },
      })

      logger.info('Starting stagehand.init()')
      await stagehand.init()
      logger.info('Stagehand initialized successfully')

      // Navigate to the specified URL
      logger.info(`Navigating to ${url}`)
      await stagehand.page.goto(url, { waitUntil: 'networkidle' })
      logger.info('Navigation complete')

      // Log only a prefix of the schema to keep log entries small
      logger.info('Preparing extraction schema', {
        schema: JSON.stringify(schema).substring(0, 100) + '...',
      })

      logger.info('Extracting data with Stagehand')

      try {
        // The schema may be wrapped as `{ schema: {...} }`; unwrap it when present.
        const schemaToConvert = schema.schema || schema

        // Create a Zod schema from the JSON schema
        let zodSchema
        try {
          logger.info('Creating Zod schema from JSON schema', {
            schemaType: typeof schemaToConvert,
            hasNestedSchema: !!schema.schema,
          })

          zodSchema = ensureZodObject(logger, schemaToConvert)

          logger.info('Successfully created Zod schema')
        } catch (schemaError) {
          logger.error('Failed to convert JSON schema to Zod schema', {
            error: schemaError,
            message: schemaError instanceof Error ? schemaError.message : 'Unknown schema error',
          })

          // Fall back to simple extraction without schema
          logger.info('Falling back to simple extraction without schema')
          zodSchema = undefined
        }

        // Prepare extraction options
        const extractOptions: any = {
          instruction,
          useTextExtract: !!useTextExtract,
        }

        // Add schema if we have one
        if (zodSchema) {
          extractOptions.schema = zodSchema
        }

        // Add selector if provided
        if (selector) {
          logger.info(`Using selector: ${selector}`)
          extractOptions.selector = selector
        }

        logger.info('Calling stagehand.page.extract with options', {
          hasInstruction: !!extractOptions.instruction,
          hasSchema: !!extractOptions.schema,
          hasSelector: !!extractOptions.selector,
          useTextExtract: extractOptions.useTextExtract,
        })

        // With a schema, pass the full options object; without one, pass only the
        // instruction string.
        const extractedData = zodSchema
          ? await stagehand.page.extract(extractOptions)
          : await stagehand.page.extract(extractOptions.instruction)

        logger.info('Extraction successful', {
          hasData: !!extractedData,
          dataType: typeof extractedData,
          dataKeys: extractedData ? Object.keys(extractedData) : [],
        })

        // Return the extracted data together with the schema that was requested
        return NextResponse.json({
          data: extractedData,
          schema,
        })
      } catch (extractError) {
        logger.error('Error during extraction operation', {
          error: extractError,
          message:
            extractError instanceof Error ? extractError.message : 'Unknown extraction error',
        })
        throw extractError
      }
    } catch (error) {
      // Full diagnostics (including the stack) go to the server log only.
      logger.error('Stagehand extraction error', {
        error,
        message: error instanceof Error ? error.message : 'Unknown error',
        stack: error instanceof Error ? error.stack : undefined,
      })

      return NextResponse.json(
        {
          error: error instanceof Error ? error.message : 'Unknown error during extraction',
          details: buildClientErrorDetails(error),
        },
        { status: 500 }
      )
    }
  } catch (error) {
    logger.error('Unexpected error in extraction API route', {
      error,
      message: error instanceof Error ? error.message : 'Unknown error',
      stack: error instanceof Error ? error.stack : undefined,
    })
    return NextResponse.json(
      {
        error: 'Internal server error',
        details: error instanceof Error ? error.message : 'Unknown error',
      },
      { status: 500 }
    )
  } finally {
    // Make sure to clean up Stagehand resources; a failure to close is logged, not thrown.
    if (stagehand) {
      try {
        logger.info('Closing Stagehand instance')
        await stagehand.close()
      } catch (closeError) {
        logger.error('Error closing Stagehand instance', { error: closeError })
      }
    }
  }
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import { z } from 'zod'
2+
import { Logger } from '@/lib/logs/console-logger'
3+
4+
/**
 * Recursively translates a JSON-schema-like object into a Zod schema.
 *
 * Supported `type` values: `object` (with `properties`), `array` (with `items`),
 * `string`, `number`, `integer`, `boolean` and `null`. Anything else, including an
 * `object` without `properties` or an `array` without `items`, becomes `z.any()`
 * after a warning is logged.
 *
 * For objects, every property is required unless the schema carries a `required`
 * array, in which case properties not listed there are made optional.
 *
 * @param logger - receives warnings/errors about unsupported or missing schemas
 * @param jsonSchema - the schema node to convert
 * @returns the corresponding Zod schema
 * @throws Error when `jsonSchema` is falsy
 */
function jsonSchemaToZod(logger: Logger, jsonSchema: Record<string, any>): z.ZodTypeAny {
  if (!jsonSchema) {
    logger.error('Invalid schema: Schema is null or undefined')
    throw new Error('Invalid schema: Schema is required')
  }

  // Primitive values (strings, numbers, ...) cannot describe a schema
  if (typeof jsonSchema !== 'object') {
    logger.warn('Schema is not an object, defaulting to any', { type: typeof jsonSchema })
    return z.any()
  }

  // Attaches this node's own description, when it has one
  const described = <T extends z.ZodTypeAny>(node: T): T =>
    jsonSchema.description ? node.describe(jsonSchema.description) : node

  switch (jsonSchema.type) {
    case 'object': {
      const properties = jsonSchema.properties
      if (!properties) {
        break
      }

      const requiredList: unknown = jsonSchema.required
      const fields: Record<string, z.ZodTypeAny> = {}

      for (const name of Object.keys(properties)) {
        const childSchema = properties[name] as Record<string, any>
        let field = jsonSchemaToZod(logger, childSchema)

        if (childSchema.description) {
          field = field.describe(childSchema.description)
        }

        // Only relax to optional when an explicit `required` list omits the property
        if (Array.isArray(requiredList) && !requiredList.includes(name)) {
          field = field.optional()
        }

        fields[name] = field
      }

      return z.object(fields)
    }

    case 'array': {
      if (!jsonSchema.items) {
        break
      }
      const element = jsonSchemaToZod(logger, jsonSchema.items as Record<string, any>)
      return described(z.array(element))
    }

    case 'string':
      return described(z.string())

    case 'number':
      return described(z.number())

    case 'boolean':
      return described(z.boolean())

    case 'null':
      return z.null()

    case 'integer':
      return described(z.number().int())
  }

  // Unsupported or incomplete node: accept anything
  logger.warn('Unknown schema type, defaulting to any', { type: jsonSchema.type })
  return z.any()
}
102+
103+
/**
 * Converts a JSON schema to Zod and guarantees the result is a `ZodObject`.
 *
 * The decision is made on the converted schema itself rather than on `schema.type`:
 * a schema declaring `type: 'object'` but lacking `properties` is converted to
 * `z.any()` by `jsonSchemaToZod`, so trusting the declared type would hand back a
 * non-object schema under a `ZodObject` cast. Any converted schema that is not a
 * `ZodObject` is wrapped as `{ value: <schema> }`.
 *
 * @param logger - receives a warning when wrapping is needed
 * @param schema - JSON-schema-like object to convert
 * @returns a Zod object schema, either the converted schema or a `{ value }` wrapper
 * @throws Error propagated from `jsonSchemaToZod` when `schema` is falsy
 */
export function ensureZodObject(logger: Logger, schema: Record<string, any>): z.ZodObject<any> {
  const zodSchema = jsonSchemaToZod(logger, schema)

  if (zodSchema instanceof z.ZodObject) {
    return zodSchema
  }

  logger.warn('Schema is not an object type, wrapping in an object', {
    type: schema.type,
  })
  return z.object({ value: zodSchema })
}
118+
119+
/**
 * Ensures a URL string carries an HTTP(S) scheme.
 *
 * Surrounding whitespace is removed first. If the result does not start with
 * `http://` or `https://` (compared case-insensitively, so `HTTPS://host` is left
 * alone), `https://` is prepended. No other validation or rewriting is performed.
 *
 * @param url - URL as supplied by the caller, with or without a scheme
 * @returns the trimmed URL, prefixed with `https://` when no HTTP(S) scheme was present
 */
export function normalizeUrl(url: string): string {
  const trimmed = url.trim()

  // Scheme matching is case-insensitive, so `HTTP://` must not get a second prefix.
  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed
  }

  return `https://${trimmed}`
}

0 commit comments

Comments
 (0)