|
| 1 | +import { NextRequest, NextResponse } from 'next/server' |
| 2 | +import { Stagehand } from '@browserbasehq/stagehand' |
| 3 | +import { z } from 'zod' |
| 4 | +import { createLogger } from '@/lib/logs/console-logger' |
| 5 | +import { ensureZodObject, normalizeUrl } from '../utils' |
| 6 | + |
// Logger for this route, created under the name 'StagehandExtractAPI'.
const logger = createLogger('StagehandExtractAPI')

// Browserbase credentials, read from the environment once when the module loads.
// Either value is undefined when the variable is unset; POST checks for that
// before starting a session. The `process.env.NAME` member-access form is kept
// on purpose (no destructuring) so build-time substitution keeps working.
const BROWSERBASE_PROJECT_ID = process.env.BROWSERBASE_PROJECT_ID
const BROWSERBASE_API_KEY = process.env.BROWSERBASE_API_KEY
| 12 | + |
// Shape of the JSON body accepted by POST. Field order is significant only for
// the order in which validation issues are reported.
const requestSchema = z.object({
  // Natural-language description of what to extract.
  instruction: z.string(),
  // JSON object describing the desired output; converted to a Zod schema later.
  schema: z.record(z.string(), z.any()),
  // Missing value becomes `false`.
  useTextExtract: z.boolean().default(false),
  // May be a string, null, or omitted entirely.
  selector: z.string().nullish(),
  // Caller-supplied key that is forwarded to the model client.
  apiKey: z.string(),
  // Page to open; must parse as a URL.
  url: z.string().url(),
})
| 22 | + |
/**
 * Reduces a thrown value to the fields that are safe to send back to the caller.
 *
 * Stack traces and raw upstream `response` objects are deliberately left out:
 * they expose server internals and may not be JSON-serializable. Both remain
 * available in the server-side log entry written by the caller.
 */
function describeExtractionError(error: unknown): {
  message: string
  details: Record<string, unknown>
} {
  if (!(error instanceof Error)) {
    return { message: 'Unknown error during extraction', details: {} }
  }

  const details: Record<string, unknown> = { name: error.name }
  const { code, statusCode } = error as Error & { code?: unknown; statusCode?: unknown }

  // Only primitive codes are forwarded so the payload always serializes.
  if (typeof code === 'string' || typeof code === 'number') {
    details.code = code
  }
  if (typeof statusCode === 'string' || typeof statusCode === 'number') {
    details.statusCode = statusCode
  }

  return { message: error.message, details }
}

/**
 * POST handler: opens `url` in a Browserbase-hosted Stagehand session and
 * extracts data from the page according to `instruction` and `schema`.
 *
 * Responses:
 * - 200 `{ data, schema }` on success (`schema` is the caller's schema echoed back).
 * - 400 when the body is not valid JSON, fails `requestSchema`, or `apiKey`
 *   does not start with `sk-`.
 * - 500 when the Browserbase environment variables are missing or when
 *   Stagehand initialization, navigation, or extraction throws.
 *
 * The Stagehand instance, if one was created, is always closed before returning.
 */
export async function POST(request: NextRequest) {
  let stagehand: Stagehand | null = null

  try {
    // A malformed body is the caller's mistake: answer 400 rather than letting
    // the parse error fall through to the generic 500 handler below.
    let body: unknown
    try {
      body = await request.json()
    } catch (parseError) {
      logger.error('Request body is not valid JSON', {
        message: parseError instanceof Error ? parseError.message : 'Unknown error',
      })
      return NextResponse.json(
        { error: 'Invalid request parameters', details: 'Request body must be valid JSON' },
        { status: 400 }
      )
    }

    // JSON `null` parses fine, but reading a property off it would throw, so
    // the pre-validation log only inspects real objects.
    const received =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {}
    logger.info('Received extraction request', {
      url: received.url,
      hasInstruction: !!received.instruction,
      schema: received.schema ? typeof received.schema : 'none',
    })

    const validationResult = requestSchema.safeParse(body)

    if (!validationResult.success) {
      logger.error('Invalid request body', { errors: validationResult.error.errors })
      return NextResponse.json(
        { error: 'Invalid request parameters', details: validationResult.error.errors },
        { status: 400 }
      )
    }

    // From here on `schema` is guaranteed to be a plain object and `apiKey` a
    // string, so no further type checks are needed for them.
    const { url: rawUrl, instruction, selector, useTextExtract, apiKey, schema } =
      validationResult.data
    const url = normalizeUrl(rawUrl)

    logger.info('Starting Stagehand extraction process', {
      rawUrl,
      url,
      hasInstruction: !!instruction,
      useTextExtract: !!useTextExtract,
      schemaType: typeof schema,
    })

    // Check for required environment variables
    if (!BROWSERBASE_API_KEY || !BROWSERBASE_PROJECT_ID) {
      logger.error('Missing required environment variables', {
        hasBrowserbaseApiKey: !!BROWSERBASE_API_KEY,
        hasBrowserbaseProjectId: !!BROWSERBASE_PROJECT_ID,
      })

      return NextResponse.json(
        { error: 'Server configuration error: Missing required environment variables' },
        { status: 500 }
      )
    }

    // Validate OpenAI API key format
    if (!apiKey.startsWith('sk-')) {
      logger.error('Invalid OpenAI API key format')
      return NextResponse.json({ error: 'Invalid OpenAI API key format' }, { status: 400 })
    }

    try {
      logger.info('Initializing Stagehand with Browserbase')
      stagehand = new Stagehand({
        env: 'BROWSERBASE',
        apiKey: BROWSERBASE_API_KEY,
        projectId: BROWSERBASE_PROJECT_ID,
        verbose: 1,
        // Adapt our logger to the callback shape Stagehand expects.
        logger: (msg) => logger.info(typeof msg === 'string' ? msg : JSON.stringify(msg)),
        disablePino: true,
        modelName: 'gpt-4o',
        modelClientOptions: {
          apiKey: apiKey, // User's OpenAI API key
        },
      })

      logger.info('Starting stagehand.init()')
      await stagehand.init()
      logger.info('Stagehand initialized successfully')

      logger.info(`Navigating to ${url}`)
      await stagehand.page.goto(url, { waitUntil: 'networkidle' })
      logger.info('Navigation complete')

      logger.info('Preparing extraction schema', {
        schema: JSON.stringify(schema).substring(0, 100) + '...',
      })

      logger.info('Extracting data with Stagehand')

      // Callers sometimes wrap the real schema in a top-level "schema" key.
      const schemaToConvert = schema.schema || schema

      // A schema that cannot be converted is not fatal: extraction falls back
      // to the instruction-only form.
      let zodSchema
      try {
        logger.info('Creating Zod schema from JSON schema', {
          schemaType: typeof schemaToConvert,
          hasNestedSchema: !!schema.schema,
        })

        zodSchema = ensureZodObject(logger, schemaToConvert)

        logger.info('Successfully created Zod schema')
      } catch (schemaError) {
        logger.error('Failed to convert JSON schema to Zod schema', {
          error: schemaError,
          message: schemaError instanceof Error ? schemaError.message : 'Unknown schema error',
        })

        logger.info('Falling back to simple extraction without schema')
        zodSchema = undefined
      }

      // Stagehand's option type is not visible from this file, hence `any`.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const extractOptions: any = {
        instruction,
        useTextExtract: !!useTextExtract,
      }

      if (zodSchema) {
        extractOptions.schema = zodSchema
      }

      if (selector) {
        logger.info(`Using selector: ${selector}`)
        extractOptions.selector = selector
      }

      logger.info('Calling stagehand.page.extract with options', {
        hasInstruction: !!extractOptions.instruction,
        hasSchema: !!extractOptions.schema,
        hasSelector: !!extractOptions.selector,
        useTextExtract: extractOptions.useTextExtract,
      })

      // With a schema the full options object is passed; without one only the
      // instruction string is passed (selector and useTextExtract are not sent).
      const extractedData = zodSchema
        ? await stagehand.page.extract(extractOptions)
        : await stagehand.page.extract(extractOptions.instruction)

      logger.info('Extraction successful', {
        hasData: !!extractedData,
        dataType: typeof extractedData,
        dataKeys: extractedData ? Object.keys(extractedData) : [],
      })

      return NextResponse.json({
        data: extractedData,
        schema,
      })
    } catch (error) {
      // Full diagnostics, including the stack, stay in the server log only.
      logger.error('Stagehand extraction error', {
        error,
        message: error instanceof Error ? error.message : 'Unknown error',
        stack: error instanceof Error ? error.stack : undefined,
      })

      const { message, details } = describeExtractionError(error)

      return NextResponse.json(
        {
          error: message,
          details,
        },
        { status: 500 }
      )
    }
  } catch (error) {
    logger.error('Unexpected error in extraction API route', {
      error,
      message: error instanceof Error ? error.message : 'Unknown error',
      stack: error instanceof Error ? error.stack : undefined,
    })
    return NextResponse.json(
      {
        error: 'Internal server error',
        details: error instanceof Error ? error.message : 'Unknown error',
      },
      { status: 500 }
    )
  } finally {
    // Runs on every exit path, including early returns, so a session that was
    // opened is never left running. A failure to close is logged, not rethrown.
    if (stagehand) {
      try {
        logger.info('Closing Stagehand instance')
        await stagehand.close()
      } catch (closeError) {
        logger.error('Error closing Stagehand instance', { error: closeError })
      }
    }
  }
}
0 commit comments