From 1972db92fbb4e55d51c1976323160dd744eebac4 Mon Sep 17 00:00:00 2001 From: ilyass bzitar Date: Thu, 18 Jul 2024 13:22:29 +0100 Subject: [PATCH] feat(recaptcha_capmonster): supporting capmonster 'https://capmonster.cloud/en/' API --- .github/labeler.yml | 35 +--- .../readme.md | 18 +- .../src/index.ts | 5 + .../src/provider/capMonster-api.ts | 171 ++++++++++++++++++ .../src/provider/capmonster.ts | 112 ++++++++++++ 5 files changed, 312 insertions(+), 29 deletions(-) create mode 100644 packages/puppeteer-extra-plugin-recaptcha/src/provider/capMonster-api.ts create mode 100644 packages/puppeteer-extra-plugin-recaptcha/src/provider/capmonster.ts diff --git a/.github/labeler.yml b/.github/labeler.yml index 04bbe0ba3..376b7d831 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,25 +1,10 @@ -# This is used with the label workflow which -# will triage pull requests and apply a label based on the -# paths that are modified in the pull request. -# -# For more information, see: -# https://github.com/actions/labeler - -'package: core': - - packages/automation-extra/**/* - - packages/playright-extra/**/* - - packages/puppeteer-extra/**/* - - packages/automation-extra-plugin/**/* - - packages/puppeteer-extra-plugin/**/* - -'plugin: automation-extra': - - packages/plugin-*/**/* - -'plugin: puppeteer-extra': - - packages/puppeteer-extra-plugin-*/**/* - -'plugin: recaptcha 🏴': - - packages/*recaptcha*/**/* - -'plugin: stealth ㊙️': - - packages/*stealth*/**/* +feature: + - changed-files: + - any-glob-to-any-file: + [ + 'packages/puppeteer-extra-plugin-recaptcha/src/index.ts', + 'packages/puppeteer-extra-plugin-recaptcha/src/provider/*', + 'packages/puppeteer-extra-plugin-recaptcha/readme.md', + ] + - base-branch: ['master'] + - head-branch: ['recaptchaCapMonster'] diff --git a/packages/puppeteer-extra-plugin-recaptcha/readme.md b/packages/puppeteer-extra-plugin-recaptcha/readme.md index da7aa27d5..f4941a324 100644 --- 
a/packages/puppeteer-extra-plugin-recaptcha/readme.md +++ b/packages/puppeteer-extra-plugin-recaptcha/readme.md @@ -27,6 +27,8 @@ npm install puppeteer puppeteer-extra puppeteer-extra-plugin-recaptcha > 🎁 **Note:** Until we've automated changelog updates in markdown files please follow the `#announcements` channel in our [discord server](https://extra.community/) for the latest updates and changelog info. +- Support capMonster (https://capmonster.cloud/en/) + _Older changelog:_ ##### `3.1.9` @@ -72,8 +74,8 @@ const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha') puppeteer.use( RecaptchaPlugin({ provider: { - id: '2captcha', - token: 'XXXXXXX' // REPLACE THIS WITH YOUR OWN 2CAPTCHA API KEY ⚡ + id: '2captcha', // PROVIDER ID: '2captcha' OR 'capmonster' ⚡ + token: 'XXXXXXX' // REPLACE THIS WITH YOUR OWN 2CAPTCHA/CAPMONSTER API KEY ⚡ }, visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved) }) @@ -109,8 +111,8 @@ import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' puppeteer.use( RecaptchaPlugin({ provider: { - id: '2captcha', - token: 'ENTER_YOUR_2CAPTCHA_API_KEY_HERE' + id: '2captcha', // or 'capmonster' + token: 'ENTER_YOUR_2CAPTCHA_API_KEY_HERE' // or API key for capmonster } }) ) @@ -158,6 +160,14 @@ I thought about having the plugin solve captchas directly (e.g. using the [audio _Please note:_ You need a provider configured for this plugin to do it's magic. If you decide to use the built-in 2captcha provider you need to add funds to your 2captcha account. +### capmonster + +CapMonster Cloud is a cloud-based service that automates the solving of CAPTCHA challenges. It provides developers with an API for integrating CAPTCHA solving into applications, ensuring reliable and efficient bypassing of CAPTCHAs used on websites. The service emphasizes scalability, accuracy, and security, making it suitable for automating interactions with CAPTCHA-protected sites. 
+
+- Cost: 1000 reCAPTCHAs v2 for 0.6 USD
+- Cost: 1000 reCAPTCHAs v3 for 0.9 USD
+- Cost: 1000 hCaptcha for 0.8 USD
+
 ### 2captcha
 
 Currently the only builtin solution provider as it's the cheapest and most reliable, from my experience. If you'd like to throw some free captcha credit my way feel free to [signup here](https://2captcha.com?from=6690177) (referral link, allows me to write automated tests against their API).
diff --git a/packages/puppeteer-extra-plugin-recaptcha/src/index.ts b/packages/puppeteer-extra-plugin-recaptcha/src/index.ts
index 49bac2eed..279918eaf 100644
--- a/packages/puppeteer-extra-plugin-recaptcha/src/index.ts
+++ b/packages/puppeteer-extra-plugin-recaptcha/src/index.ts
@@ -7,11 +7,16 @@ import * as types from './types'
 import { RecaptchaContentScript } from './content'
 import { HcaptchaContentScript } from './content-hcaptcha'
 import * as TwoCaptcha from './provider/2captcha'
+import * as CapMonster from './provider/capmonster'
 
 export const BuiltinSolutionProviders: types.SolutionProvider[] = [
   {
     id: TwoCaptcha.PROVIDER_ID,
     fn: TwoCaptcha.getSolutions
+  },
+  {
+    id: CapMonster.PROVIDER_ID,
+    fn: CapMonster.getSolutions
   }
 ]
 
diff --git a/packages/puppeteer-extra-plugin-recaptcha/src/provider/capMonster-api.ts b/packages/puppeteer-extra-plugin-recaptcha/src/provider/capMonster-api.ts
new file mode 100644
index 000000000..44ec363f9
--- /dev/null
+++ b/packages/puppeteer-extra-plugin-recaptcha/src/provider/capMonster-api.ts
@@ -0,0 +1,242 @@
+// Minimal client for the CapMonster Cloud JSON API (createTask/getTaskResult).
+// TODO: Create our own API wrapper
+
+import * as https from 'https'
+
+const API_HOSTNAME = 'api.capmonster.cloud'
+
+const defaultOptions = {
+  pollingInterval: 40000,
+  retries: 4
+}
+
+export interface SolverOptions {
+  /** Milliseconds to wait between two getTaskResult requests. */
+  pollingInterval?: number
+  /** How often a solution flagged as invalid may be requested again. */
+  retries?: number
+}
+
+export type InvalidCallback = (error?: any) => void
+
+export type SolverCallback = (
+  err: any,
+  result?: { id: any; text: string },
+  invalid?: InvalidCallback
+) => void
+
+let apiKey: string | undefined
+
+/**
+ * POST `payload` as JSON to `path` and pass the parsed JSON answer to `done`.
+ * `done` is called exactly once, with either an error or the response object.
+ */
+function postJson(
+  path: string,
+  payload: object,
+  done: (err: any, response?: any) => void
+): void {
+  let settled = false
+  const finish = (err: any, response?: any) => {
+    if (settled) return
+    settled = true
+    done(err, response)
+  }
+  const body = JSON.stringify(payload)
+  const request = https.request(
+    {
+      method: 'POST',
+      // `hostname` must be a bare host name, without a scheme.
+      hostname: API_HOSTNAME,
+      path,
+      headers: {
+        'Content-Type': 'application/json',
+        'Content-Length': Buffer.byteLength(body)
+      }
+    },
+    response => {
+      let raw = ''
+      response.setEncoding('utf8')
+      response.on('data', (chunk: string) => {
+        raw += chunk
+      })
+      response.on('end', () => {
+        let parsed: any = null
+        try {
+          parsed = JSON.parse(raw)
+        } catch (error) {
+          // Not JSON (e.g. an HTML error page): reported below.
+        }
+        if (!parsed || typeof parsed !== 'object') {
+          finish(
+            new Error(
+              `Unexpected response from ${path} (HTTP ${response.statusCode})`
+            )
+          )
+          return
+        }
+        finish(null, parsed)
+      })
+    }
+  )
+  request.on('error', (error: Error) => {
+    request.destroy()
+    finish(error)
+  })
+  // A request body must be a string or Buffer, so the JSON text is sent.
+  request.end(body)
+}
+
+/** Reduce a failed API response to its error code, or to its raw JSON. */
+function describeFailure(response: any): string {
+  return response.errorCode || JSON.stringify(response)
+}
+
+/**
+ * Map the solver method and its data to the `task` object of a createTask
+ * request. Returns undefined for a method this client does not know.
+ */
+function buildTask(
+  captchaMethod: string,
+  captcha: string,
+  pageUrl: string,
+  extraData: any
+): { [key: string]: any } | undefined {
+  // NOTE(review): task type names taken from the CapMonster docs - confirm.
+  let taskType: string
+  if (captchaMethod === 'userrecaptcha') {
+    taskType = 'NoCaptchaTask'
+  } else if (captchaMethod === 'hcaptcha') {
+    taskType = 'HCaptchaTask'
+  } else {
+    return undefined
+  }
+  const extras = extraData || {}
+  const usesProxy = Boolean(extras.proxyType && extras.proxyAddress)
+  return {
+    ...extras,
+    type: usesProxy ? taskType : `${taskType}Proxyless`,
+    websiteURL: pageUrl,
+    websiteKey: captcha
+  }
+}
+
+/**
+ * Request the result of `taskId` every `pollingInterval` milliseconds until
+ * the task is no longer reported as `processing`.
+ */
+function pollCaptcha(
+  taskId: any,
+  options: SolverOptions,
+  invalid: InvalidCallback,
+  callback: SolverCallback
+): void {
+  const delay = options.pollingInterval || defaultOptions.pollingInterval
+  const poll = () => {
+    postJson('/getTaskResult', { clientKey: apiKey, taskId }, (err, res) => {
+      if (err) return callback(err)
+      if (res.status === 'processing') {
+        // The next request is only scheduled once this one was answered, so
+        // requests never overlap and `callback` fires at most once.
+        setTimeout(poll, delay)
+        return
+      }
+      if (res.status !== 'ready' || !res.solution) {
+        return callback(describeFailure(res))
+      }
+      callback(
+        null,
+        { id: taskId, text: res.solution.gRecaptchaResponse },
+        invalid
+      )
+    })
+  }
+  setTimeout(poll, delay)
+}
+
+/** Store the API key that is sent as `clientKey` with every request. */
+export const setApiKey = function(key: string): void {
+  apiKey = key
+}
+
+/**
+ * Tell the service that the token it produced for `captchaId` was rejected.
+ * Best effort: the outcome of the request is ignored.
+ */
+export const report = function(captchaId: any): void {
+  // NOTE(review): endpoint name taken from the CapMonster docs - confirm.
+  postJson(
+    '/reportIncorrectTokenCaptcha',
+    { clientKey: apiKey, taskId: captchaId },
+    () => {}
+  )
+}
+
+/**
+ * Request a token for a reCAPTCHA or hCaptcha.
+ *
+ * @param captchaMethod - 'userrecaptcha' or 'hcaptcha'
+ * @param captcha - Site key of the captcha
+ * @param pageUrl - URL of the page that embeds the captcha
+ * @param extraData - Extra properties merged into the task of the request
+ * @param options - Polling/retry options; when omitted the callback may be
+ *   passed in its place
+ * @param callback - Called with `(err)` on failure and with
+ *   `(null, { id, text }, invalid)` on success. Calling `invalid()` reports
+ *   the token as rejected and requests a new one while retries are left.
+ */
+export const decodeReCaptcha = function(
+  captchaMethod: string,
+  captcha: string,
+  pageUrl: string,
+  extraData: any,
+  options: SolverOptions | SolverCallback,
+  callback?: SolverCallback
+): void {
+  let opts: SolverOptions = {}
+  let handler = callback
+  if (typeof options === 'function') {
+    handler = options
+  } else if (options) {
+    opts = options
+  }
+  if (!handler) {
+    throw new TypeError('decodeReCaptcha requires a callback')
+  }
+  const done: SolverCallback = handler
+
+  if (!apiKey) {
+    return done(new Error('No API key set, call setApiKey() first'))
+  }
+  const task = buildTask(captchaMethod, captcha, pageUrl, extraData)
+  if (!task) {
+    return done(new Error(`Unsupported captcha method: ${captchaMethod}`))
+  }
+
+  postJson('/createTask', { clientKey: apiKey, task }, (err, result) => {
+    if (err) return done(err)
+    if (result.errorId !== 0) return done(describeFailure(result))
+
+    const taskId = result.taskId
+    const invalid: InvalidCallback = error => {
+      report(taskId)
+      if (error) return done('CAPTCHA_FAILED')
+      // Work on a copy so the caller's options (and the shared defaults) are
+      // never mutated by the retry bookkeeping.
+      const retriesLeft = opts.retries || defaultOptions.retries
+      if (retriesLeft > 1) {
+        decodeReCaptcha(
+          captchaMethod,
+          captcha,
+          pageUrl,
+          extraData,
+          { ...opts, retries: retriesLeft - 1 },
+          done
+        )
+      } else {
+        done('CAPTCHA_FAILED_TOO_MANY_TIMES')
+      }
+    }
+    pollCaptcha(taskId, opts, invalid, done)
+  })
+}
diff --git a/packages/puppeteer-extra-plugin-recaptcha/src/provider/capmonster.ts b/packages/puppeteer-extra-plugin-recaptcha/src/provider/capmonster.ts
new file mode 100644
index 000000000..bd52c407c
--- /dev/null
+++ b/packages/puppeteer-extra-plugin-recaptcha/src/provider/capmonster.ts
@@ -0,0 +1,137 @@
+export const PROVIDER_ID = 'capmonster'
+
+import * as types from '../types'
+
+import Debug from 'debug'
+const debug = Debug(`puppeteer-extra-plugin:recaptcha:${PROVIDER_ID}`)
+
+import * as solver from './capMonster-api'
+
+const secondsBetweenDates = (before: Date, after: Date) =>
+  (after.getTime() - before.getTime()) / 1000
+
+export interface DecodeRecaptchaAsyncResult {
+  err?: any
+  result?: any
+  invalid?: any
+}
+
+// NOTE(review): carried over from the 2captcha provider; unused in this file.
+export interface TwoCaptchaProviderOpts {
+  useEnterpriseFlag?: boolean
+  useActionValue?: boolean
+}
+
+/**
+ * Promise wrapper around the callback based solver. Never rejects: failures
+ * are resolved as `{ err }`.
+ */
+async function decodeRecaptchaAsync(
+  token: string,
+  vendor: types.CaptchaVendor,
+  sitekey: string,
+  url: string,
+  extraData: any,
+  opts = { pollingInterval: 2000 }
+): Promise<DecodeRecaptchaAsyncResult> {
+  return new Promise(resolve => {
+    const cb = (err: any, result: any, invalid: any) =>
+      resolve({ err, result, invalid })
+    try {
+      solver.setApiKey(token)
+
+      let method = 'userrecaptcha'
+      if (vendor === 'hcaptcha') {
+        method = 'hcaptcha'
+      }
+      solver.decodeReCaptcha(method, sitekey, url, extraData, opts, cb)
+    } catch (error) {
+      return resolve({ err: error })
+    }
+  })
+}
+
+/**
+ * Read the optional proxy settings from the CAPMONSTER_PROXY_* environment
+ * variables. Returns an empty object unless both type and address are set.
+ */
+function proxySettingsFromEnv(): { [key: string]: any } {
+  const proxyType = process.env['CAPMONSTER_PROXY_TYPE']
+  const proxyAddress = process.env['CAPMONSTER_PROXY_ADDRESS']
+  if (!proxyType || !proxyAddress) {
+    return {}
+  }
+  // NOTE(review): the CapMonster docs list lower-case proxy types - confirm.
+  const settings: { [key: string]: any } = {
+    proxyType: proxyType.toLowerCase(),
+    proxyAddress
+  }
+  // NOTE(review): the docs also ask for a port (and optional credentials).
+  const proxyPort = Number(process.env['CAPMONSTER_PROXY_PORT'])
+  if (proxyPort) {
+    settings.proxyPort = proxyPort
+  }
+  if (process.env['CAPMONSTER_PROXY_LOGIN']) {
+    settings.proxyLogin = process.env['CAPMONSTER_PROXY_LOGIN']
+  }
+  if (process.env['CAPMONSTER_PROXY_PASSWORD']) {
+    settings.proxyPassword = process.env['CAPMONSTER_PROXY_PASSWORD']
+  }
+  return settings
+}
+
+export async function getSolutions(
+  captchas: types.CaptchaInfo[] = [],
+  token: string = ''
+): Promise<{ solutions: types.CaptchaSolution[]; error?: any }> {
+  const solutions = await Promise.all(captchas.map(c => getSolution(c, token)))
+  return { solutions, error: solutions.find(s => !!s.error) }
+}
+
+async function getSolution(
+  captcha: types.CaptchaInfo,
+  token: string
+): Promise<types.CaptchaSolution> {
+  const solution: types.CaptchaSolution = {
+    _vendor: captcha._vendor,
+    provider: PROVIDER_ID
+  }
+  try {
+    if (!captcha || !captcha.sitekey || !captcha.url || !captcha.id) {
+      throw new Error('Missing data in captcha')
+    }
+    solution.id = captcha.id
+    const requestAt = new Date()
+    solution.requestAt = requestAt
+    debug('Requesting solution..', solution)
+    const extraData = proxySettingsFromEnv()
+    if (captcha.s) {
+      extraData['recaptchaDataSValue'] = captcha.s // google site specific property
+    }
+
+    const { err, result, invalid } = await decodeRecaptchaAsync(
+      token,
+      captcha._vendor,
+      captcha.sitekey,
+      captcha.url,
+      extraData
+    )
+    debug('Got response', { err, result, invalid })
+    if (err) throw new Error(`${PROVIDER_ID} error: ${err}`)
+    if (!result || !result.text || !result.id) {
+      throw new Error(
+        `${PROVIDER_ID} error: Missing response data: ${JSON.stringify(result)}`
+      )
+    }
+    solution.providerCaptchaId = result.id
+    solution.text = result.text
+    const responseAt = new Date()
+    solution.responseAt = responseAt
+    solution.hasSolution = !!solution.text
+    solution.duration = secondsBetweenDates(requestAt, responseAt)
+  } catch (error) {
+    debug('Error', error)
+    solution.error = String(error)
+  }
+  return solution
+}