diff --git a/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts b/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts
index 63a6336b8..66c634d14 100644
--- a/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts
+++ b/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts
@@ -38,3 +38,35 @@ test('will block ads', async t => {
 
   await browser.close()
 })
+
+test('will adhere to allowlist rules', async t => {
+  const puppeteer = require('puppeteer-extra')
+  const adblockerPlugin = AdblockerPlugin({
+    allowlist: [
+      // Allow everything
+      /(.*?)/,
+    ]
+  })
+
+  puppeteer.use(adblockerPlugin)
+
+  const browser = await puppeteer.launch({
+    args: PUPPETEER_ARGS,
+    headless: true
+  })
+
+  const blocker = await adblockerPlugin.getBlocker()
+
+  const page = await browser.newPage()
+
+  let blockedRequests = 0
+  blocker.on('request-blocked', () => {
+    blockedRequests += 1
+  })
+
+  await page.goto('https://www.google.com/search?q=rent%20a%20car', { waitUntil: 'networkidle0' })
+
+  t.is(blockedRequests, 0)
+
+  await browser.close()
+})
diff --git a/packages/puppeteer-extra-plugin-adblocker/src/index.ts b/packages/puppeteer-extra-plugin-adblocker/src/index.ts
index c0aa868a0..9d28042cb 100644
--- a/packages/puppeteer-extra-plugin-adblocker/src/index.ts
+++ b/packages/puppeteer-extra-plugin-adblocker/src/index.ts
@@ -2,7 +2,7 @@ import { promises as fs } from 'fs'
 import os from 'os'
 import path from 'path'
 
-import { PuppeteerBlocker } from '@cliqz/adblocker-puppeteer'
+import { NetworkFilter, PuppeteerBlocker } from '@cliqz/adblocker-puppeteer'
 import fetch from 'node-fetch'
 import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin'
 
@@ -11,6 +11,8 @@ const engineCacheFilename = `${pkg.name}-${pkg.version}-engine.bin`
 
 /** Available plugin options */
 export interface PluginOptions {
+  /** Optional custom list of sites to allow. Default: [] */
+  allowlist: RegExp[]
   /** Whether or not to block trackers (in addition to ads). Default: false */
   blockTrackers: boolean
   /** Whether or not to block trackers and other annoyances, including cookie
@@ -41,6 +43,7 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin {
 
   get defaults(): PluginOptions {
     return {
+      allowlist: [],
       blockTrackers: false,
       blockTrackersAndAnnoyances: false,
       useCache: true,
@@ -67,6 +70,24 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin {
     await fs.writeFile(this.engineCacheFile, blocker.serialize())
   }
 
+  /**
+   * Build the exception filters described by the `allowlist` option.
+   *
+   * Every `RegExp` is stringified, prefixed with `@@` (the adblock-filter
+   * marker for an exception rule) and handed to `NetworkFilter.parse`.
+   * Entries for which the parser yields no filter are dropped.
+   *
+   * @returns The successfully parsed filters, in allowlist order.
+   */
+  private getExceptions(): NetworkFilter[] {
+    const allowlist: RegExp[] = this.opts.allowlist || []
+    return allowlist
+      .map((regex) => NetworkFilter.parse('@@' + regex.toString()))
+      // `!= null` rejects both `null` and `undefined`, so the type
+      // predicate is truthful whichever one the parser returns.
+      .filter((filter): filter is NetworkFilter => filter != null)
+  }
+
   /**
    * Initialize instance of `PuppeteerBlocker` from cache if possible.
    * Otherwise, it throws and we will try to initialize it from remote instead.
@@ -118,6 +139,9 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin {
         await this.persistToCache(this.blocker)
       }
     }
+    this.blocker.update({
+      newNetworkFilters: this.getExceptions(),
+    })
     return this.blocker
   }
 