From 07a83eb5bdd487e1380048099cc14ffd25610ee6 Mon Sep 17 00:00:00 2001 From: Wyatt Stanke <47758296+Wyatt-Stanke@users.noreply.github.com> Date: Sat, 22 Jul 2023 15:39:12 -0400 Subject: [PATCH 1/2] Support exceptions (allowlist/whitelist) in adblocker Fixes #180 --- .../src/index.test.ts | 32 +++++++++++++++++++ .../src/index.ts | 22 +++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts b/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts index 63a6336b8..66c634d14 100644 --- a/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts +++ b/packages/puppeteer-extra-plugin-adblocker/src/index.test.ts @@ -38,3 +38,35 @@ test('will block ads', async t => { await browser.close() }) + +test("will adhere to allowlist rules", async t => { + const puppeteer = require('puppeteer-extra') + const adblockerPlugin = AdblockerPlugin({ + allowlist: [ + // Allow everything + /(.*?)/, + ] + }) + + puppeteer.use(adblockerPlugin) + + const browser = await puppeteer.launch({ + args: PUPPETEER_ARGS, + headless: true + }) + + const blocker = await adblockerPlugin.getBlocker() + + const page = await browser.newPage() + + let blockedRequests = 0 + blocker.on('request-blocked', (req) => { + blockedRequests += 1 + }) + + await page.goto("https://www.google.com/search?q=rent%20a%20car", { waitUntil: 'networkidle0' }) + + t.is(blockedRequests, 0) + + await browser.close(); +}) diff --git a/packages/puppeteer-extra-plugin-adblocker/src/index.ts b/packages/puppeteer-extra-plugin-adblocker/src/index.ts index c0aa868a0..6b7baaa9a 100644 --- a/packages/puppeteer-extra-plugin-adblocker/src/index.ts +++ b/packages/puppeteer-extra-plugin-adblocker/src/index.ts @@ -2,7 +2,7 @@ import { promises as fs } from 'fs' import os from 'os' import path from 'path' -import { PuppeteerBlocker } from '@cliqz/adblocker-puppeteer' +import { NetworkFilter, PuppeteerBlocker } from '@cliqz/adblocker-puppeteer' import fetch 
from 'node-fetch' import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin' @@ -11,6 +11,8 @@ const engineCacheFilename = `${pkg.name}-${pkg.version}-engine.bin` /** Available plugin options */ export interface PluginOptions { + /** Optional custom list of sites to allow. Default: [] */ + allowlist: RegExp[] /** Whether or not to block trackers (in addition to ads). Default: false */ blockTrackers: boolean /** Whether or not to block trackers and other annoyances, including cookie @@ -41,6 +43,7 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin { get defaults(): PluginOptions { return { + allowlist: [], blockTrackers: false, blockTrackersAndAnnoyances: false, useCache: true, @@ -67,6 +70,18 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin { await fs.writeFile(this.engineCacheFile, blocker.serialize()) } + /** + * Return the `allowlist` option as `@@` exception filters; entries the filter parser rejects are dropped. + */ + private getExceptions(): NetworkFilter[] { + const list: RegExp[] = this.opts.allowlist || [] + return list + // Turn the list of regexps into an array of `NetworkFilter` instances. + .map((regex) => NetworkFilter.parse("@@" + regex.toString())) + // `NetworkFilter.parse` yields `null` (not `undefined`) for invalid input, so drop every nullish result. + .filter((filter): filter is NetworkFilter => filter != null) + } + /** * Initialize instance of `PuppeteerBlocker` from cache if possible. * Otherwise, it throws and we will try to initialize it from remote instead. 
@@ -118,6 +133,9 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin { await this.persistToCache(this.blocker) } } + this.blocker.update({ + newNetworkFilters: this.getExceptions(), + }) return this.blocker } @@ -149,7 +167,7 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin { */ async onPageCreated(page: any) { this.debug('onPageCreated') - ;(await this.getBlocker()).enableBlockingInPage(page) + ; (await this.getBlocker()).enableBlockingInPage(page) } } From f1c56f65952c858e36daaf7999c8e2adcc6fa6a6 Mon Sep 17 00:00:00 2001 From: Wyatt Stanke <47758296+Wyatt-Stanke@users.noreply.github.com> Date: Sat, 22 Jul 2023 15:40:48 -0400 Subject: [PATCH 2/2] Fix accidental formatting change --- packages/puppeteer-extra-plugin-adblocker/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/puppeteer-extra-plugin-adblocker/src/index.ts b/packages/puppeteer-extra-plugin-adblocker/src/index.ts index 6b7baaa9a..9d28042cb 100644 --- a/packages/puppeteer-extra-plugin-adblocker/src/index.ts +++ b/packages/puppeteer-extra-plugin-adblocker/src/index.ts @@ -167,7 +167,7 @@ export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin { */ async onPageCreated(page: any) { this.debug('onPageCreated') - ; (await this.getBlocker()).enableBlockingInPage(page) + ;(await this.getBlocker()).enableBlockingInPage(page) } }