10
10
11
11
final class Bots
12
12
{
13
- public const REGEX = "#monitoring360bot
13
+ public const REGEX = "#WireReaderBot(?:/([\d+.]+))?
14
+ |monitoring360bot
14
15
|Cloudflare-Healthchecks
15
16
|360Spider
16
17
|Aboundex
@@ -108,8 +109,10 @@ final class Bots
108
109
|Seznam-Zbozi-robot
109
110
|Heurekabot-Feed
110
111
|ShopAlike
111
- |Adwords-(?:DisplayAds|Express|Instant)|Google\sWeb\sPreview|Google[\s-]Publisher[\s-]Plugin|Google-(?:Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer)|Google.*/\+/web/snippet
112
+ |Googlebot-News
113
+ |Adwords-(?:DisplayAds|Express|Instant)|Google\sWeb\sPreview|Google[\s-]Publisher[\s-]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet
112
114
|^Google$
115
+ |Google-Area120-PrivacyPolicyFetcher
113
116
|heritrix
114
117
|HubSpot\s
115
118
|vuhuvBot
@@ -182,6 +185,9 @@ final class Bots
182
185
|Screaming\sFrog\sSEO\sSpider
183
186
|ScreenerBot
184
187
|SemrushBot
188
+ |SerpReputationManagementAgent/[\d.]+
189
+ |SplitSignalBot
190
+ |SiteAuditBot/[\d.]+
185
191
|SensikaBot
186
192
|SEOENG(?:World)?Bot
187
193
|SEOkicks-Robot
@@ -216,16 +222,16 @@ final class Bots
216
222
|TinEye-bot
217
223
|Tiny\sTiny\sRSS
218
224
|theoldreader\.com
219
- |Trackable/0.1
225
+ |Trackable/0\.1
220
226
|trendictionbot
221
227
|TurnitinBot
222
- |TweetedTimes\sBot
228
+ |TweetedTimes
223
229
|TweetmemeBot
224
230
|Twingly\sRecon
225
231
|Twitterbot
226
232
|UniversalFeedParser
227
233
|via\ssecureurl\.fwdcdn\.com
228
- |Uptimebot
234
+ |Uptime(?:bot)?/[\d.]+
229
235
|UptimeRobot
230
236
|URLAppendBot
231
237
|Vagabondo
@@ -256,7 +262,9 @@ final class Bots
256
262
|YahooCacheSystem
257
263
|Y!J-BRW
258
264
|Y!J-WSC
259
- |Yandex(?:(?:\.Gazeta\s|Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher
265
+ |Y!J-ASR
266
+ |^Y!J
267
+ |Yandex(?:(?:\.Gazeta\s|Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher
260
268
|Yeti|NaverJapan|AdsBot-Naver
261
269
|YoudaoBot
262
270
|YOURLS\sv[0-9]
@@ -269,7 +277,8 @@ final class Bots
269
277
|.*Java.*outbrain
270
278
|HubPages.*crawlingpolicy
271
279
|Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com
272
- |Site24x7
280
+ |.*Site24x7
281
+ |.*\sHLB/[\d.]+
273
282
|s~snapchat-proxy
274
283
|Snap\sURL\sPreview\sService
275
284
|SnapchatAds/[\d.]+
@@ -358,6 +367,7 @@ final class Bots
358
367
|datagnionbot
359
368
|WhatCMS
360
369
|httpx
370
+ |.*\.oast\.
361
371
|scaninfo@(?:expanseinc|paloaltonetworks)\.com
362
372
|HuaweiWebCatBot
363
373
|Hatena-Favicon
@@ -453,7 +463,6 @@ final class Bots
453
463
|RenovateBot/[\d.]+
454
464
|INETDEX-BOT/[\d.]+
455
465
|NETZZAPPEN
456
- |SerpReputationManagementAgent/[\d.]+
457
466
|panscient\.com
458
467
|research@pdrlabs\.net
459
468
|Nicecrawler/[\d.]+
@@ -533,6 +542,8 @@ final class Bots
533
542
|MBCrawler
534
543
|mariadb-mysql-kbs-bot
535
544
|GitHubCopilotChat
545
+ |^pdrl\.fm
546
+ |PodUptime/
536
547
|anthropic-ai
537
548
|NetpeakCheckerBot/[\d.]+
538
549
|SandobaCrawler/[\d.]+
@@ -568,7 +579,47 @@ final class Bots
568
579
|ViberUrlDownloader
569
580
|^Zeno$
570
581
|Barracuda\sSentinel
571
- |nuhk|grub-client|Download\sDemon|SearchExpress|Microsoft\sURL\sControl|borg|altavista|dataminr\.com|tweetedtimes\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?:\sBuild|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google\sSketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib\sAutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re\sStudio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle\sClient|Hello,?\sworld|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M\sCODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux\sGnu\s\(cow\)|Test\sCertificate\sInfo|iplabel|Magellan|TheSafex?Internetx?Search|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$
572
- |[a-z0-9_-]*(?:(?<!cu|power[\s_]|m[\s_])bot(?![\s_]TAB|[\s_]?5[0-9]|[\s_]Senior|[\s_]Junior)|analyzer|appengine|archiver|checker|collector|crawl|crawler|fetcher|indexer|monitor|project(?!or)|research|resolver|robots|scraper|security|spider|study|transcoder|uptime|user[\s_]?agent|validator)(?:[^a-z]|$)
582
+ |RuxitSynthetic/[\d.]+
583
+ |DynatraceSynthetic/[\d.]+
584
+ |sitebulb
585
+ |Monsidobot/[\d.]+
586
+ |AccompanyBot
587
+ |Ghost\sInspector
588
+ |Cypress/[\d.]+
589
+ |Google-Apps-Script
590
+ |SiteOne-Crawler/[\d.]+
591
+ |Detectify
592
+ |DomCopBot
593
+ |Paqlebot/[\d.]+
594
+ |Wibybot
595
+ |Synapse
596
+ |OSZKbot/[\d.]+
597
+ |ZoomBot
598
+ |RavenCrawler/[\d.]+
599
+ |KadoBot
600
+ |Dubbotbot/[\d.]+
601
+ |Swiftbot/[\d.]+
602
+ |EyeMonIT
603
+ |ThousandEyes
604
+ |OmtrBot/[\d.]+
605
+ |WebMon/[\d.]+
606
+ |AdsTxtCrawlerTP/[\d.]+
607
+ |fragFINN
608
+ |Clickagy
609
+ |kiwitcms-gitops/[\d.]+
610
+ |webtru_crawler
611
+ |URLSuMaBot
612
+ |360JK\syunjiankong
613
+ |UCSBNetworkMeasurement
614
+ |Plesk\sscreenshot\sbot
615
+ |Who\.is
616
+ |Probely
617
+ |Uptimia(?:/[\d.]+)?
618
+ |2GDPR/[\d.]+
619
+ |abuse\.xmco\.fr
620
+ |CheckHost
621
+ |LAC_IAHarvester/[\d.]+
622
+ |nuhk|grub-client|Download\sDemon|SearchExpress|Microsoft\sURL\sControl|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?:\sBuild|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google\sSketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib\sAutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re\sStudio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle\sClient|Hello,?\sworld|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M\sCODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux\sGnu\s\(cow\)|Test\sCertificate\sInfo|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$
623
+ |[a-z0-9_-]*(?:(?<!cu|power[\s_]|m[\s_])bot(?![\s_]TAB|[\s_]?5[0-9]|[\s_]Senior|[\s_]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|fetcher|indexer|inspector|monitor|project(?!or)|(?<!Google\sWap\s)proxy|research|resolver|robots|scanner|scraper|script|searcher|(?<!-)security|spider|study|transcoder|uptime|user[\s_]?agent|validator)(?:[^a-z]|$)
573
624
#x";
574
625
}
0 commit comments