Skip to content

Commit 603d4fe

Browse files
authored
Merge pull request #32 from Setono/create-pull-request/patch
Changes by create-pull-request action
2 parents b574005 + 1f2be25 commit 603d4fe

File tree

1 file changed

+93
-17
lines changed

1 file changed

+93
-17
lines changed

src/BotDetector/Bots.php

Lines changed: 93 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ final class Bots
2727
|AmorankSpider
2828
|ApacheBench
2929
|Applebot
30+
|iTMS
3031
|AppSignalBot
3132
|Arachni
3233
|AspiegelBot
@@ -40,6 +41,7 @@ final class Bots
4041
|BazQux
4142
|Better\sUptime\sBot
4243
|MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot
44+
|Blackbox\sExporter
4345
|Blekkobot
4446
|BLEXBot
4547
|Bloglovin
@@ -58,6 +60,8 @@ final class Bots
5860
|Cloudflare-SSLDetector
5961
|Cloudflare\sCustom\sHostname\sVerification
6062
|Cloudflare-Traffic-Manager
63+
|Cloudflare-Smart-Transit
64+
|CloudflareObservatory
6165
|https://developers\.cloudflare\.com/security-center/
6266
|coccoc\.com
6367
|collectd
@@ -72,14 +76,17 @@ final class Bots
7276
|Domain\sRe-Animator\sBot|support@domainreanimator\.com
7377
|DotBot
7478
|DuckDuck(?:Go-Favicons-)?Bot
79+
|DuckAssistBot
7580
|EasouSpider
7681
|eCairn-Grabber
7782
|EMail\sExractor
7883
|evc-batch
7984
|Exabot|ExaleadCloudview
8085
|ExactSeek\sCrawler
8186
|Ezooms
82-
|facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog
87+
|facebook(?:catalog|externalhit|externalua|platform|scraper)
88+
|meta-externalagent
89+
|meta-externalfetcher
8390
|FacebookBot/[\d.]+
8491
|Feedbin
8592
|FeedBurner
@@ -104,25 +111,32 @@ final class Bots
104111
|Google-Structured-Data-Testing-Tool
105112
|GoogleStackdriverMonitoring
106113
|Google-Transparency-Report
114+
|Google-CloudVertexBot
107115
|via\sggpht\.com\sGoogleImageProxy
116+
|Google-Document-Conversion
117+
|GoogleDocs;\sapps-spreadsheets
118+
|GoogleDocs;\sapps-presentations
119+
|GoogleDocs;
108120
|SeznamEmailProxy
109121
|Seznam-Zbozi-robot
110122
|Heurekabot-Feed
111123
|ShopAlike
124+
|deepcrawl\.com
112125
|Googlebot-News
113-
|Adwords-(?:DisplayAds|Express|Instant)|Google\sWeb\sPreview|Google[\s-]Publisher[\s-]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet
126+
|Adwords-(?:DisplayAds|Express|Instant)|Google\sWeb\sPreview|Google[\s-]Publisher[\s-]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet
114127
|^Google$
128+
|Google-Safety
129+
|DuplexWeb-Google
115130
|Google-Area120-PrivacyPolicyFetcher
116-
|heritrix
117131
|HubSpot\s
118-
|vuhuvBot
132+
|vuhuv(?:Bot|RBT)
119133
|HTTPMon/[\d.]+
120134
|ICC-Crawler
121135
|inoreader\.com
122136
|iisbot
123137
|ips-agent
124138
|IP-Guide\.com
125-
|k6/[0-9\.]+
139+
|k6/[0-9.]+
126140
|kouio
127141
|larbin
128142
|[A-z0-9]*-Lighthouse
@@ -134,7 +148,7 @@ final class Bots
134148
|magpie-crawler
135149
|MagpieRSS
136150
|masscan-ng/[\d.]+
137-
|masscan
151+
|.*masscan
138152
|Mastodon/
139153
|meanpathbot
140154
|MetaJobBot
@@ -155,6 +169,7 @@ final class Bots
155169
|nlcrawler
156170
|Nmap\sScripting\sEngine
157171
|Nuzzel
172+
|NodePing
158173
|Octopus\s[0-9]
159174
|OnlineOrNot\.com_bot
160175
|omgili
@@ -174,7 +189,7 @@ final class Bots
174189
|Quora-Bot
175190
|RamblerMail
176191
|QuerySeekerSpider
177-
|Qwantify
192+
|Qwantify|Qwantbot
178193
|Rainmeter
179194
|redditbot
180195
|Riddler
@@ -185,6 +200,7 @@ final class Bots
185200
|Screaming\sFrog\sSEO\sSpider
186201
|ScreenerBot
187202
|SemrushBot
203+
|BacklinksExtendedBot
188204
|SerpReputationManagementAgent/[\d.]+
189205
|SplitSignalBot
190206
|SiteAuditBot/[\d.]+
@@ -206,7 +222,6 @@ final class Bots
206222
|Sogou[\s-](?:head|inst|Orion|Pic|Test|web)[\s-]spider|New-Sogou-Spider
207223
|Sosospider|Sosoimagespider
208224
|Sprinklr
209-
|sqlmap/
210225
|SSL\sLabs
211226
|StatusCake
212227
|Superfeedr\sbot
@@ -252,6 +267,8 @@ final class Bots
252267
|websitepulse[+\s]checker
253268
|WordPress.+isitwp\.com
254269
|Automattic\sAnalytics\sCrawler/[\d.]+
270+
|WordPress\.com\smShots
271+
|wp\.com\sfeedbot
255272
|WordPress
256273
|Wotbox
257274
|XenForo
@@ -332,6 +349,7 @@ final class Bots
332349
|Startpagina-Linkchecker
333350
|MoodleBot-Linkchecker
334351
|GTmetrix
352+
|CyberFind\s?Crawler
335353
|Nutch
336354
|Seobility
337355
|Vercelbot
@@ -346,7 +364,7 @@ final class Bots
346364
|Barkrowler
347365
|BDCbot
348366
|adbeat
349-
|BW/[\d.]+
367+
|(?:BuiltWith|BW)/[\d.]+
350368
|https://whatis\.contentkingapp\.com
351369
|MicroAdBot
352370
|PingAdmin\.Ru
@@ -403,6 +421,7 @@ final class Bots
403421
|seolyt/[\d.]+
404422
|YaK/[\d.]+
405423
|KomodiaBot/[\d.]+
424+
|KStandBot/[\d.]+
406425
|Neevabot/[\d.]+
407426
|LinkPreview/[\d.]+
408427
|JungleKeyThumbnail/[\d.]+
@@ -426,7 +445,7 @@ final class Bots
426445
|OnalyticaBot
427446
|deepnoc
428447
|Newslitbot/[\d.]+
429-
|um-LN/[\d.]+
448+
|um-(?:ANS|CC|FC|IC|LN)/[\d.]+
430449
|Abonti/[\d.]+
431450
|collection@infegy\.com
432451
|HTTP\sBanner\sDetection\s\(https://security\.ipip\.net\)
@@ -452,6 +471,7 @@ final class Bots
452471
|CriteoBot/
453472
|PayPal\sIPN
454473
|MaCoCu
474+
|CLASSLA
455475
|dnt-policy@eff\.org
456476
|InfoTigerBot
457477
|(?:Birdcrawlerbot|CrawlaDeBot)
@@ -477,7 +497,7 @@ final class Bots
477497
|Sublinq
478498
|Gregarius/[\d.]+
479499
|COMODO\sDCV
480-
|Sectigo\sDCV
500+
|Sectigo\sDCV|acme\.sectigo\.com
481501
|KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+
482502
|Taboolabot/[\d.]+
483503
|Asana/[\d.]+
@@ -502,14 +522,14 @@ final class Bots
502522
|AdsTxtCrawler/[\d.]+
503523
|Morningscore
504524
|Uptime-Kuma/[\d.]+
525+
|OAI-SearchBot
526+
|GPTBot/[\d.]+
505527
|ChatGPT-User
506528
|BrightEdge\sCrawler/[\d.]+
507529
|sfFeedReader/[\d.]+
508530
|cyberscan\.io
509-
|deepcrawl\.com
510531
|researchscan\.comsys\.rwth-aachen\.de
511532
|newspaper/[\d.]+
512-
|GPTBot/[\d.]+
513533
|Ant(?:\.com\sbeta|Bot)(?:/([\d+.]+))?
514534
|WebwikiBot/[\d.]+
515535
|phpMyAdmin
@@ -518,7 +538,7 @@ final class Bots
518538
|ArchiveTeam\sArchiveBot
519539
|MADBbot/[\d.]+
520540
|MeltwaterNews
521-
|(?:Owler@ows\.eu|OWLer)/[\d.]+
541+
|owler
522542
|bbc\.co\.uk/display/men/Page\+Monitor
523543
|BBC-Forge-URL-Monitor-Twisted
524544
|ClaudeBot
@@ -528,6 +548,7 @@ final class Bots
528548
|DaspeedBot/([\d+.]+)
529549
|StractBot(?:/([\d+.]+))?
530550
|GeedoBot(?:/([\d+.]+))?
551+
|GeedoProductSearch
531552
|BackupLand(?:/([\d+.]+))?
532553
|Konturbot(?:/([\d+.]+))?
533554
|keys-so-bot
@@ -585,7 +606,6 @@ final class Bots
585606
|Monsidobot/[\d.]+
586607
|AccompanyBot
587608
|Ghost\sInspector
588-
|Cypress/[\d.]+
589609
|Google-Apps-Script
590610
|SiteOne-Crawler/[\d.]+
591611
|Detectify
@@ -619,7 +639,63 @@ final class Bots
619639
|abuse\.xmco\.fr
620640
|CheckHost
621641
|LAC_IAHarvester/[\d.]+
622-
|nuhk|grub-client|Download\sDemon|SearchExpress|Microsoft\sURL\sControl|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?:\sBuild|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google\sSketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib\sAutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re\sStudio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle\sClient|Hello,?\sworld|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M\sCODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux\sGnu\s\(cow\)|Test\sCertificate\sInfo|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$
623-
|[a-z0-9_-]*(?:(?<!cu|power[\s_]|m[\s_])bot(?![\s_]TAB|[\s_]?5[0-9]|[\s_]Senior|[\s_]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|fetcher|indexer|inspector|monitor|project(?!or)|(?<!Google\sWap\s)proxy|research|resolver|robots|scanner|scraper|script|searcher|(?<!-)security|spider|study|transcoder|uptime|user[\s_]?agent|validator)(?:[^a-z]|$)
642+
|InsytfulBot/[\d.]+
643+
|statista\.com
644+
|SubstackContentFetch/[\d.]+
645+
|^ds9
646+
|LiveJournal\.com
647+
|bitdiscovery
648+
|Castopod/[\d.]+
649+
|Elastic/Synthetics
650+
|WDG_Validator/[\d.]+
651+
652+
|CrawlyProjectCrawler/[\d.]+
653+
|BDFetch
654+
|PunkMap
655+
|GenomeCrawlerd/[\d.]+
656+
|Gaisbot/[\d.]+
657+
|FAST-WebCrawler/[\d.]+
658+
|ducks\.party
659+
|DepSpid/[\d.]+
660+
|Website-info\.net
661+
|RedekenBot
662+
|semaltbot
663+
|MakeMerryBot
664+
|Timpibot
665+
|Validbot
666+
|NPBot
667+
|domaincodex\.com
668+
|Swisscows\sFavicons
669+
|leak\.info
670+
|workona
671+
|Bloglines
672+
|heritrix
673+
|search\.marginalia\.nu
674+
|vu-server-health-scanner/[\d.]+
675+
|Functionize
676+
|Prerender
677+
|bl\.uk_ldfc_bot
678+
|Miniature\.io
679+
|Convertify
680+
|ZoteroTranslationServer
681+
|MuckRack
682+
|Golfe
683+
|SpiderLing
684+
|Bravebot
685+
|1001FirmsBot
686+
|SteamChatURLLookup
687+
|ohdear\.app
688+
|Inspici
689+
|peer39_crawler
690+
|Pandalytics
691+
|CloudServerMarketSpider
692+
|Pigafetta
693+
|Cotoyogi
694+
|SuggestBot
695+
|cms-experiment
696+
|SiteCheckerBotCrawler
697+
|SBIder
698+
|nuhk|grub-client|Download\sDemon|SearchExpress|Microsoft\sURL\sControl|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?:\sBuild|Plus|\sCM62|\sHD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google\sSketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib\sAutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re\sStudio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle\sClient|Hello,?\sworld|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M\sCODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux\sGnu\s\(cow\)|Test\sCertificate\sInfo|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root\sSlut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report\sRunner|url|Zeus|ZmEu)$
699+
|[a-z0-9_-]*(?:(?<!cu|power[\s_]|m[\s_])bot(?![\s_]TAB|[\s_]?5[0-9]|[\s_]Senior|[\s_]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d]\s|electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft\s|banshee-)project(?!or)|(?<!Google\sWap\s|Blue\s|SpeedMode;\s)proxy|(?<!P)research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?!\s8)|study|transcoder|uptime|user[\s_]?agent|validator)(?:[^a-z]|$)
624700
#x";
625701
}

0 commit comments

Comments
 (0)