Skip to content

Commit cf3c5a5

Browse files
committed
fix(list): Repair PhishTank filter
1 parent 9d5bbb4 commit cf3c5a5

File tree

5 files changed

+19
-5
lines changed

5 files changed

+19
-5
lines changed

data/v2/manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8378,7 +8378,7 @@
83788378
"checksums": {},
83798379
"content": {
83808380
"filter": "NONE",
8381-
"retriever": "LYNX",
8381+
"retriever": "ARIA2",
83828382
"type": "CSV"
83838383
},
83848384
"formats": [

docker/Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ USER root
110110
WORKDIR /root
111111

112112
LABEL maintainer="T145" \
113-
version="7.1.1" \
113+
version="7.1.3" \
114114
description="Runs the \"Black Mirror\" project! Check it out GitHub!" \
115115
org.opencontainers.image.created="2025-11-01" \
116-
org.opencontainers.image.revision="7.1.1" \
116+
org.opencontainers.image.revision="7.1.3" \
117117
org.opencontainers.image.source="https://github.com/T145/black-mirror" \
118118
org.opencontainers.image.url="https://github.com/T145/black-mirror" \
119119
org.opencontainers.image.vendor="T145" \
@@ -194,6 +194,7 @@ RUN apk add --no-cache --virtual .perl-build build-base curl gnutar xz perl-util
194194
make -j"$(nproc)"; \
195195
TEST_JOBS="$(nproc)" make test_harness || true; \
196196
make install; \
197+
ln -sf /usr/local/bin/perl /usr/bin/perl; \
197198
apk del .perl-build
198199

199200
WORKDIR /usr/src

logs/aria2.log

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2026-01-21 20:58:00.305701 [NOTICE] [Context.cc:310] Downloading 1 item(s)
2+
2026-01-21 20:58:00.313030 [NOTICE] [ServerStatMan.cc:230] ServerStat file logs/server-stats.txt loaded successfully.
3+
2026-01-21 20:58:00.388268 [NOTICE] [HttpResponse.cc:168] CUID#7 - Redirecting to https://cdn.phishtank.com/datadumps/verified_online.csv?Expires=1769028790&Signature=MKUQigJidNgZ397xQp0ax9jfTmYX3-8VdCJ400GIiUQ3Ic7iff5wKevZv42c7w-wxADHbAVemYavMvBWecUWXmVWnJrS3m2VSCP7ctO6KdsFO76OohBQGesdtQuL2Ont4K~Teqo4QTrNINoEAkcftRsjD6hSdwFuZT-raJKYvmY9GVlaQlVZmJG27wXITFoud9edU7p5I091YzVYKyWOgkcr3B788vY35Eju7OSjVZPfA1~kNT0PJ1k0EpI9JRo8q4p46DQZ~AYowqIBJb9TWmNOSQJgIDzbFRHWDfZwrLeARCjPi11RBk~CqnimBicUswmYoFUwVZBR5xVnTjf90g__&Key-Pair-Id=APKAILB45UG3RB4CSOJA
4+
2026-01-21 20:58:00.740747 [NOTICE] [RequestGroup.cc:1214] Download complete: /tmp/phishtank_aria2.csv
5+
2026-01-21 20:58:00.744948 [NOTICE] [ServerStatMan.cc:110] ServerStat file logs/server-stats.txt saved successfully.
6+
2026-01-21 20:58:15.776673 [NOTICE] [Context.cc:310] Downloading 1 item(s)
7+
2026-01-21 20:58:15.782912 [NOTICE] [ServerStatMan.cc:230] ServerStat file logs/server-stats.txt loaded successfully.
8+
2026-01-21 20:58:15.984072 [ERROR] [AbstractCommand.cc:401] CUID#7 - Download aborted. URI=https://phishstats.info/phish_score.csv
9+
Exception: [AbstractCommand.cc:403] errorCode=4 URI=https://phishstats.info/phish_score.csv
10+
-> [RequestGroup.cc:1266] errorCode=4 Reached max-file-not-found count=1
11+
2026-01-21 20:58:15.984263 [NOTICE] [RequestGroupMan.cc:424] Download GID#049ba6c9e563a68f not complete: /tmp/phishstats.csv
12+
2026-01-21 20:58:15.987903 [NOTICE] [ServerStatMan.cc:110] ServerStat file logs/server-stats.txt saved successfully.

logs/server-stats.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
host=data.phishtank.com, protocol=https, dl_speed=6003128, sc_avg_speed=6003128, mc_avg_speed=0, last_updated=1769029080, counter=1, status=OK

scripts/v2/apply_filters.bash

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@ process_list() {
212212
'WATCHLIST_INTERNET') mlr --mmap --csv --ifs ';' -N cut -f 1 ;;
213213
'CRUZ_IT') mlr --mmap --csv --headerless-csv-output clean-whitespace then cut -f ip_address ;;
214214
# PhishTank often includes large hashes in its URLs, which may lead to parsing issues.
215-
# Therefore use regex that ignores paths and queries after the domain.
216-
'PHISHTANK') mlr --mmap --csv --headerless-csv-output --lazy-quotes put -S '$url =~ "https?://([^/]+)"; $Domain = "\1"' then cut -f Domain ;;
215+
# Therefore cut only the url column and pipe it to get_domains_from_urls instead of matching with an inline regex.
216+
'PHISHTANK') mlr --mmap --csv --headerless-csv-output --lazy-quotes --skip-comments clean-whitespace then cut -f url | get_domains_from_urls ;;
217217
'BLOCKLIST_UA') mlr --mmap --csv --headerless-csv-output --ifs ';' cut -f IP ;;
218218
# The C2 feed has malformed CSVs.
219219
'THREATVIEW_C2_HOSTS') mawk -F, '/^[^#]/{print $3}' ;;

0 commit comments

Comments
 (0)