Skip to content

Commit 9df0d46

Browse files
committed
Add periodic tests for batch
1 parent b1dde07 commit 9df0d46

File tree

9 files changed

+1341
-13
lines changed

9 files changed

+1341
-13
lines changed

docker/batch-test.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,5 +115,6 @@ CRON_WEEKLY_POSTGRESQL_BACKUP=True
115115

116116
# selftest runs against public domain (example.(nl|com)) which will never work in the test environment
117117
CRON_15MIN_RUN_TESTS=False
118+
CRON_15MIN_RUN_TESTS_BATCH=False
118119

119120
INTERNETNL_BRANDING=True
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
#!/usr/bin/env python3
2+
3+
# run tests on example domains and write metrics to prometheus textfile
4+
5+
# for iterative development
6+
# docker run -ti -e INTERNETNL_DOMAINNAME=internet.nl -v $PWD/docker/cron/periodic/15min/tests.py:/tests.py \
7+
# ghcr.io/internetstandards/cron:latest /tests.py --debug
8+
9+
import sys
10+
import os
11+
import time
12+
from prometheus_client import REGISTRY, Gauge, generate_latest
13+
import prometheus_client
14+
import logging
15+
import requests
16+
import datetime
17+
18+
log = logging.getLogger(__name__)

# passing --debug on the command line enables debug logging and prints the
# metrics to stdout instead of writing them to OUTPUT_TEXTFILE
DEBUG = "--debug" in sys.argv

# file to write metrics to, see:
# https://github.com/prometheus/node_exporter?tab=readme-ov-file#textfile-collector
OUTPUT_TEXTFILE = "/prometheus-textfile-directory/tests-batch.prom"

# maximum time (seconds) to wait for a batch request to reach the next stage
BATCH_REQUEST_TIMEOUT = 5 * 60
# timeout (seconds) intended for a single HTTP request
REQUEST_TIMEOUT = 30

# batch request types to run, each one is a key in TEST_DOMAINS
REQUEST_TYPES = ["web", "mail"]

IPV4_IP_APP_INTERNAL = os.getenv("IPV4_IP_APP_INTERNAL")
INTERNETNL_DOMAINNAME = os.getenv("INTERNETNL_DOMAINNAME")

# talk directly to the internal app container as the webserver might
# have access restrictions in place
URL_BASE = "http://{}:8080".format(IPV4_IP_APP_INTERNAL)
HEADERS = {"Host": INTERNETNL_DOMAINNAME}

# domains to use in website tests
_WEB_TEST_DOMAINS = [
    "internet.nl",
    "example.nl",
    "example.com",
    "internetsociety.org",
    "ripe.net",
    "surf.nl",
    "ecp.nl",
    "forumstandaardisatie.nl",
    "minez.nl",
]

# domains to use in mail tests
_MAIL_TEST_DOMAINS = [
    "internetsociety.org",
    "ripe.net",
    "surf.nl",
    "ecp.nl",
    # these are currently really slow and will probably improve when
    # we switch to sslyze, for now disable these in monitoring
    # "internet.nl",
    # "forumstandaardisatie.nl",
    # "minez.nl",
]

# domains to submit per request type
TEST_DOMAINS = {
    "web": _WEB_TEST_DOMAINS,
    "mail": _MAIL_TEST_DOMAINS,
}
64+
65+
# Per request type metrics, all labelled with the batch request type ("web"/"mail").
# NOTE(review): these are gauges although several names end in `_total`, which
# Prometheus naming conventions reserve for counters; names are kept as they
# are because dashboards/alerts may depend on them.
METRIC_BATCH_RUN = Gauge(
    name="tests_batch_run_total",
    documentation="Batch requests that have been run.",
    labelnames=["request_type"],
)
METRIC_BATCH_SUCCESS = Gauge(
    name="tests_batch_success_total",
    documentation="Batch requests runs that succeeded.",
    labelnames=["request_type"],
)
METRIC_BATCH_FAILURE = Gauge(
    name="tests_batch_failure_total",
    documentation="Batch requests runs that failed.",
    labelnames=["request_type"],
)
METRIC_BATCH_TIMEOUT = Gauge(
    name="tests_batch_timeout_total",
    documentation="Batch requests that ran into timeout.",
    labelnames=["request_type"],
)
METRIC_BATCH_RUNTIME = Gauge(
    name="tests_batch_runtime_seconds",
    documentation="Amount of time batch request ran before done.",
    labelnames=["request_type"],
)
METRIC_BATCH_STAGE_RUNTIME = Gauge(
    name="tests_batch_stage_runtime_seconds",
    documentation="Amount of time each stage in batch request took.",
    labelnames=["request_type", "stage"],
)

# Per domain metrics, additionally labelled with the domain name.
METRIC_BATCH_DOMAIN = Gauge(
    name="tests_batch_domain_total",
    documentation="Amount of domains batch request.",
    labelnames=["request_type", "domain"],
)
METRIC_BATCH_DOMAIN_SUCCESS = Gauge(
    name="tests_batch_domain_success",
    documentation="Amount of successful domain tests in batch request per domain.",
    labelnames=["request_type", "domain"],
)
METRIC_BATCH_DOMAIN_SCORE = Gauge(
    name="tests_batch_domain_score",
    documentation="Per domain test scores for batch request.",
    labelnames=["request_type", "domain"],
)

# Verdict and status are exported as labels, one series per category/test.
METRIC_BATCH_DOMAIN_CATEGORIES = Gauge(
    name="tests_batch_domain_categories",
    documentation="Domain verdict and status per category.",
    labelnames=["request_type", "domain", "category", "verdict", "status"],
)
METRIC_BATCH_DOMAIN_TESTS = Gauge(
    name="tests_batch_domain_tests",
    documentation="Domain verdict and status per test.",
    labelnames=["request_type", "domain", "test", "verdict", "status"],
)
98+
99+
100+
def wait_for_request_status(url: str, expected_status: list[str], timeout: int = 10, interval: int = 1, auth=None):
    """Poll `url` until the JSON field `request.status` is one of `expected_status`.

    Args:
        url: status endpoint to poll with GET requests.
        expected_status: status values that end the wait.
        timeout: overall wall-clock budget in seconds for reaching an expected status.
        interval: seconds to sleep between polls.
        auth: passed through to `requests.get` as `auth`.

    Returns:
        The status value that matched.

    Raises:
        TimeoutError: no expected status was seen within `timeout` seconds.
        requests.HTTPError: a poll returned an HTTP error status.
        requests.exceptions.Timeout: a single poll exceeded REQUEST_TIMEOUT.
    """

    log.debug("waiting for status: %s", expected_status)

    # use a wall-clock deadline instead of counting tries, otherwise slow
    # responses stretch the effective timeout far beyond what was requested
    deadline = time.monotonic() + timeout

    status = "n/a"
    while True:
        # bound every single request, without a timeout a hanging connection
        # would block this loop forever
        status_response = requests.get(url, auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        status_response.raise_for_status()

        log.debug(status_response.text)
        status = status_response.json()["request"]["status"]
        if status in expected_status:
            return status

        # stop if the next poll would start after the deadline
        if time.monotonic() + interval > deadline:
            break
        time.sleep(interval)

    raise TimeoutError(f"request status never reached '{str(expected_status)}' states, current state: '{status}'")
123+
124+
125+
def run_test_batch(request_type: str, domains: list[str]):
    """Submit one batch request, wait for it to finish and record its metrics.

    The request is registered at the batch API, then polled through its
    stages (registering, running, generating) until it is done. The time
    spent in each stage, the total runtime and the per domain results are
    written to the module level gauges.

    Args:
        request_type: batch request type to submit, also used as the
            `request_type` label on all metrics.
        domains: domains to include in the batch request.

    Raises:
        TimeoutError: a stage did not complete within BATCH_REQUEST_TIMEOUT.
        requests.HTTPError: the API answered with an HTTP error status.
        requests.exceptions.Timeout: a single HTTP request exceeded REQUEST_TIMEOUT.
        KeyError: a response did not contain the fields read here.
    """
    request_data = {
        # submit the requested type, a hard-coded "web" here would make the
        # "mail" run a second web batch
        "type": request_type,
        "domains": domains,
        "name": f"periodic test {str(datetime.datetime.now())}",
    }

    # passed to requests as HTTP basic auth credentials
    auth = ("periodic_tests", "periodic_tests")
    api_url: str = URL_BASE + "/api/batch/v2/"

    # monotonic clock, durations must not be affected by wall-clock adjustments
    test_start = time.monotonic()

    # start batch request
    register_response = requests.post(
        api_url + "requests", json=request_data, auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    register_response.raise_for_status()
    log.debug(register_response.text)

    # get test_id from register data
    register_data = register_response.json()
    test_id: str = register_data["request"]["request_id"]
    status_url = api_url + "requests/" + test_id

    # wait for batch tests to start
    wait_for_request_status(
        status_url, ["running", "generating", "done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth
    )
    registered_at = time.monotonic()
    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "registering").set(int(registered_at - test_start))

    # wait for batch tests to complete and report to be generated
    wait_for_request_status(status_url, ["generating", "done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth)
    ran_at = time.monotonic()
    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "running").set(int(ran_at - registered_at))

    # wait for report generation and batch to be done; each stage is measured
    # from the end of the previous one so no stage includes an earlier one
    wait_for_request_status(status_url, ["done"], timeout=BATCH_REQUEST_TIMEOUT, auth=auth)
    generated_at = time.monotonic()
    METRIC_BATCH_STAGE_RUNTIME.labels(request_type, "generating").set(int(generated_at - ran_at))

    # get batch results
    results_response = requests.get(status_url + "/results", auth=auth, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    results_response.raise_for_status()
    log.debug(results_response.text)

    results_response_data = results_response.json()

    METRIC_BATCH_RUNTIME.labels(request_type).set(int(time.monotonic() - test_start))
    METRIC_BATCH_SUCCESS.labels(request_type).set(1 if results_response_data["request"]["status"] == "done" else 0)

    for domain, results in results_response_data["domains"].items():
        METRIC_BATCH_DOMAIN.labels(request_type, domain).set(1)
        METRIC_BATCH_DOMAIN_SUCCESS.labels(request_type, domain).set(1 if results["status"] == "ok" else 0)
        METRIC_BATCH_DOMAIN_SCORE.labels(request_type, domain).set(results["scoring"]["percentage"])

        # verdict and status are exported as labels, the series value only marks presence
        for category, result in results["results"]["categories"].items():
            METRIC_BATCH_DOMAIN_CATEGORIES.labels(
                request_type, domain, category, result["verdict"], result["status"]
            ).inc(1)

        for test, result in results["results"]["tests"].items():
            METRIC_BATCH_DOMAIN_TESTS.labels(request_type, domain, test, result["verdict"], result["status"]).inc(1)
183+
184+
185+
def run_batch_tests():
    """Run a batch test for every request type and record run/failure/timeout metrics.

    Errors in one request type are logged and recorded in the metrics, they
    do not prevent the remaining request types from being tested.
    """
    for request_type in REQUEST_TYPES:
        domains = TEST_DOMAINS[request_type]
        log.info("testing: %s %s", request_type, domains)

        # initialise all series so they are exported even when the run fails
        METRIC_BATCH_RUN.labels(request_type).set(1)
        METRIC_BATCH_FAILURE.labels(request_type).set(0)
        METRIC_BATCH_TIMEOUT.labels(request_type).set(0)
        METRIC_BATCH_SUCCESS.labels(request_type).set(0)
        try:
            run_test_batch(request_type, domains)

        except (TimeoutError, requests.exceptions.Timeout):
            # a timeout is still a failed run, but gets its own metric so it
            # can be told apart from other errors
            log.exception("Timeout during test")
            METRIC_BATCH_TIMEOUT.labels(request_type).set(1)
            METRIC_BATCH_FAILURE.labels(request_type).set(1)

        except Exception:
            log.exception("Error during test")
            METRIC_BATCH_FAILURE.labels(request_type).set(1)
200+
201+
202+
def main():
    """Run the batch tests and export the collected metrics.

    With --debug the metrics are printed to stdout, otherwise they are
    written to OUTPUT_TEXTFILE in Prometheus text format.
    """
    logging.basicConfig(level=logging.DEBUG if DEBUG else logging.ERROR)

    # disable internal metrics
    REGISTRY.unregister(prometheus_client.GC_COLLECTOR)
    REGISTRY.unregister(prometheus_client.PLATFORM_COLLECTOR)
    REGISTRY.unregister(prometheus_client.PROCESS_COLLECTOR)

    # run test probes against domains and collect metrics
    run_batch_tests()

    # write metrics to stdout or file in prometheus textfile format
    metrics = generate_latest(REGISTRY)
    if DEBUG:
        print(metrics.decode())
        return

    # write to a temporary file in the same directory and rename it into
    # place, so a scrape never reads a partially written file; the temporary
    # name does not end in .prom
    temporary_textfile = OUTPUT_TEXTFILE + ".tmp"
    with open(temporary_textfile, "wb") as f:
        f.write(metrics)
    os.replace(temporary_textfile, OUTPUT_TEXTFILE)
219+
220+
221+
# only run when executed as a script and explicitly enabled through the environment
if __name__ == "__main__":
    batch_tests_enabled = os.environ.get("CRON_15MIN_RUN_TESTS_BATCH", "False")
    if batch_tests_enabled == "True":
        main()

docker/defaults.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@ CRON_WEEKLY_POSTGRESQL_BACKUP=False
240240
# enable running tests every 15 minutes for metrics collection
241241
CRON_15MIN_RUN_TESTS=True
242242

243+
# enable running batch tests every 15 minutes for metrics collection; enable this in local.env for batch deployments
244+
CRON_15MIN_RUN_TESTS_BATCH=False
245+
243246
# enables internet.nl specific content (eg: contact information, faq, security.txt), only enable for internet.nl
244247
# instances. For customization see: documentation/Customize.md
245248
INTERNETNL_BRANDING=False

docker/develop.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ LOGGING_DRIVER=json-file
6464
CRON_DAILY_POSTGRESQL_BACKUP=False
6565
CRON_WEEKLY_POSTGRESQL_BACKUP=False
6666
CRON_15MIN_RUN_TESTS=False
67+
CRON_15MIN_RUN_TESTS_BATCH=False
6768

6869
INTERNETNL_BRANDING=False
6970

docker/docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,7 @@ services:
681681
- DB_PASSWORD=password
682682
- CRON_DAILY_POSTGRESQL_BACKUP
683683
- CRON_WEEKLY_POSTGRESQL_BACKUP
684+
- CRON_15MIN_RUN_TESTS_BATCH
684685
- IPV4_IP_APP_INTERNAL
685686
- INTERNETNL_DOMAINNAME
686687
- INTERNETNL_CACHE_TTL
@@ -708,6 +709,7 @@ services:
708709
- postgres-backups:/var/lib/postgresql/backups
709710
- nginx-logs-exporter:/var/log/nginx/prometheus-nginxlog-exporter/
710711
- prometheus-textfile-directory:/prometheus-textfile-directory
712+
# - ./cron/periodic:/etc/periodic
711713

712714
healthcheck:
713715
test: ["CMD", "pgrep", "crond"]

0 commit comments

Comments
 (0)