Skip to content

Commit

Permalink
unbound: fix healthcheck logging + added fail tolerance to checks (#6004
Browse files Browse the repository at this point in the history
)

* unbound: fix healthcheck logging to stdout + rewrote healthcheck logic

* compose: bump unbound tag

* unbound: fixed healthcheck logic
  • Loading branch information
DerLinkman authored Aug 13, 2024
1 parent b1c1e40 commit b26ccc2
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 58 deletions.
15 changes: 11 additions & 4 deletions data/Dockerfiles/unbound/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ LABEL maintainer = "The Infrastructure Company GmbH <[email protected]>"
RUN apk add --update --no-cache \
curl \
bind-tools \
coreutils \
unbound \
bash \
openssl \
drill \
tzdata \
syslog-ng \
supervisor \
&& curl -o /etc/unbound/root.hints https://www.internic.net/domain/named.cache \
&& chown root:unbound /etc/unbound \
&& adduser unbound tty \
&& adduser unbound tty \
&& chmod 775 /etc/unbound

EXPOSE 53/udp 53/tcp
Expand All @@ -21,9 +24,13 @@ COPY docker-entrypoint.sh /docker-entrypoint.sh

# healthcheck (dig, ping)
COPY healthcheck.sh /healthcheck.sh
COPY syslog-ng.conf /etc/syslog-ng/syslog-ng.conf
COPY supervisord.conf /etc/supervisor/supervisord.conf
COPY stop-supervisor.sh /usr/local/sbin/stop-supervisor.sh

RUN chmod +x /healthcheck.sh
HEALTHCHECK --interval=30s --timeout=30s CMD [ "/healthcheck.sh" ]
HEALTHCHECK --interval=30s --timeout=10s \
CMD sh -c '[ -f /tmp/healthcheck_status ] && [ "$(cat /tmp/healthcheck_status)" -eq 0 ] || exit 1'

ENTRYPOINT ["/docker-entrypoint.sh"]

CMD ["/usr/sbin/unbound"]
CMD exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
132 changes: 79 additions & 53 deletions data/Dockerfiles/unbound/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -1,76 +1,102 @@
#!/bin/bash

# Skip Unbound (DNS Resolver) Healthchecks (NOT Recommended!)
if [[ "${SKIP_UNBOUND_HEALTHCHECK}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
SKIP_UNBOUND_HEALTHCHECK=y
fi

# Reset logfile
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check - logs can be found in /var/log/healthcheck.log"
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check" > /var/log/healthcheck.log
STATUS_FILE="/tmp/healthcheck_status"
RUNS=0

# Declare log function for logfile inside container
function log_to_file() {
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1" >> /var/log/healthcheck.log
# Declare log function for logfile to stdout
function log_to_stdout() {
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1"
}

# General Ping function to check general pingability
function check_ping() {
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")

for ip in "${ipstoping[@]}" ; do
ping -q -c 3 -w 5 "$ip"
if [ $? -ne 0 ]; then
log_to_file "Healthcheck: Couldn't ping $ip for 5 seconds... Gave up!"
log_to_file "Please check your internet connection or firewall rules to fix this error, because a simple ping test should always go through from the unbound container!"
return 1
fi
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")
local fail_tolerance=1
local failures=0

for ip in "${ipstoping[@]}" ; do
success=false
for ((i=1; i<=3; i++)); do
ping -q -c 3 -w 5 "$ip" > /dev/null
if [ $? -eq 0 ]; then
success=true
break
else
log_to_stdout "Healthcheck: Failed to ping $ip on attempt $i. Trying again..."
fi
done

if [ "$success" = false ]; then
log_to_stdout "Healthcheck: Couldn't ping $ip after 3 attempts. Marking this IP as failed."
((failures++))
fi
done

if [ $failures -gt $fail_tolerance ]; then
log_to_stdout "Healthcheck: Too many ping failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
return 1
fi

return 0

log_to_file "Healthcheck: Ping Checks WORKING properly!"
return 0
}

# General DNS Resolve Check against Unbound Resolver himself
function check_dns() {
declare -a domains=("mailcow.email" "github.com" "hub.docker.com")

for domain in "${domains[@]}" ; do
for ((i=1; i<=3; i++)); do
dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 > /dev/null
if [ $? -ne 0 ]; then
log_to_file "Healthcheck: DNS Resolution Failed on $i attempt! Trying again..."
if [ $i -eq 3 ]; then
log_to_file "Healthcheck: DNS Resolution not possible after $i attempts... Gave up!"
log_to_file "Maybe check your outbound firewall, as it needs to resolve DNS over TCP AND UDP!"
return 1
fi
declare -a domains=("fuzzy.mailcow.email" "github.com" "hub.docker.com")
local fail_tolerance=1
local failures=0

for domain in "${domains[@]}" ; do
success=false
for ((i=1; i<=3; i++)); do
dig_output=$(dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 2>/dev/null)
dig_rc=$?

if [ $dig_rc -ne 0 ] || [ -z "$dig_output" ]; then
log_to_stdout "Healthcheck: DNS Resolution Failed on attempt $i for $domain! Trying again..."
else
success=true
break
fi
done
done

log_to_file "Healthcheck: DNS Resolver WORKING properly!"
return 0

}
if [ "$success" = false ]; then
log_to_stdout "Healthcheck: DNS Resolution not possible after 3 attempts for $domain... Gave up!"
((failures++))
fi
done

if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
log_to_file "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
exit 0
if [ $failures -gt $fail_tolerance ]; then
log_to_stdout "Healthcheck: Too many DNS failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
return 1
fi

# run checks, if check is not returning 0 (return value if check is ok), healthcheck will exit with 1 (marked in docker as unhealthy)
check_ping
return 0
}

if [ $? -ne 0 ]; then
exit 1
fi
while true; do

check_dns
if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
log_to_stdout "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
echo "0" > $STATUS_FILE
sleep 365d
fi

if [ $? -ne 0 ]; then
exit 1
fi
# run checks, if check is not returning 0 (return value if check is ok), healthcheck will exit with 1 (marked in docker as unhealthy)
check_ping
PING_STATUS=$?

check_dns
DNS_STATUS=$?

if [ $PING_STATUS -ne 0 ] || [ $DNS_STATUS -ne 0 ]; then
echo "1" > $STATUS_FILE

else
echo "0" > $STATUS_FILE
fi

sleep 30

log_to_file "Healthcheck: ALL CHECKS WERE SUCCESSFUL! Unbound is healthy!"
exit 0
done
10 changes: 10 additions & 0 deletions data/Dockerfiles/unbound/stop-supervisor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

printf "READY\n";

while read line; do
echo "Processing Event: $line" >&2;
kill -3 $(cat "/var/run/supervisord.pid")
done < /dev/stdin

rm -rf /tmp/healthcheck_status
32 changes: 32 additions & 0 deletions data/Dockerfiles/unbound/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[supervisord]
nodaemon=true
user=root
pidfile=/var/run/supervisord.pid

[program:syslog-ng]
command=/usr/sbin/syslog-ng --foreground --no-caps
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autostart=true

[program:unbound]
command=/usr/sbin/unbound
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autorestart=true

[program:unbound-healthcheck]
command=/bin/bash /healthcheck.sh
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autorestart=true

[eventlistener:processes]
command=/usr/local/sbin/stop-supervisor.sh
events=PROCESS_STATE_STOPPED, PROCESS_STATE_EXITED, PROCESS_STATE_FATAL
21 changes: 21 additions & 0 deletions data/Dockerfiles/unbound/syslog-ng.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
@version: 4.5
@include "scl.conf"
options {
chain_hostnames(off);
flush_lines(0);
use_dns(no);
use_fqdn(no);
owner("root"); group("adm"); perm(0640);
stats(freq(0));
keep_timestamp(no);
bad_hostname("^gconfd$");
};
source s_dgram {
unix-dgram("/dev/log");
internal();
};
destination d_stdout { pipe("/dev/stdout"); };
log {
source(s_dgram);
destination(d_stdout);
};
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
services:

unbound-mailcow:
image: mailcow/unbound:1.22
image: mailcow/unbound:1.23
environment:
- TZ=${TZ}
- SKIP_UNBOUND_HEALTHCHECK=${SKIP_UNBOUND_HEALTHCHECK:-n}
Expand Down

0 comments on commit b26ccc2

Please sign in to comment.