File tree Expand file tree Collapse file tree 4 files changed +21
-5
lines changed
Expand file tree Collapse file tree 4 files changed +21
-5
lines changed Original file line number Diff line number Diff line change 11#! /bin/sh
22set -e
3- # find nassl worker and restart the container(s)
4- docker ps --filter label=com.docker.compose.service=worker-nassl --quiet | xargs --no-run-if-empty docker restart
3+ # stop and start workers one at a time to ensure (batch) tasks are still being picked up
4+ # each worker is sent a TERM signal and given a 10 minute grace period before KILL is sent
5+ for worker in $( docker ps --filter label=com.docker.compose.service=worker-nassl --quiet) ; do
6+ docker stop " $worker "
7+ docker start " $worker "
8+ # wait for container to be healthy
9+ timeout 300 sh -c " while docker inspect \" $worker \" | jq --exit-status '.[0].State.Health.Status != \" healthy\" ' >/dev/null;do sleep 1;done" || true
10+ done
Original file line number Diff line number Diff line change @@ -21,4 +21,4 @@ $COMPOSE_CMD up --no-deps --no-recreate --wait --scale="$SERVICE=$(($REPLICAS*2)
2121docker rm --force " $OLD_CONTAINERS "
2222
2323# restore replica number to original
24- $COMPOSE_CMD scale $SERVICE =$REPLICAS
24+ $COMPOSE_CMD up --no-deps --no-recreate --wait --scale= " $SERVICE =$REPLICAS " " $SERVICE "
Original file line number Diff line number Diff line change 11#! /bin/sh
22set -e
3- # find worker and restart the container(s)
4- docker ps --filter label=com.docker.compose.service=worker --quiet | xargs --no-run-if-empty docker restart
3+ # stop and start workers one at a time to ensure (batch) tasks are still being picked up
4+ # each worker is sent a TERM signal and given a 10 minute grace period before KILL is sent
5+ for worker in $( docker ps --filter label=com.docker.compose.service=worker --quiet) ; do
6+ docker stop " $worker "
7+ docker start " $worker "
8+ # wait for container to be healthy
9+ timeout 300 sh -c " while docker inspect \" $worker \" | jq --exit-status '.[0].State.Health.Status != \" healthy\" ' >/dev/null;do sleep 1;done" || true
10+ done
Original file line number Diff line number Diff line change @@ -249,6 +249,8 @@ services:
249249 # time after which a SIGKILL is sent to celery after a SIGTERM (warm shutdown), default 10s
250250 # a grace period that is too short causes issues on batch when tasks are killed during the hourly worker restart
251251 stop_grace_period : 10m
252+ # SIGTERM is default, but make it explicit
253+ stop_signal : SIGTERM
252254
253255 depends_on :
254256 db-migrate :
@@ -736,6 +738,8 @@ services:
736738 environment :
737739 - AUTO_UPDATE_TO
738740 - WORKER_SLOW_REPLICAS
741+ - WORKER_REPLICAS
742+ - RELEASE
739743
740744 restart : unless-stopped
741745 logging :
You can’t perform that action at this time.
0 commit comments