|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +"""A script to notify, on slack, when "manual" fastly deploys happen. |
| 4 | +
|
| 5 | +All of our fastly services are intended to be deployed via the |
| 6 | +deploy-fastly jenkins job. However, sometimes people deploy a fastly |
| 7 | +service manually, by going to the fastly UI, hitting "clone", making |
| 8 | +some changes, and hitting "save". We do not want such changes to |
| 9 | +happen, since there is no record of them in source control, and indeed |
| 10 | +the change will be overwritten the next time a "proper" deploy |
| 11 | +happens, via deploy-fastly. |
| 12 | +
|
| 13 | +This script notices when such deploys happen. It works in concert |
| 14 | +with the deploy-fastly job to do its work: deploy-fastly says what |
| 15 | +versions it has deployed, and we talk to the fastly API to find out |
| 16 | +what versions are live that were *not* deployed by deploy-fastly. |
| 17 | +When we see one, we alert in an alerting channel, and also in |
| 18 | +#whats-happening, which is our record of all changes that affect our |
| 19 | +production system. |
| 20 | +
|
| 21 | +This is meant to be run every minute or so, via cron. |
| 22 | +""" |
| 23 | +import collections |
| 24 | +import json |
| 25 | +import http.client |
| 26 | +import logging |
| 27 | +import os |
| 28 | +import subprocess |
| 29 | +import sys |
| 30 | + |
| 31 | +# jenkins-server has alertlib installed in /usr as part of setup.sh |
| 32 | +import alertlib |
| 33 | + |
_FASTLY_HOST = 'api.fastly.com'

# One version of one fastly service, built from the fastly API response.
ServiceInfo = collections.namedtuple(
    "ServiceInfo",
    [
        "service_name",
        "service_id",
        "version",
        "is_active",
        "updated_at",
        "description",
    ],
)

# One (service-id, version) pair, the same two fields a deploys-file
# line carries.
DeploysFileInfo = collections.namedtuple(
    "DeploysFileInfo",
    ["service_id", "version"],
)
| 46 | + |
# sync-start:fastly-deploys-file jobs/deploy-fastly.groovy
# Directory holding both deploys files.
# NOTE(review): presumably the workspace of the deploy-fastly jenkins
# job, which writes the "good" file there -- confirm against the groovy.
_DATADIR = os.path.expanduser(
    "~jenkins/jobs/deploy/jobs/deploy-fastly/workspace")
# Default location of the file holding deploys made by deploy-fastly.
_GOOD_DEPLOYS_FILE = os.path.join(_DATADIR, "deployed_versions.txt")
# Default location of the file holding deploys we have already warned
# about (this script appends to it).
_BAD_DEPLOYS_FILE = os.path.join(_DATADIR, "manually_deployed_versions.txt")
| 52 | + |
| 53 | + |
def _parse_deploys_file(f):
    """Return a map from fastly service-id to a set of versions.

    Each input line looks like `luUUdGK4AEAIz1vqRyQ180:123`.
    This would give a return value like `{"luUUdGK4AEAIz1vqRyQ180": {123}}`.

    `f` is a file-like object opened in text mode.  Blank lines are
    ignored silently; any other line that lacks a `:` or whose version
    is not an integer is logged as a warning and skipped.
    """
    retval = {}
    for line in f.read().splitlines():
        if not line.strip():
            # A stray blank line is harmless; do not warn about it on
            # every run.
            continue
        try:
            parts = line.split(':')
            (service_id, version) = (parts[0], int(parts[1]))
        except (IndexError, ValueError):
            # IndexError: no ':' on the line.  ValueError: the version
            # is not an integer.
            logging.warning("Skipping malformed deploys-file line: '%s'", line)
            continue
        retval.setdefault(service_id, set()).add(version)
    return retval
| 70 | + |
| 71 | + |
def _create_deploys_file_line(service_info):
    """Return the `service_id:version` deploys-file line for service_info.

    The line is newline-terminated, so lines can be written back to back.
    """
    fields = (service_info.service_id, service_info.version)
    return '%s:%s\n' % fields
| 74 | +# sync-end:fastly-deploys-file |
| 75 | + |
| 76 | + |
def get_service_info(api_key):
    """Return a dict from service-id to ServiceInfos of locked versions.

    Issues a single `GET /service` request to the fastly API, sending
    `api_key` in the `Fastly-Key` header.  Each value of the returned
    dict is a list with one ServiceInfo per version of that service
    whose `locked` field is true; unlocked versions are omitted.

    Raises http.client.HTTPException if the response status is not 200.
    """
    conn = http.client.HTTPSConnection(_FASTLY_HOST)
    try:
        conn.request("GET", "/service", headers={'Fastly-Key': api_key})
        resp = conn.getresponse()
        # Read the whole body before closing the connection below.
        body = resp.read()
    finally:
        # Always release the socket, even if the request itself failed.
        conn.close()
    if resp.status != 200:
        raise http.client.HTTPException("Error talking to %s: response %s (%s)"
                                        % (_FASTLY_HOST, resp.status, body))
    data = json.loads(body)

    return {
        service['id']: [
            ServiceInfo(service_name=service['name'],
                        service_id=service['id'],
                        version=v['number'],
                        is_active=v['active'],
                        updated_at=v['updated_at'],
                        description=v['comment'])
            for v in service['versions']
            if v['locked']
        ]
        for service in data
    }
| 101 | + |
| 102 | + |
def get_deploys_to_warn(service_info,
                        good_deploys_by_service_id, bad_deploys_by_service_id):
    """Return the versions, across all services, that we should warn about.

    Arguments:
        service_info: a map from service-id to a list of ServiceInfo-like
            objects; only their `.version` attribute is read here.
        good_deploys_by_service_id: a map from service-id to the set of
            versions deployed via deploy-fastly.
        bad_deploys_by_service_id: a map from service-id to the set of
            versions we have already warned about.

    Returns a (possibly empty) list of the entries of `service_info`
    that are in neither set and are not older than the service's first
    good deploy.  Results from every service are combined into the one
    list.
    """
    retval = []
    for (service_id, versions) in service_info.items():
        good_versions = good_deploys_by_service_id.get(service_id, set())
        bad_versions = bad_deploys_by_service_id.get(service_id, set())

        # We want to warn about any deploy that is a) not good, and
        # b) that we haven't already warned about.  The first condition
        # means not in `good_deploys`, the second means not in `bad_deploys`.
        to_ignore = good_versions | bad_versions

        # We only start warning for a service the first time we see a
        # "good" deploy (via deploy-fastly.groovy) for that service.
        # That way, when introducing a new service, we don't log
        # for test-versions that were made before it went live.
        # With no good deploys at all, the threshold is sys.maxsize,
        # so nothing is reported for that service.
        first_good_deploy = min(good_versions, default=sys.maxsize)

        retval.extend(
            v for v in versions
            if v.version not in to_ignore and v.version >= first_good_deploy
        )
    return retval
| 124 | + |
| 125 | + |
def send_to_slack(slack_channel, service_infos_to_warn):
    """Send one slack message listing every version in service_infos_to_warn.

    The message is a fixed header followed by one bullet per entry
    giving the service name, version, a `*LIVE*` marker when the
    version is active, and the version's description.  It is sent to
    `slack_channel` via alertlib at INFO severity.
    """
    header = ('*These fastly services were deployed via the fastly UI, '
              'not the deploy-fastly jenkins job.* Make sure the fastly '
              'yaml files are up to date with these changes!')
    bullets = [
        '\n* `%s`: version %s%s (%s)'
        % (info.service_name,
           info.version,
           ' *LIVE*' if info.is_active else '',
           info.description)
        for info in service_infos_to_warn
    ]
    text = header + ''.join(bullets)

    alert = alertlib.Alert(text, severity=logging.INFO)
    alert.send_to_slack(
        slack_channel,
        sender='fastly',
        icon_emoji=':fastly:',
    )
| 141 | + |
| 142 | + |
if __name__ == '__main__':
    import argparse
    dflt = ' (default: %(default)s)'
    parser = argparse.ArgumentParser()
    parser.add_argument('--good-deploys-file', default=_GOOD_DEPLOYS_FILE,
                        help=('File holding deploys made by deploy-fastly'
                              + dflt))
    parser.add_argument('--bad-deploys-file', default=_BAD_DEPLOYS_FILE,
                        help=('File holding deploys we have already warned '
                              'about' + dflt))
    parser.add_argument('--slack-channel', default='#infrastructure-platform',
                        help='Slack channel to notify at' + dflt)
    args = parser.parse_args()

    # Fetch the fastly API token from the secret store.  We strip the
    # output because a stray trailing newline (e.g. one stored as part
    # of the secret) would make http.client reject the header value.
    api_key = subprocess.run(
        ["gcloud", "--project", "khan-academy",
         "secrets", "versions", "access", "latest",
         "--secret", "Fastly_read_only_config_API_token"],
        capture_output=True,
        check=True,
        encoding='utf-8',
    ).stdout.strip()

    if os.path.exists(args.good_deploys_file):
        with open(args.good_deploys_file) as f:
            good_deploys_by_service_id = _parse_deploys_file(f)
    else:
        # Without this file no service has a known "good" deploy, so
        # nothing will be warned about on this run.
        logging.warning("No good-deploys file found at %s",
                        args.good_deploys_file)
        good_deploys_by_service_id = {}

    if os.path.exists(args.bad_deploys_file):
        with open(args.bad_deploys_file) as f:
            bad_deploys_by_service_id = _parse_deploys_file(f)
    else:
        # It's expected this will be empty if nobody has ever done
        # anything wrong!
        logging.debug("No history file found at %s", args.bad_deploys_file)
        bad_deploys_by_service_id = {}

    service_info = get_service_info(api_key)
    service_infos_to_warn = get_deploys_to_warn(
        service_info, good_deploys_by_service_id, bad_deploys_by_service_id)

    if service_infos_to_warn:
        send_to_slack(args.slack_channel, service_infos_to_warn)
        # Record what we just warned about, so the next run does not
        # warn about the same versions again.
        with open(args.bad_deploys_file, 'a') as f:
            f.writelines(
                _create_deploys_file_line(si) for si in service_infos_to_warn
            )
0 commit comments