diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 937481b05..c2b4ccb4d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,6 +14,9 @@ jobs: steps: - uses: actions/checkout@v2 + - name: install requirements + run: pip install ruamel.yaml + - name: check embedded chart code run: ./ci/check_embedded_chart_code.py diff --git a/binderhub/app.py b/binderhub/app.py index ed2dbe5a1..51fc1965e 100644 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -6,6 +6,7 @@ import json import logging import os +import random import re import secrets from binascii import a2b_hex @@ -612,6 +613,41 @@ def _default_build_token_secret(self): """ ) + federation_id = Unicode( + config=True, + help=""" + My id in the federation. + + Used to exclude myself when checking federation members, + so all federation members can share the same federation config. + """, + ) + federation_members = Dict( + key_trait=Unicode(), + value_trait=Unicode(), + config=True, + help=""" + Dict of "federation-id": "url" for federation members. + + Used for federation-wide application of quotas. + """, + ) + federation_check_seconds = Integer( + 180, + config=True, + help=""" + Interval (in seconds) on which to check the health of other federtion members. + """, + ) + federation_status = Dict( + key_trait=Unicode(), + value_trait=Dict(), + help="""status dict of federation members + + Includes current counts of repos + """ + ) + ban_networks = Dict( config=True, help=""" @@ -812,6 +848,7 @@ def initialize(self, *args, **kwargs): "auth_enabled": self.auth_enabled, "event_log": self.event_log, "normalized_origin": self.normalized_origin, + "federation_status": self.federation_status, } ) if self.auth_enabled: @@ -874,6 +911,56 @@ def stop(self): self.http_server.stop() self.build_pool.shutdown() + def watch_federation(self): + """Watch federation members for their health, repo counts""" + if not self.federation_members: + # nothing to do + return + if not self.federation_id: + self.log.warning( + "BinderHub.federation_id not set, I don't know who I am in the federation!" + ) + + for federation_id, url in self.federation_members.items(): + if federation_id != self.federation_id: + health_url = url_path_join(url, "health") + self.log.info( + f"Watching federation member {federation_id} at {health_url}" + ) + asyncio.ensure_future( + self.watch_federation_member(federation_id, health_url) + ) + + async def watch_federation_member(self, federation_id, health_url): + """Long-running coroutine for checking health of one federation member""" + while True: + repo_counts = {} + try: + self.log.debug( + f"Checking federation member {federation_id} at {health_url}" + ) + response = await AsyncHTTPClient().fetch(health_url) + status = json.loads(response.body) + for check in status["checks"]: + if check.get("service") == "Pod quota": + print(check) + total = check["total_pods"] + quota = check["quota"] + self.log.debug(f"Federation member {federation_id} running {total}/{quota} pods") + repo_counts = check.get("repo_counts", {}) + break + else: + self.log.warning(f"No Pod quota check for {federation_id}: {status}") + except Exception as e: + self.log.error(f"Error checking federation member {federation_id}: {e}") + + self.federation_status[federation_id] = { + "repo_counts": repo_counts, + } + # add some jitter, +/- 10% + t = self.federation_check_seconds * (0.9 + 0.2 * random.random()) + await asyncio.sleep(t) + async def watch_build_pods(self): """Watch build pods @@ -905,6 +992,9 @@ def start(self, run_loop=True): self.http_server.listen(self.port) if self.builder_required: asyncio.ensure_future(self.watch_build_pods()) + if self.federation_members: + self.watch_federation() + if run_loop: tornado.ioloop.IOLoop.current().start() diff --git a/binderhub/binderspawner_mixin.py b/binderhub/binderspawner_mixin.py index 99d632217..42e24248f 100644 --- a/binderhub/binderspawner_mixin.py +++ b/binderhub/binderspawner_mixin.py @@ -2,12 +2,12 @@ Helpers for creating BinderSpawners FIXME: -This file is defined in helm-chart/binderhub/values.yaml and is copied to -binderhub/binderspawner_mixin.py by setup.py so that it can be used by other JupyterHub -container spawners. +This file is defined in binderhub/binderspawner_mixin.py +and is copied to helm-chart/binderhub/values.yaml +by ci/check_embedded_chart_code.py -The BinderHub repo is just used as the distribution mechanism for this spawner, BinderHub -itself doesn't require this code. +The BinderHub repo is just used as the distribution mechanism for this spawner, +BinderHub itself doesn't require this code. Longer term options include: - Move BinderSpawnerMixin to a separate Python package and include it in the Z2JH Hub @@ -87,6 +87,11 @@ def get_args(self): return args def start(self): + annotations = self.extra_annotations + if "repo_url" in self.user_options: + annotations["binder.hub.jupyter.org/repo_url"]: self.user_options["repo_url"] + if "binder_ref_url" in self.user_options: + annotations["binder.hub.jupyter.org/ref_url"]: self.user_options["binder_ref_url"] if not self.auth_enabled: if 'token' not in self.user_options: raise web.HTTPError(400, "token required") diff --git a/binderhub/builder.py b/binderhub/builder.py index fb3d24a4f..eb9738fc7 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -551,13 +551,11 @@ async def launch(self, provider): return for pod in pods: - for container in pod["spec"]["containers"]: - # is the container running the same image as us? - # if so, count one for the current repo. - image = container["image"].rsplit(":", 1)[0] - if image == image_no_tag: - matching_pods += 1 - break + if pod["metadata"]["annotations"].get("binder.hub.jupyter.org/repo_url") == self.repo_url: + matching_pods += 1 + for federation_id, fed_info in self.settings["federation_status"].items(): + repo_counts = fed_info.get("repo_counts", {}) + matching_pods += repo_counts.get(self.repo_url, 0) if repo_quota and matching_pods >= repo_quota: LAUNCH_COUNT.labels( diff --git a/binderhub/health.py b/binderhub/health.py index 11b743555..0041cb087 100644 --- a/binderhub/health.py +++ b/binderhub/health.py @@ -165,10 +165,28 @@ async def check_pod_quota(self): quota = self.settings["pod_quota"] total_pods = n_user_pods + n_build_pods + repo_counts = {} + for pod in user_pods: + # old way + repo_url = pod["metadata"]["annotations"].get( + "binder.hub.jupyter.org/repo_url", None + ) + # old way, get it from container env + if not repo_url: + for container in pod["spec"]["containers"]: + for env in container["env"]: + if env["name"] == "BINDER_REPO_URL": + repo_url = env.get("value") + break + if repo_url: + repo_counts.setdefault(repo_url, 0) + repo_counts[repo_url] += 1 + usage = { "total_pods": total_pods, "build_pods": n_build_pods, "user_pods": n_user_pods, + "repo_counts": repo_counts, "quota": quota, "ok": total_pods <= quota if quota is not None else True, } diff --git a/ci/check_embedded_chart_code.py b/ci/check_embedded_chart_code.py index 2f5e829ae..f369ace44 100755 --- a/ci/check_embedded_chart_code.py +++ b/ci/check_embedded_chart_code.py @@ -11,7 +11,11 @@ import difflib import os import sys -import yaml +from ruamel.yaml import YAML + +yaml = YAML(typ="rt") +yaml.preserve_quotes = True +yaml.indent(mapping=2, sequence=4, offset=2) parser = argparse.ArgumentParser(description='Check embedded chart code') parser.add_argument('--update', action='store_true', help='Update binderhub code from values.yaml') @@ -21,21 +25,28 @@ binderspawner_mixin_py = os.path.join(root, 'binderhub', 'binderspawner_mixin.py') values_yaml = os.path.join(root, 'helm-chart', 'binderhub', 'values.yaml') -with open(values_yaml) as f: - values = yaml.safe_load(f) - values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'].splitlines() +with open(binderspawner_mixin_py, 'r') as f: + py_code = f.read() + if args.update: - with open(binderspawner_mixin_py, 'w') as f: - f.write(values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin']) + with open(values_yaml) as f: + values = yaml.load(f) + values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'] + if values_code != py_code: + print(f"Generating {values_yaml} from {binderspawner_mixin_py}") + values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'] = py_code + with open(values_yaml, "w") as f: + yaml.dump(values, f) else: - with open(binderspawner_mixin_py, 'r') as f: - py_code = f.read().splitlines() + with open(values_yaml) as f: + values = yaml.load(f) + values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'] - difflines = list(difflib.context_diff(values_code, py_code)) + difflines = list(difflib.context_diff(values_code.splitlines(), py_code.splitlines())) if difflines: print('\n'.join(difflines)) print('\n') print('Values code is not in sync with binderhub/binderspawner_mixin.py') - print('Run `python {} --update` to update binderhub/binderspawner_mixin.py from values.yaml'.format(sys.argv[0])) + print('Run `python {} --update` to update values.yaml from binderhub/binderspawner_mixin.py'.format(sys.argv[0])) sys.exit(1) diff --git a/dev-requirements.txt b/dev-requirements.txt index bf85d33bd..7afc117d3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -10,10 +10,6 @@ pycurl pytest pytest-asyncio pytest-cov -# pyyaml is used by the scripts in tools/ and could absolutely be replaced by -# using ruamel.yaml. ruamel.yaml is more powerful than pyyaml, and that is -# relevant when writing YAML files back after having loaded them from disk. -pyyaml jupyter-repo2docker>=2021.08.0 requests ruamel.yaml>=0.15 diff --git a/helm-chart/binderhub/files/binderhub_config.py b/helm-chart/binderhub/files/binderhub_config.py index de94b7221..fd25fd295 100644 --- a/helm-chart/binderhub/files/binderhub_config.py +++ b/helm-chart/binderhub/files/binderhub_config.py @@ -2,7 +2,9 @@ import os from functools import lru_cache from urllib.parse import urlparse -import yaml +from ruamel.yaml import YAML + +yaml = YAML(typ="safe") def _merge_dictionaries(a, b): @@ -34,7 +36,7 @@ def _load_values(): if os.path.exists(path): print(f"Loading {path}") with open(path) as f: - values = yaml.safe_load(f) + values = yaml.load(f) cfg = _merge_dictionaries(cfg, values) else: print(f"No config at {path}") diff --git a/helm-chart/binderhub/values.yaml b/helm-chart/binderhub/values.yaml index 7a49eff31..f521f3f22 100644 --- a/helm-chart/binderhub/values.yaml +++ b/helm-chart/binderhub/values.yaml @@ -68,12 +68,12 @@ jupyterhub: Helpers for creating BinderSpawners FIXME: - This file is defined in helm-chart/binderhub/values.yaml and is copied to - binderhub/binderspawner_mixin.py by setup.py so that it can be used by other JupyterHub - container spawners. + This file is defined in binderhub/binderspawner_mixin.py + and is copied to helm-chart/binderhub/values.yaml + by ci/check_embedded_chart_code.py - The BinderHub repo is just used as the distribution mechanism for this spawner, BinderHub - itself doesn't require this code. + The BinderHub repo is just used as the distribution mechanism for this spawner, + BinderHub itself doesn't require this code. Longer term options include: - Move BinderSpawnerMixin to a separate Python package and include it in the Z2JH Hub @@ -153,6 +153,11 @@ jupyterhub: return args def start(self): + annotations = self.extra_annotations + if "repo_url" in self.user_options: + annotations["binder.hub.jupyter.org/repo_url"]: self.user_options["repo_url"] + if "binder_ref_url" in self.user_options: + annotations["binder.hub.jupyter.org/ref_url"]: self.user_options["binder_ref_url"] if not self.auth_enabled: if 'token' not in self.user_options: raise web.HTTPError(400, "token required") diff --git a/tools/generate-json-schema.py b/tools/generate-json-schema.py index 3390ea234..543dad55b 100755 --- a/tools/generate-json-schema.py +++ b/tools/generate-json-schema.py @@ -14,7 +14,8 @@ from collections.abc import MutableMapping -import yaml +from ruamel.yaml import YAML +yaml = YAML(typ="safe") here_dir = os.path.abspath(os.path.dirname(__file__)) schema_yaml = os.path.join( @@ -50,7 +51,7 @@ def run(): # Using these sets, we can validate further manually by printing the results # of set operations. with open(schema_yaml) as f: - schema = yaml.safe_load(f) + schema = yaml.load(f) # Drop what isn't relevant for a values.schema.json file packaged with the # Helm chart, such as the description keys only relevant for our diff --git a/tools/validate-against-schema.py b/tools/validate-against-schema.py index 8b90a4d1d..39b0c8126 100755 --- a/tools/validate-against-schema.py +++ b/tools/validate-against-schema.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 -import jsonschema import os -import yaml + +import jsonschema +from ruamel.yaml import YAML +yaml = YAML(typ="safe") here_dir = os.path.abspath(os.path.dirname(__file__)) schema_yaml = os.path.join( @@ -18,13 +20,13 @@ ) with open(schema_yaml) as f: - schema = yaml.safe_load(f) + schema = yaml.load(f) with open(values_yaml) as f: - values = yaml.safe_load(f) + values = yaml.load(f) with open(lint_and_validate_values_yaml) as f: - lint_and_validate_values = yaml.safe_load(f) + lint_and_validate_values = yaml.load(f) with open(binderhub_chart_config_yaml) as f: - binderhub_chart_config_yaml = yaml.safe_load(f) + binderhub_chart_config_yaml = yaml.load(f) # Validate values.yaml against schema print("Validating values.yaml against schema.yaml...")