From 8d2f542e28acf06b979614937797e3002e9e1a1c Mon Sep 17 00:00:00 2001 From: Simon Li Date: Tue, 13 Jun 2023 23:53:18 +0100 Subject: [PATCH] Add aws curvenote binderhub Use manual build of https://github.com/jupyterhub/binderhub/pull/1724 --- config-kube-system/aws-curvenote.yaml | 19 -- config-kube-system/curvenote.yaml | 31 +++ config/aws-curvenote.yaml | 0 config/curvenote.yaml | 297 ++++++++++++++++++++++++++ deploy.py | 75 ++++++- mybinder/Chart.yaml | 7 + mybinder/values.yaml | 3 + 7 files changed, 408 insertions(+), 24 deletions(-) delete mode 100644 config-kube-system/aws-curvenote.yaml create mode 100644 config-kube-system/curvenote.yaml delete mode 100644 config/aws-curvenote.yaml create mode 100644 config/curvenote.yaml diff --git a/config-kube-system/aws-curvenote.yaml b/config-kube-system/aws-curvenote.yaml deleted file mode 100644 index 17e7810607..0000000000 --- a/config-kube-system/aws-curvenote.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Install the more modern load-balancer controller: -# https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html -aws-load-balancer-controller: - enabled: true - clusterName: binderhub - # Must match the IRSA service account name - name: aws-load-balancer-controller - serviceAccount: - annotations: - eks.amazonaws.com/role-arn: "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/IRSA-aws-load-balancer-controller" - -aws-ebs-csi-driver: - enabled: true - controller: - serviceAccount: - # Must match the IRSA service account name - name: ebs-csi-controller-sa - annotations: - eks.amazonaws.com/role-arn: "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/IRSA-aws-ebs-csi-driver" diff --git a/config-kube-system/curvenote.yaml b/config-kube-system/curvenote.yaml new file mode 100644 index 0000000000..277dfd9f99 --- /dev/null +++ b/config-kube-system/curvenote.yaml @@ -0,0 +1,31 @@ +# Install the more modern load-balancer controller: +# https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html +aws-load-balancer-controller: + enabled: true + clusterName: binderhub + clusterSecretsPermissions: + allowAllSecrets: true + enableShield: false + enableWaf: false + enableWafv2: false + logLevel: debug + serviceAccount: + # Must match the IRSA service account name + name: aws-load-balancer-controller + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-load-balancer-controller" + +aws-ebs-csi-driver: + enabled: true + controller: + serviceAccount: + # Must match the IRSA service account name + name: ebs-csi-controller-sa + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-ebs-csi-controller-sa" + storageClasses: + - name: ebs-sc + # Note this results in EKS having two default StorageClasses, so to be sure + # always specify the storage class in the PVC. + annotations: + storageclass.kubernetes.io/is-default-class: "true" diff --git a/config/aws-curvenote.yaml b/config/aws-curvenote.yaml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/config/curvenote.yaml b/config/curvenote.yaml new file mode 100644 index 0000000000..78a5f2050f --- /dev/null +++ b/config/curvenote.yaml @@ -0,0 +1,297 @@ +projectName: curvenote + +# userNodeSelector: &userNodeSelector +# mybinder.org/pool-type: users +# coreNodeSelector: &coreNodeSelector +# mybinder.org/pool-type: core + +binderhub: + # Manual build with https://github.com/jupyterhub/binderhub/pull/1637 + image: + name: docker.io/manics/binderhub + tag: pr1724-2023-06-27-21-50-amd64 + + config: + BinderHub: + # hub_url: https://hub.curvenote.mybinder.org + hub_url: https://hub.3.13.147.101.nip.io + hub_url_local: http://proxy-public + badge_base_url: https://mybinder.org + # build_node_selector: + # mybinder.org/pool-type: builds + sticky_builds: true + image_prefix: 166088433508.dkr.ecr.us-east-2.amazonaws.com/binderhub/ + # log_level: DEBUG + # TODO: we should have CPU requests, too + # use this to limit the number of builds per node + # complicated: dind memory request + KubernetesBuildExecutor.memory_request * builds_per_node ~= node memory + KubernetesBuildExecutor: + memory_request: "2G" + + LaunchQuota: + total_quota: 10 + + registry: + url: 166088433508.dkr.ecr.us-east-2.amazonaws.com + username: "" + password: "" + + replicas: 1 + # nodeSelector: *coreNodeSelector + + # extraVolumes: + # - name: secrets + # secret: + # secretName: events-archiver-secrets + # extraVolumeMounts: + # - name: secrets + # mountPath: /secrets + # readOnly: true + # extraEnv: + # GOOGLE_APPLICATION_CREDENTIALS: /secrets/service-account.json + + extraConfig: + 01-eventlog: | + # Disabled until GOOGLE_APPLICATION_CREDENTIALS secret is available + + 10-external-registry-helper: | + # from binderhub.registry import ExternalRegistryHelper + import json + from tornado import httpclient + from traitlets import Unicode + from binderhub.registry import DockerRegistry + + + class ExternalRegistryHelper(DockerRegistry): + + service_url = Unicode( + "http://binderhub-container-registry-helper:8080", + allow_none=False, + help="The URL of the registry helper micro-service.", + config=True, + ) + + auth_token = Unicode( + "secret-token", + help="The auth token to use when accessing the registry helper micro-service.", + config=True, + ) + + async def _request(self, endpoint, **kwargs): + client = httpclient.AsyncHTTPClient() + repo_url = f"{self.service_url}{endpoint}" + headers = {"Authorization": f"Bearer {self.auth_token}"} + repo = await client.fetch(repo_url, headers=headers, **kwargs) + return json.loads(repo.body.decode("utf-8")) + + async def _get_image(self, image, tag): + repo_url = f"/image/{image}:{tag}" + self.log.debug(f"Checking whether image exists: {repo_url}") + try: + image_json = await self._request(repo_url) + return image_json + except httpclient.HTTPError as e: + if e.code == 404: + return None + else: + raise + + async def get_image_manifest(self, image, tag): + """ + Checks whether the image exists in the registry. + + If the container repository doesn't exist create the repository. + + The container repository name may not be the same as the BinderHub image name. + + E.g. Oracle Container Registry (OCIR) has the form: + OCIR_NAMESPACE/OCIR_REPOSITORY_NAME:TAG + + These extra components are handled automatically by the registry helper + so BinderHub repository names such as OCIR_NAMESPACE/OCIR_REPOSITORY_NAME + can be used directly, it is not necessary to remove the extra components. + + Returns the image manifest if the image exists, otherwise None + """ + + repo_url = f"/repo/{image}" + self.log.debug(f"Checking whether repository exists: {repo_url}") + try: + repo_json = await self._request(repo_url) + except httpclient.HTTPError as e: + if e.code == 404: + repo_json = None + else: + raise + + if repo_json: + return await self._get_image(image, tag) + else: + self.log.debug(f"Creating repository: {repo_url}") + await self._request(repo_url, method="POST", body="") + return None + + async def get_credentials(self, image, tag): + token_url = f"/token/{image}:{tag}" + self.log.debug(f"Getting registry token: {token_url}") + token_json = None + try: + token_json = await self._request(token_url, method="POST", body="") + except httpclient.HTTPError as e: + if e.code != 404: + raise + token = dict((k, v) for (k, v) in token_json.items() if k in ["username", "password", "registry"]) + self.log.debug(f"Returning registry token: {token}") + return token + + c.BinderHub.registry_class = ExternalRegistryHelper + c.ExternalRegistryHelper.service_url = "http://curvenote-binderhub-container-registry-helper:8080" + c.ExternalRegistryHelper.auth_token = "secret-token-use-existing-secret-instead" + + dind: + resources: + requests: + cpu: "4" + memory: 16Gi + limits: + cpu: "7" + memory: 24Gi + + ingress: + hosts: + # - curvenote.mybinder.org + - 3.13.147.101.nip.io + + jupyterhub: + # singleuser: + # nodeSelector: *userNodeSelector + # hub: + # nodeSelector: *coreNodeSelector + hub: + db: + pvc: + storageClassName: ebs-sc + + singleuser: + initContainers: + - name: tc-init + image: jupyterhub/mybinder.org-tc-init:2020.12.4-0.dev.git.4289.h140cef52 + imagePullPolicy: IfNotPresent + env: + - name: WHITELIST_CIDR + value: 10.0.0.0/8 + - name: EGRESS_BANDWIDTH + value: 1mbit + securityContext: + # capabilities.add seems to be disabled + # by the `runAsUser: 1000` in the pod-level securityContext + # unless we explicitly run as root + runAsUser: 0 + capabilities: + add: + - NET_ADMIN + + proxy: + chp: + # nodeSelector: *coreNodeSelector + resources: + requests: + cpu: "1" + limits: + cpu: "1" + ingress: + hosts: + # - hub.curvenote.mybinder.org + - hub.3.13.147.101.nip.io + tls: + - secretName: kubelego-tls-hub + hosts: + # - hub.curvenote.mybinder.org + - hub.3.13.147.101.nip.io + scheduling: + userPlaceholder: + enabled: false + replicas: 50 + userScheduler: + enabled: false + # nodeSelector: *coreNodeSelector + cull: + # maxAge: 15 min since we're just testing + maxAge: 900 + + imageCleaner: + # Use 40GB as upper limit, size is given in bytes + imageGCThresholdHigh: 40e9 + imageGCThresholdLow: 30e9 + imageGCThresholdType: "absolute" + +cryptnono: + enabled: false + +grafana: + enabled: false + # nodeSelector: *coreNodeSelector + ingress: + hosts: + # - grafana.curvenote.mybinder.org + tls: + - hosts: + # - grafana.curvenote.mybinder.org + secretName: kubelego-tls-grafana + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: prometheus + orgId: 1 + type: prometheus + url: https://prometheus.curvenote.mybinder.org + access: direct + isDefault: true + editable: false + persistence: + storageClassName: csi-cinder-high-speed + +prometheus: + enabled: false + server: + # nodeSelector: *coreNodeSelector + persistentVolume: + size: 50Gi + retention: 30d + ingress: + hosts: + # - prometheus.curvenote.mybinder.org + tls: + - hosts: + # - prometheus.curvenote.mybinder.org + secretName: kubelego-tls-prometheus + +ingress-nginx: + controller: + service: + # loadBalancerIP: 162.19.17.37 + annotations: + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + +static: + ingress: + hosts: + # - static.curvenote.mybinder.org + - static.3.13.147.101.nip.io + +minesweeper: + # Requires secrets + enabled: false + image: jupyterhub/mybinder.org-minesweeper:2020.12.4-0.dev.git.5080.hf35cc80d + + +binderhub-container-registry-helper: + enabled: true + auth_token: secret-token-use-existing-secret-instead + # auth_existing_secret_name: + replicaCount: 2 + serviceAccount: + name: binderhub-container-registry-helper + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-binderhub-ecr" diff --git a/deploy.py b/deploy.py index 61a1f5274f..077519cdb8 100755 --- a/deploy.py +++ b/deploy.py @@ -29,6 +29,9 @@ "prod": "us-central1", } +# Mapping of config name to cluster name for AWS EKS deployments +AWS_DEPLOYMENTS = {"curvenote": "binderhub"} + # Mapping of cluster names (keys) to resource group names (values) for Azure deployments AZURE_RGs = {} @@ -140,6 +143,27 @@ def setup_auth_gcloud(release, cluster=None, dry_run=False): ) +def setup_auth_aws(cluster, dry_run=False): + """ + Set up authentication for EKS on AWS + + Assumes you already have an AWS CLI profile setup with access to EKS, + and that either this is the default profile (e.g. on CI) or you have set the + AWS_PROFILE environment variable. + """ + print(BOLD + GREEN + f"Obtaining AWS EKS kubeconfig for {cluster}" + NC, flush=True) + + eks_kubeconfig = [ + "aws", + "eks", + "update-kubeconfig", + "--name", + AWS_DEPLOYMENTS[cluster], + ] + stdout = check_output(eks_kubeconfig, dry_run) + print(stdout) + + def update_networkbans(cluster, dry_run=False): """ Run secrets/ban.py to update network bans @@ -160,12 +184,14 @@ def get_config_files(release, config_dir="config"): """Return the list of config files to load""" # common config files config_files = sorted(glob.glob(os.path.join(config_dir, "common", "*.yaml"))) - config_files.extend( - sorted(glob.glob(os.path.join("secrets", config_dir, "common", "*.yaml"))) - ) + # config_files.extend( + # sorted(glob.glob(os.path.join("secrets", config_dir, "common", "*.yaml"))) + # ) # release-specific config files for config_dir in (config_dir, os.path.join("secrets", config_dir)): - config_files.append(os.path.join(config_dir, release + ".yaml")) + f = os.path.join(config_dir, release + ".yaml") + if os.path.exists(f): + config_files.append(f) return config_files @@ -309,6 +335,41 @@ def patch_coredns(dry_run=False): ) +def deploy_kube_system_charts(release, name=None, dry_run=False): + """ + Some charts must be deployed into the kube-system namespace + """ + if not name: + name = release + log_name = f"mybinder-kube-system {release}" + + config_files = get_config_files(release, config_dir="config-kube-system") + if not config_files: + print(BOLD + GREEN + f"No config files found for {log_name}" + NC, flush=True) + return + + print(BOLD + GREEN + f"Starting helm upgrade for {log_name}" + NC, flush=True) + helm = [ + "helm", + "upgrade", + "--install", + "--cleanup-on-fail", + "--namespace=kube-system", + name, + "mybinder-kube-system", + ] + for config_file in config_files: + helm.extend(["-f", config_file]) + + check_call(helm, dry_run) + print( + BOLD + GREEN + f"SUCCESS: Helm upgrade for {log_name} completed" + NC, + flush=True, + ) + + wait_for_deployments_daemonsets("kube-system", dry_run) + + def main(): # parse command line args argparser = argparse.ArgumentParser() @@ -320,6 +381,7 @@ def main(): "prod", "ovh", "ovh2", + "curvenote", ], ) argparser.add_argument( @@ -383,10 +445,13 @@ def main(): setup_auth_azure(cluster, args.dry_run) elif cluster in GCP_PROJECTS: setup_auth_gcloud(args.release, cluster, args.dry_run) + elif cluster in AWS_DEPLOYMENTS: + setup_auth_aws(cluster, args.dry_run) else: raise Exception("Cloud cluster not recognised!") - update_networkbans(cluster, args.dry_run) + # update_networkbans(cluster, args.dry_run) + deploy_kube_system_charts(args.release, args.name, args.dry_run) deploy(args.release, args.name, args.dry_run) diff --git a/mybinder/Chart.yaml b/mybinder/Chart.yaml index ae9ad6ace0..135f8d1430 100644 --- a/mybinder/Chart.yaml +++ b/mybinder/Chart.yaml @@ -63,3 +63,10 @@ dependencies: version: 9.21.1 repository: https://kubernetes.github.io/autoscaler condition: cluster-autoscaler.enabled + + # Registry helper, used to create container repositories before pushing and to + # fetch dynamic registry credentials + - name: binderhub-container-registry-helper + version: 0.2.0 + repository: oci://ghcr.io/manics/binderhub-container-registry-helper + condition: binderhub-container-registry-helper.enabled diff --git a/mybinder/values.yaml b/mybinder/values.yaml index e33e9c5e3e..d5d9867620 100644 --- a/mybinder/values.yaml +++ b/mybinder/values.yaml @@ -600,3 +600,6 @@ minesweeper: # cluster-autoscaler: enabled: false + +binderhub-container-registry-helper: + enabled: false