diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 2e4910389..d9c8a2e7d 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -152,6 +152,7 @@ jobs: run: | curl -sf https://raw.githubusercontent.com/helm/helm/HEAD/scripts/get-helm-3 | DESIRED_VERSION=${HELM_VERSION} bash helm dependency update ./mybinder + helm dependency update ./mybinder-kube-system # Action Repo: https://github.com/sliteteam/github-action-git-crypt-unlock - name: "Stage 2: Unlock git-crypt secrets" @@ -290,6 +291,7 @@ jobs: run: | curl -sf https://raw.githubusercontent.com/helm/helm/HEAD/scripts/get-helm-3 | DESIRED_VERSION=${HELM_VERSION} bash helm dependency update ./mybinder + helm dependency update ./mybinder-kube-system - name: "Stage 2: Unlock git-crypt secrets" uses: sliteteam/github-action-git-crypt-unlock@8b1fa3ccc81e322c5c45fbab261eee46513fd3f8 diff --git a/.gitignore b/.gitignore index 6f108494c..897a44867 100644 --- a/.gitignore +++ b/.gitignore @@ -8,9 +8,9 @@ __pycache__ config/common/datacenter-*.yaml secrets/banned_hosts.txt secrets/config/common/bans.yaml -mybinder/charts -mybinder/requirements.lock -mybinder/Chart.lock +mybinder*/charts +mybinder*/requirements.lock +mybinder*/Chart.lock .ipynb_checkpoints diff --git a/.prettierignore b/.prettierignore index 9df8843e7..0f7ad7494 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1 +1,2 @@ mybinder/templates/ +terraform/aws/curvenote/cni/ diff --git a/chartpress.yaml b/chartpress.yaml index d2a420d8b..77a85d5d7 100644 --- a/chartpress.yaml +++ b/chartpress.yaml @@ -10,3 +10,4 @@ charts: valuesPath: minesweeper.image tc-init: valuesPath: binderhub.jupyterhub.singleuser.initContainers.0.image + - name: mybinder-kube-system diff --git a/config-kube-system/curvenote.yaml b/config-kube-system/curvenote.yaml new file mode 100644 index 000000000..277dfd9f9 --- /dev/null +++ b/config-kube-system/curvenote.yaml @@ -0,0 +1,31 @@ +# Install the more modern load-balancer controller: +# 
https://docs.aws.amazon.com/eks/latest/userguide/aws-load-balancer-controller.html +aws-load-balancer-controller: + enabled: true + clusterName: binderhub + clusterSecretsPermissions: + allowAllSecrets: true + enableShield: false + enableWaf: false + enableWafv2: false + logLevel: debug + serviceAccount: + # Must match the IRSA service account name + name: aws-load-balancer-controller + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-load-balancer-controller" + +aws-ebs-csi-driver: + enabled: true + controller: + serviceAccount: + # Must match the IRSA service account name + name: ebs-csi-controller-sa + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-ebs-csi-controller-sa" + storageClasses: + - name: ebs-sc + # Note this results in EKS having two default StorageClasses, so to be sure + # always specify the storage class in the PVC. + annotations: + storageclass.kubernetes.io/is-default-class: "true" diff --git a/config/curvenote.yaml b/config/curvenote.yaml new file mode 100644 index 000000000..d2b3ab591 --- /dev/null +++ b/config/curvenote.yaml @@ -0,0 +1,324 @@ +projectName: curvenote + +binderhub: + config: + BinderHub: + hub_url: https://hub.binder.curvenote.dev + hub_url_local: http://proxy-public + badge_base_url: https://mybinder.org + sticky_builds: true + image_prefix: 166088433508.dkr.ecr.us-east-2.amazonaws.com/binderhub/ + # log_level: DEBUG + # TODO: we should have CPU requests, too + # use this to limit the number of builds per node + # complicated: dind memory request + KubernetesBuildExecutor.memory_request * builds_per_node ~= node memory + KubernetesBuildExecutor: + memory_request: "2G" + + LaunchQuota: + total_quota: 10 + + ExternalRegistryHelper: + service_url: http://curvenote-binderhub-container-registry-helper:8080 + + extraEnv: + BINDERHUB_CONTAINER_REGISTRY_HELPER_AUTH_TOKEN: + valueFrom: + secretKeyRef: + name: 
curvenote-binderhub-container-registry-helper + key: auth_token + + registry: + url: 166088433508.dkr.ecr.us-east-2.amazonaws.com + username: "" + password: "" + + # extraVolumes: + # - name: secrets + # secret: + # secretName: events-archiver-secrets + # extraVolumeMounts: + # - name: secrets + # mountPath: /secrets + # readOnly: true + # extraEnv: + # GOOGLE_APPLICATION_CREDENTIALS: /secrets/service-account.json + + extraConfig: + 01-eventlog: | + # Disabled until GOOGLE_APPLICATION_CREDENTIALS secret is available + # and secrets/events-archiver/curvenote.json is created + + 10-external-registry-helper: | + import json + from os import getenv + from tornado import httpclient + from traitlets import Unicode + from binderhub.registry import DockerRegistry + + + class ExternalRegistryHelper(DockerRegistry): + service_url = Unicode( + "http://binderhub-container-registry-helper:8080", + allow_none=False, + help="The URL of the registry helper micro-service.", + config=True, + ) + + auth_token = Unicode( + getenv("BINDERHUB_CONTAINER_REGISTRY_HELPER_AUTH_TOKEN"), + help="The auth token to use when accessing the registry helper micro-service.", + config=True, + ) + + async def _request(self, endpoint, **kwargs): + client = httpclient.AsyncHTTPClient() + repo_url = f"{self.service_url}{endpoint}" + headers = {"Authorization": f"Bearer {self.auth_token}"} + repo = await client.fetch(repo_url, headers=headers, **kwargs) + return json.loads(repo.body.decode("utf-8")) + + async def _get_image(self, image, tag): + repo_url = f"/image/{image}:{tag}" + self.log.debug(f"Checking whether image exists: {repo_url}") + try: + image_json = await self._request(repo_url) + return image_json + except httpclient.HTTPError as e: + if e.code == 404: + return None + raise + + async def get_image_manifest(self, image, tag): + """ + Checks whether the image exists in the registry. + + If the container repository doesn't exist create the repository. 
+ + The container repository name may not be the same as the BinderHub image name. + + E.g. Oracle Container Registry (OCIR) has the form: + OCIR_NAMESPACE/OCIR_REPOSITORY_NAME:TAG + + These extra components are handled automatically by the registry helper + so BinderHub repository names such as OCIR_NAMESPACE/OCIR_REPOSITORY_NAME + can be used directly, it is not necessary to remove the extra components. + + Returns the image manifest if the image exists, otherwise None + """ + + repo_url = f"/repo/{image}" + self.log.debug(f"Checking whether repository exists: {repo_url}") + try: + repo_json = await self._request(repo_url) + except httpclient.HTTPError as e: + if e.code == 404: + repo_json = None + else: + raise + + if repo_json: + return await self._get_image(image, tag) + else: + self.log.debug(f"Creating repository: {repo_url}") + await self._request(repo_url, method="POST", body="") + return None + + async def get_credentials(self, image, tag): + """ + Get the registry credentials for the given image and tag if supported + by the remote helper, otherwise returns None + + Returns a dictionary of login fields. 
+ """ + token_url = f"/token/{image}:{tag}" + self.log.debug(f"Getting registry token: {token_url}") + token_json = None + try: + token_json = await self._request(token_url, method="POST", body="") + except httpclient.HTTPError as e: + if e.code == 404: + return None + raise + self.log.debug(f"Token: {*token_json.keys(),}") + token = dict( + (k, v) + for (k, v) in token_json.items() + if k in ["username", "password", "registry"] + ) + return token + + + c.BinderHub.registry_class = ExternalRegistryHelper + + ingress: + hosts: + - binder.curvenote.dev + + jupyterhub: + hub: + db: + pvc: + storageClassName: ebs-sc + config: + KubeSpawner: + extra_pod_config: + enableServiceLinks: false + image_pull_policy: Always + extraPodSpec: + priorityClassName: binderhub-core + networkPolicy: + ingress: + # AWS VPC CNI only works if the name of the service port name is the same as + # the name of the pod port and the port number is the same + # https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations + - from: + - podSelector: + matchLabels: + hub.jupyter.org/network-access-hub: "true" + # For unknown reasons the hub <-> notebook traffic is partially blocked if + # this is included: + # ports: + # # service/hub port name is "hub" + # # pod/hub port name is "http" + # - port: 8081 + # protocol: TCP + + singleuser: + networkPolicy: + ingress: + # AWS VPC CNI only works if the name of the service port name is the same as + # the name of the pod port and the port number is the same + # https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations + - from: + - podSelector: + matchLabels: + hub.jupyter.org/network-access-singleuser: "true" + ports: + # proxy/pod port name is "notebook-port" + # I've no idea why that doesn't work + - port: 8888 + protocol: TCP + + proxy: + chp: + extraPodSpec: + priorityClassName: binderhub-core + networkPolicy: + ingress: + # AWS VPC CNI only works if the 
name of the service port name is the same as + # the name of the pod port and the port number is the same + # https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html#cni-network-policy-considerations + - from: + - podSelector: + matchLabels: + hub.jupyter.org/network-access-proxy-api: "true" + ports: + # service/proxy-api port doesn't have a name + # proxy/pod port name is "api" + - port: 8001 + protocol: TCP + - from: + ports: + # service/proxy-public port is 80 + # proxy/pod port is 8000 + - port: 8000 + protocol: TCP + - port: 80 + protocol: TCP + + ingress: + hosts: + - hub.binder.curvenote.dev + tls: + - secretName: kubelego-tls-hub + hosts: + - hub.binder.curvenote.dev + scheduling: + userPlaceholder: + enabled: false + userScheduler: + enabled: false + cull: + # maxAge: 1 hour since we're just testing + maxAge: 3600 + + imageCleaner: + enabled: true + # Use 40GB as upper limit, size is given in bytes + imageGCThresholdHigh: 40e9 + imageGCThresholdLow: 30e9 + imageGCThresholdType: "absolute" + +cryptnono: + enabled: true + +grafana: + enabled: false + ingress: + hosts: + # - grafana.binder.curvenote.dev + tls: + - hosts: + # - grafana.binder.curvenote.dev + secretName: kubelego-tls-grafana + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: prometheus + orgId: 1 + type: prometheus + url: https://prometheus.binder.curvenote.dev + access: direct + isDefault: true + editable: false + +prometheus: + enabled: true + server: + persistentVolume: + size: 50Gi + retention: 30d + ingress: + hosts: + - prometheus.binder.curvenote.dev + tls: + - hosts: + - prometheus.binder.curvenote.dev + secretName: kubelego-tls-prometheus + +ingress-nginx: + controller: + service: + annotations: + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + +static: + ingress: + hosts: + - static.binder.curvenote.dev + +analyticsPublisher: + enabled: false + +minesweeper: + enabled: true + +priorityClasses: + binderhub-core: 10000 + 
+binderhub-container-registry-helper: + enabled: true + # auth_token: Autogenerated + replicaCount: 2 + serviceAccount: + name: binderhub-container-registry-helper + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-binderhub-ecr" + +awsEcrRegistryCleaner: + enabled: true + serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::166088433508:role/binderhub-IRSA-aws-binderhub-ecr-registry-cleaner" diff --git a/deploy.py b/deploy.py index 61a1f5274..1c2e0c0b6 100755 --- a/deploy.py +++ b/deploy.py @@ -29,6 +29,9 @@ "prod": "us-central1", } +# Mapping of config name to cluster name for AWS EKS deployments +AWS_DEPLOYMENTS = {"curvenote": "binderhub"} + # Mapping of cluster names (keys) to resource group names (values) for Azure deployments AZURE_RGs = {} @@ -140,6 +143,27 @@ def setup_auth_gcloud(release, cluster=None, dry_run=False): ) +def setup_auth_aws(cluster, dry_run=False): + """ + Set up authentication for EKS on AWS + + Assumes you already have an AWS CLI profile setup with access to EKS, + and that either this is the default profile (e.g. on CI) or you have set the + AWS_PROFILE environment variable. 
+ """ + print(BOLD + GREEN + f"Obtaining AWS EKS kubeconfig for {cluster}" + NC, flush=True) + + eks_kubeconfig = [ + "aws", + "eks", + "update-kubeconfig", + "--name", + AWS_DEPLOYMENTS[cluster], + ] + stdout = check_output(eks_kubeconfig, dry_run) + print(stdout) + + def update_networkbans(cluster, dry_run=False): """ Run secrets/ban.py to update network bans @@ -165,24 +189,20 @@ def get_config_files(release, config_dir="config"): ) # release-specific config files for config_dir in (config_dir, os.path.join("secrets", config_dir)): - config_files.append(os.path.join(config_dir, release + ".yaml")) + f = os.path.join(config_dir, release + ".yaml") + if os.path.exists(f): + config_files.append(f) return config_files def deploy(release, name=None, dry_run=False): """Deploys a federation member to a k8s cluster. - The deployment is done in the following steps: - - 1. Deploy cert-manager - 2. Deploy mybinder helm chart - 3. Await deployed deployment and daemonsets to become Ready + Waits for deployments and daemonsets to become Ready """ if not name: name = release - setup_certmanager(dry_run) - print(BOLD + GREEN + f"Starting helm upgrade for {release}" + NC, flush=True) helm = [ "helm", @@ -309,6 +329,41 @@ def patch_coredns(dry_run=False): ) +def deploy_kube_system_charts(release, name=None, dry_run=False): + """ + Some charts must be deployed into the kube-system namespace + """ + if not name: + name = release + log_name = f"mybinder-kube-system {release}" + + config_files = get_config_files(release, config_dir="config-kube-system") + if not config_files: + print(BOLD + GREEN + f"No config files found for {log_name}" + NC, flush=True) + return + + print(BOLD + GREEN + f"Starting helm upgrade for {log_name}" + NC, flush=True) + helm = [ + "helm", + "upgrade", + "--install", + "--cleanup-on-fail", + "--namespace=kube-system", + name, + "mybinder-kube-system", + ] + for config_file in config_files: + helm.extend(["-f", config_file]) + + check_call(helm, dry_run) + 
print( + BOLD + GREEN + f"SUCCESS: Helm upgrade for {log_name} completed" + NC, + flush=True, + ) + + wait_for_deployments_daemonsets("kube-system", dry_run) + + def main(): # parse command line args argparser = argparse.ArgumentParser() @@ -320,6 +375,7 @@ def main(): "prod", "ovh", "ovh2", + "curvenote", ], ) argparser.add_argument( @@ -342,6 +398,13 @@ def main(): action="store_true", help="Print commands, but don't run them", ) + stages = ["all", "auth", "networkbans", "kubesystem", "certmanager", "mybinder"] + argparser.add_argument( + "--stage", + choices=stages, + default=stages[0], + help="Stage to deploy, default all", + ) args = argparser.parse_args() @@ -376,18 +439,27 @@ def main(): # script is running on CI, proceed with auth and helm setup - if cluster.startswith("ovh"): - setup_auth_ovh(args.release, cluster, args.dry_run) - patch_coredns(args.dry_run) - elif cluster in AZURE_RGs: - setup_auth_azure(cluster, args.dry_run) - elif cluster in GCP_PROJECTS: - setup_auth_gcloud(args.release, cluster, args.dry_run) - else: - raise Exception("Cloud cluster not recognised!") - - update_networkbans(cluster, args.dry_run) - deploy(args.release, args.name, args.dry_run) + if args.stage in ("all", "auth"): + if cluster.startswith("ovh"): + setup_auth_ovh(args.release, cluster, args.dry_run) + patch_coredns(args.dry_run) + elif cluster in AZURE_RGs: + setup_auth_azure(cluster, args.dry_run) + elif cluster in GCP_PROJECTS: + setup_auth_gcloud(args.release, cluster, args.dry_run) + elif cluster in AWS_DEPLOYMENTS: + setup_auth_aws(cluster, args.dry_run) + else: + raise Exception("Cloud cluster not recognised!") + + if args.stage in ("all", "networkbans"): + update_networkbans(cluster, args.dry_run) + if args.stage in ("all", "kubesystem"): + deploy_kube_system_charts(args.release, args.name, args.dry_run) + if args.stage in ("all", "certmanager"): + setup_certmanager(args.dry_run) + if args.stage in ("all", "mybinder"): + deploy(args.release, args.name,
args.dry_run) if __name__ == "__main__": diff --git a/mybinder-kube-system/Chart.yaml b/mybinder-kube-system/Chart.yaml new file mode 100644 index 000000000..5adf7a553 --- /dev/null +++ b/mybinder-kube-system/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +description: A meta-chart for the kube-system charts on some mybinder.org hosts +name: mybinder-kube-system +version: "0.0.1-set.by.chartpress" +kubeVersion: ">= 1.26.0-0" +dependencies: + # https://artifacthub.io/packages/helm/aws/aws-load-balancer-controller + - name: aws-load-balancer-controller + version: 1.5.3 + repository: https://aws.github.io/eks-charts + condition: aws-load-balancer-controller.enabled + + # https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/docs/install.md + - name: aws-ebs-csi-driver + version: 2.17.2 + repository: https://kubernetes-sigs.github.io/aws-ebs-csi-driver + condition: aws-ebs-csi-driver.enabled diff --git a/mybinder-kube-system/values.yaml b/mybinder-kube-system/values.yaml new file mode 100644 index 000000000..f62144f76 --- /dev/null +++ b/mybinder-kube-system/values.yaml @@ -0,0 +1,7 @@ +# AWS EKS load-balancer controller +aws-load-balancer-controller: + enabled: false + +# AWS EKS storage (EBS) controller +aws-ebs-csi-driver: + enabled: false diff --git a/mybinder/Chart.yaml b/mybinder/Chart.yaml index caef0675d..d38e0b46c 100644 --- a/mybinder/Chart.yaml +++ b/mybinder/Chart.yaml @@ -50,3 +50,10 @@ dependencies: version: 9.21.1 repository: https://kubernetes.github.io/autoscaler condition: cluster-autoscaler.enabled + + # Registry helper, used to create container repositories before pushing and to + # fetch dynamic registry credentials + - name: binderhub-container-registry-helper + version: 0.2.3 + repository: https://www.manicstreetpreacher.co.uk/binderhub-container-registry-helper/ + condition: binderhub-container-registry-helper.enabled diff --git a/mybinder/templates/aws-ecr-registry-cleaner/deployment.yaml 
b/mybinder/templates/aws-ecr-registry-cleaner/deployment.yaml new file mode 100644 index 000000000..7a4ffda39 --- /dev/null +++ b/mybinder/templates/aws-ecr-registry-cleaner/deployment.yaml @@ -0,0 +1,44 @@ +{{- if .Values.awsEcrRegistryCleaner.enabled -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aws-ecr-registry-cleaner + labels: + app: aws-ecr-registry-cleaner + component: aws-ecr-registry-cleaner + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + # Never run more than one cleaner pod at a time + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: aws-ecr-registry-cleaner + component: aws-ecr-registry-cleaner + release: {{ .Release.Name }} + template: + metadata: + labels: + app: aws-ecr-registry-cleaner + component: aws-ecr-registry-cleaner + release: {{ .Release.Name }} + spec: + containers: + - name: cleaner + image: {{ .Values.awsEcrRegistryCleaner.image }} + args: + - -expires-after-pull-days={{ .Values.awsEcrRegistryCleaner.expiresAfterPullDays }} + - -loop-delay={{ .Values.awsEcrRegistryCleaner.loopDelay }} + tolerations: + - effect: NoSchedule + key: hub.jupyter.org/dedicated + operator: Equal + value: user + - effect: NoSchedule + key: hub.jupyter.org_dedicated + operator: Equal + value: user + serviceAccountName: {{ .Values.awsEcrRegistryCleaner.serviceAccount.name }} +{{- end }} diff --git a/mybinder/templates/aws-ecr-registry-cleaner/serviceaccount.yaml b/mybinder/templates/aws-ecr-registry-cleaner/serviceaccount.yaml new file mode 100644 index 000000000..2a74f7df5 --- /dev/null +++ b/mybinder/templates/aws-ecr-registry-cleaner/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.awsEcrRegistryCleaner.enabled -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.awsEcrRegistryCleaner.serviceAccount.name }} + labels: + {{- include "binderhub-container-registry-helper.labels" . | nindent 4 }} + {{- with .Values.awsEcrRegistryCleaner.serviceAccount.annotations }} + annotations: + {{- toYaml .
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/mybinder/templates/netpol.yaml b/mybinder/templates/netpol.yaml index b9cb73001..b2ab2ab92 100644 --- a/mybinder/templates/netpol.yaml +++ b/mybinder/templates/netpol.yaml @@ -39,6 +39,9 @@ spec: to: - ipBlock: cidr: 10.0.0.0/8 + - ipBlock: + # AWS EKS defaults to running DNS on 172.20.0.10 + cidr: 172.20.0.10/32 # allow access to the world, # but not the cluster - ports: diff --git a/mybinder/templates/priorityclass.yaml b/mybinder/templates/priorityclass.yaml new file mode 100644 index 000000000..2c3aa486c --- /dev/null +++ b/mybinder/templates/priorityclass.yaml @@ -0,0 +1,12 @@ +{{- range $name, $priority := .Values.priorityClasses }} +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: {{ $name }} + labels: + heritage: {{ $.Release.Service }} + release: {{ $.Release.Name }} +value: {{ $priority }} +globalDefault: false +{{- end }} diff --git a/mybinder/values.yaml b/mybinder/values.yaml index 746262fbd..2a731928d 100644 --- a/mybinder/values.yaml +++ b/mybinder/values.yaml @@ -592,3 +592,20 @@ minesweeper: # cluster-autoscaler: enabled: false + +# Name:Priority pairs of priority classes to create +# https://kubernetes.io/blog/2023/01/12/protect-mission-critical-pods-priorityclass/ +priorityClasses: {} + +binderhub-container-registry-helper: + enabled: false + +awsEcrRegistryCleaner: + enabled: false + image: ghcr.io/manics/aws-ecr-registry-cleaner:0.0.1 + expiresAfterPullDays: 7 + # 12 hours + loopDelay: 43200 + serviceAccount: + name: binderhub-ecr-registry-cleaner + annotations: {} diff --git a/terraform/aws/curvenote/cni/README.md b/terraform/aws/curvenote/cni/README.md new file mode 100644 index 000000000..0f4f175e9 --- /dev/null +++ b/terraform/aws/curvenote/cni/README.md @@ -0,0 +1,20 @@ +# Enable NetworkPolicies on EKS + +EKS automatically installs the VPC CNI plugin, but by default NetworkPolicies are not enabled. + +1. 
Find the recommended version of the VPC CNI plugin + https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html +2. Download the VPC-CNI Kubernetes manifest, replacing `1.15.0` with the recommended version + ``` + curl -O https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v1.15.0/config/master/aws-k8s-cni.yaml + ``` +3. Edit `aws-k8s-cni.yaml`: + - Change all mentions of `us-west-2` to your region + - Update the manifest following the `kubectl` instructions in + https://docs.aws.amazon.com/eks/latest/userguide/cni-network-policy.html + - Add `enable-network-policy-controller: "true"` to the `aws-node` ConfigMap + - Set `--enable-network-policy=true` in the `aws-node` DaemonSet `aws-network-policy-agent` container +4. Apply: + ``` + kubectl apply -f cni/aws-k8s-cni.yaml + ``` diff --git a/terraform/aws/curvenote/cni/aws-k8s-cni-us-east-2.yaml b/terraform/aws/curvenote/cni/aws-k8s-cni-us-east-2.yaml new file mode 100644 index 000000000..f7886dc0d --- /dev/null +++ b/terraform/aws/curvenote/cni/aws-k8s-cni-us-east-2.yaml @@ -0,0 +1,564 @@ +--- +# Source: crds/customresourcedefinition.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: eniconfigs.crd.k8s.amazonaws.com +spec: + scope: Cluster + group: crd.k8s.amazonaws.com + preserveUnknownFields: false + versions: + - name: v1alpha1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + x-kubernetes-preserve-unknown-fields: true + names: + plural: eniconfigs + singular: eniconfig + kind: ENIConfig +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.11.3 + creationTimestamp: null + labels: + app.kubernetes.io/name: amazon-network-policy-controller-k8s + name: policyendpoints.networking.k8s.aws +spec: + group: networking.k8s.aws + names: + kind: PolicyEndpoint + listKind: PolicyEndpointList + plural: policyendpoints + singular: policyendpoint + scope: 
Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: PolicyEndpoint is the Schema for the policyendpoints API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: PolicyEndpointSpec defines the desired state of PolicyEndpoint + properties: + egress: + description: Egress is the list of egress rules containing resolved + network addresses + items: + description: EndpointInfo defines the network endpoint information + for the policy ingress/egress + properties: + cidr: + description: CIDR is the network address(s) of the endpoint + type: string + except: + description: Except is the exceptions to the CIDR ranges mentioned + above. + items: + type: string + type: array + ports: + description: Ports is the list of ports + items: + description: Port contains information about the transport + port/protocol + properties: + endPort: + description: Endport specifies the port range port to + endPort port must be defined and an integer, endPort + > port + format: int32 + type: integer + port: + description: Port specifies the numerical port for the + protocol. 
If empty applies to all ports + format: int32 + type: integer + protocol: + default: TCP + description: Protocol specifies the transport protocol, + default TCP + type: string + type: object + type: array + required: + - cidr + type: object + type: array + ingress: + description: Ingress is the list of ingress rules containing resolved + network addresses + items: + description: EndpointInfo defines the network endpoint information + for the policy ingress/egress + properties: + cidr: + description: CIDR is the network address(s) of the endpoint + type: string + except: + description: Except is the exceptions to the CIDR ranges mentioned + above. + items: + type: string + type: array + ports: + description: Ports is the list of ports + items: + description: Port contains information about the transport + port/protocol + properties: + endPort: + description: Endport specifies the port range port to + endPort port must be defined and an integer, endPort + > port + format: int32 + type: integer + port: + description: Port specifies the numerical port for the + protocol. If empty applies to all ports + format: int32 + type: integer + protocol: + default: TCP + description: Protocol specifies the transport protocol, + default TCP + type: string + type: object + type: array + required: + - cidr + type: object + type: array + podIsolation: + description: PodIsolation specifies whether the pod needs to be isolated + for a particular traffic direction Ingress or Egress, or both. If + default isolation is not specified, and there are no ingress/egress + rules, then the pod is not isolated from the point of view of this + policy. This follows the NetworkPolicy spec.PolicyTypes. 
+ items: + description: PolicyType string describes the NetworkPolicy type + This type is beta-level in 1.8 + type: string + type: array + podSelector: + description: PodSelector is the podSelector from the policy resource + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + podSelectorEndpoints: + description: PodSelectorEndpoints contains information about the pods + matching the podSelector + items: + description: PodEndpoint defines the summary information for the + pods + properties: + hostIP: + description: HostIP is the IP address of the host the pod is + currently running on + type: string + name: + description: Name is the pod name + type: string + namespace: + description: Namespace is the pod namespace + type: string + podIP: + description: PodIP is the IP address of the pod + type: string + required: + - hostIP + - name + - namespace + - podIP + type: object + type: array + policyRef: + description: PolicyRef is a reference to the Kubernetes NetworkPolicy + resource. + properties: + name: + description: Name is the name of the Policy + type: string + namespace: + description: Namespace is the namespace of the Policy + type: string + required: + - name + - namespace + type: object + required: + - policyRef + type: object + status: + description: PolicyEndpointStatus defines the observed state of PolicyEndpoint + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +# Source: aws-vpc-cni/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: aws-node + namespace: kube-system + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + app.kubernetes.io/version: "v1.15.0" +--- +# Source: aws-vpc-cni/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: amazon-vpc-cni + namespace: kube-system + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + app.kubernetes.io/version: "v1.15.0" +data: + enable-windows-ipam: "false" + enable-network-policy-controller: "true" +--- +# Source: aws-vpc-cni/templates/clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRole +metadata: + name: aws-node + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + app.kubernetes.io/version: "v1.15.0" +rules: + - apiGroups: + - crd.k8s.amazonaws.com + resources: + - eniconfigs + verbs: ["list", "watch", "get"] + - apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch", "get"] + - apiGroups: [""] + resources: + - pods + verbs: ["list", "watch", "get"] + - apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch", "get"] + - apiGroups: ["", "events.k8s.io"] + resources: + - events + verbs: ["create", "patch", "list"] + - apiGroups: ["networking.k8s.aws"] + resources: + - policyendpoints + verbs: ["get", "list", "watch"] + - apiGroups: ["networking.k8s.aws"] + resources: + - policyendpoints/status + verbs: ["get"] + - apiGroups: + - vpcresources.k8s.aws + resources: + - cninodes + verbs: ["get", "list", "patch"] +--- +# Source: aws-vpc-cni/templates/clusterrolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aws-node + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + app.kubernetes.io/version: "v1.15.0" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: aws-node +subjects: + - kind: ServiceAccount + name: aws-node + namespace: kube-system +--- +# Source: aws-vpc-cni/templates/daemonset.yaml +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: aws-node + namespace: kube-system + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + app.kubernetes.io/version: "v1.15.0" +spec: + updateStrategy: + rollingUpdate: + maxUnavailable: 10% + type: RollingUpdate + selector: + matchLabels: + k8s-app: aws-node + template: + metadata: + labels: + app.kubernetes.io/name: aws-node + app.kubernetes.io/instance: aws-vpc-cni + k8s-app: aws-node + spec: + priorityClassName: 
"system-node-critical" + serviceAccountName: aws-node + hostNetwork: true + initContainers: + - name: aws-vpc-cni-init + image: "602401143452.dkr.ecr.us-east-2.amazonaws.com/amazon-k8s-cni-init:v1.15.0" + env: + - name: DISABLE_TCP_EARLY_DEMUX + value: "false" + - name: ENABLE_IPv6 + value: "false" + securityContext: + privileged: true + resources: + requests: + cpu: 25m + volumeMounts: + - mountPath: /host/opt/cni/bin + name: cni-bin-dir + terminationGracePeriodSeconds: 10 + tolerations: + - operator: Exists + securityContext: + {} + containers: + - name: aws-node + image: "602401143452.dkr.ecr.us-east-2.amazonaws.com/amazon-k8s-cni:v1.15.0" + ports: + - containerPort: 61678 + name: metrics + livenessProbe: + exec: + command: + - /app/grpc-health-probe + - -addr=:50051 + - -connect-timeout=5s + - -rpc-timeout=5s + initialDelaySeconds: 60 + timeoutSeconds: 10 + readinessProbe: + exec: + command: + - /app/grpc-health-probe + - -addr=:50051 + - -connect-timeout=5s + - -rpc-timeout=5s + initialDelaySeconds: 1 + timeoutSeconds: 10 + env: + - name: ADDITIONAL_ENI_TAGS + value: "{}" + - name: AWS_VPC_CNI_NODE_PORT_SUPPORT + value: "true" + - name: AWS_VPC_ENI_MTU + value: "9001" + - name: AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG + value: "false" + - name: AWS_VPC_K8S_CNI_EXTERNALSNAT + value: "false" + - name: AWS_VPC_K8S_CNI_LOGLEVEL + value: "DEBUG" + - name: AWS_VPC_K8S_CNI_LOG_FILE + value: "/host/var/log/aws-routed-eni/ipamd.log" + - name: AWS_VPC_K8S_CNI_RANDOMIZESNAT + value: "prng" + - name: AWS_VPC_K8S_CNI_VETHPREFIX + value: "eni" + - name: AWS_VPC_K8S_PLUGIN_LOG_FILE + value: "/var/log/aws-routed-eni/plugin.log" + - name: AWS_VPC_K8S_PLUGIN_LOG_LEVEL + value: "DEBUG" + - name: DISABLE_INTROSPECTION + value: "false" + - name: DISABLE_METRICS + value: "false" + - name: DISABLE_NETWORK_RESOURCE_PROVISIONING + value: "false" + - name: ENABLE_IPv4 + value: "true" + - name: ENABLE_IPv6 + value: "false" + - name: ENABLE_POD_ENI + value: "false" + - name: 
ENABLE_PREFIX_DELEGATION + value: "false" + - name: WARM_ENI_TARGET + value: "1" + - name: WARM_PREFIX_TARGET + value: "1" + - name: MY_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + resources: + requests: + cpu: 25m + securityContext: + capabilities: + add: + - NET_ADMIN + - NET_RAW + volumeMounts: + - mountPath: /host/opt/cni/bin + name: cni-bin-dir + - mountPath: /host/etc/cni/net.d + name: cni-net-dir + - mountPath: /host/var/log/aws-routed-eni + name: log-dir + - mountPath: /var/run/aws-node + name: run-dir + - mountPath: /run/xtables.lock + name: xtables-lock + - name: aws-eks-nodeagent + image: "602401143452.dkr.ecr.us-east-2.amazonaws.com/amazon/aws-network-policy-agent:v1.0.2" + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + args: + - --enable-ipv6=false + - --enable-network-policy=true + - --enable-cloudwatch-logs=false + - --metrics-bind-addr=:8162 + - --health-probe-bind-addr=:8163 + resources: + requests: + cpu: 25m + securityContext: + capabilities: + add: + - NET_ADMIN + privileged: true + volumeMounts: + - mountPath: /host/opt/cni/bin + name: cni-bin-dir + - mountPath: /sys/fs/bpf + name: bpf-pin-path + - mountPath: /var/log/aws-routed-eni + name: log-dir + - mountPath: /var/run/aws-node + name: run-dir + volumes: + - name: bpf-pin-path + hostPath: + path: /sys/fs/bpf + - name: cni-bin-dir + hostPath: + path: /opt/cni/bin + - name: cni-net-dir + hostPath: + path: /etc/cni/net.d + - name: log-dir + hostPath: + path: /var/log/aws-routed-eni + type: DirectoryOrCreate + - name: run-dir + hostPath: + path: /var/run/aws-node + type: DirectoryOrCreate + - name: xtables-lock + hostPath: + path: /run/xtables.lock + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/os + operator: In + values: + 
- linux + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - arm64 + - key: eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate