diff --git a/config/clusters/2i2c/dask-staging.values.yaml b/config/clusters/2i2c/dask-staging.values.yaml index b1a9f6ae1a..31e05e0dc0 100644 --- a/config/clusters/2i2c/dask-staging.values.yaml +++ b/config/clusters/2i2c/dask-staging.values.yaml @@ -11,12 +11,6 @@ basehub: hosts: - dask-staging.2i2c.cloud custom: - cloudResources: - provider: gcp - gcp: - projectId: two-eye-two-see - scratchBucket: - enabled: true 2i2c: add_staff_user_ids_to_admin_users: true add_staff_user_ids_of_type: "google" @@ -39,6 +33,9 @@ basehub: image: name: pangeo/pangeo-notebook tag: "latest" + extraEnv: + SCRATCH_BUCKET: gs://pilot-hubs-scratch-dask-staging/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: gs://pilot-hubs-scratch-dask-staging/$(JUPYTERHUB_USER) hub: config: JupyterHub: diff --git a/config/clusters/2i2c/ohw.values.yaml b/config/clusters/2i2c/ohw.values.yaml index 52c9225471..10e6d0a75c 100644 --- a/config/clusters/2i2c/ohw.values.yaml +++ b/config/clusters/2i2c/ohw.values.yaml @@ -12,21 +12,9 @@ basehub: - oceanhackweek.2i2c.cloud singleuser: networkPolicy: - # In clusters with NetworkPolicy enabled, do not - # allow outbound internet access that's not DNS, HTTP or HTTPS # For OHW, we allow 8080 (for DAP) and 22 (for ssh) # https://github.com/2i2c-org/infrastructure/issues/549#issuecomment-892276020 - enabled: true egress: - - ports: - - port: 53 - protocol: UDP - - ports: - - port: 80 - protocol: TCP - - ports: - - port: 443 - protocol: TCP - ports: - port: 8080 protocol: TCP @@ -54,15 +42,11 @@ basehub: cpu_limit: 2 cpu_guarantee: 0.5 extraEnv: + SCRATCH_BUCKET: gs://pilot-hubs-scratch-ohw/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: gs://pilot-hubs-scratch-ohw/$(JUPYTERHUB_USER) GH_SCOPED_CREDS_CLIENT_ID: "Iv1.9c20af442fad0d86" GH_SCOPED_CREDS_APP_URL: "https://github.com/apps/ohw-gh-scoped-creds-app" custom: - cloudResources: - provider: gcp - gcp: - projectId: two-eye-two-see - scratchBucket: - enabled: true 2i2c: add_staff_user_ids_to_admin_users: true add_staff_user_ids_of_type: 
"github" diff --git a/config/clusters/pangeo-hubs/coessing.values.yaml b/config/clusters/pangeo-hubs/coessing.values.yaml index fd165be81a..470f47ea95 100644 --- a/config/clusters/pangeo-hubs/coessing.values.yaml +++ b/config/clusters/pangeo-hubs/coessing.values.yaml @@ -4,9 +4,6 @@ basehub: iam.gke.io/gcp-service-account: pangeo-hubs-coessing@pangeo-integration-te-3eea.iam.gserviceaccount.com jupyterhub: custom: - cloudResources: - scratchBucket: - enabled: false 2i2c: add_staff_user_ids_to_admin_users: true add_staff_user_ids_of_type: "google" @@ -23,8 +20,8 @@ basehub: secretName: https-auto-tls singleuser: extraEnv: - SCRATCH_BUCKET: gcs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER) - PANGEO_SCRATCH: gcs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER) + SCRATCH_BUCKET: gs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: gs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER) # No profiles profileList: null memory: diff --git a/config/clusters/pangeo-hubs/common.values.yaml b/config/clusters/pangeo-hubs/common.values.yaml index 8d5eaa3b39..4dcfb502a3 100644 --- a/config/clusters/pangeo-hubs/common.values.yaml +++ b/config/clusters/pangeo-hubs/common.values.yaml @@ -14,12 +14,6 @@ basehub: 2i2c: add_staff_user_ids_to_admin_users: true add_staff_user_ids_of_type: "github" - cloudResources: - provider: gcp - gcp: - projectId: pangeo-integration-te-3eea - scratchBucket: - enabled: true homepage: templateVars: org: diff --git a/config/clusters/pangeo-hubs/prod.values.yaml b/config/clusters/pangeo-hubs/prod.values.yaml index dc4bb6b17b..e7c4e47bba 100644 --- a/config/clusters/pangeo-hubs/prod.values.yaml +++ b/config/clusters/pangeo-hubs/prod.values.yaml @@ -12,3 +12,7 @@ basehub: config: GitHubOAuthenticator: oauth_callback_url: https://us-central1-b.gcp.pangeo.io/hub/oauth_callback + singleuser: + extraEnv: + SCRATCH_BUCKET: gs://pangeo-hubs-scratch/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: gs://pangeo-hubs-scratch/$(JUPYTERHUB_USER) diff --git 
a/config/clusters/pangeo-hubs/staging.values.yaml b/config/clusters/pangeo-hubs/staging.values.yaml index 896f459268..03229944da 100644 --- a/config/clusters/pangeo-hubs/staging.values.yaml +++ b/config/clusters/pangeo-hubs/staging.values.yaml @@ -12,3 +12,7 @@ basehub: config: GitHubOAuthenticator: oauth_callback_url: https://staging.us-central1-b.gcp.pangeo.io/hub/oauth_callback + singleuser: + extraEnv: + SCRATCH_BUCKET: gs://pangeo-hubs-scratch-staging/$(JUPYTERHUB_USER) + PANGEO_SCRATCH: gs://pangeo-hubs-scratch-staging/$(JUPYTERHUB_USER) diff --git a/docs/topic/infrastructure/cluster-design.md b/docs/topic/infrastructure/cluster-design.md index 089cc63c5f..ce41013e38 100644 --- a/docs/topic/infrastructure/cluster-design.md +++ b/docs/topic/infrastructure/cluster-design.md @@ -134,29 +134,25 @@ to isolate them from each other. ## Cloud access credentials for hub users -For hub users to access cloud resources (like storage buckets), they will need -to be authorized via a [GCP ServiceAccount](https://cloud.google.com/iam/docs/service-accounts). -This is different from a [Kubernetes ServiceAccount](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/), -which is used to authenticate and authorize access to kubernetes resources (like spawning pods). - -For dask hubs, we want to provide users with write access to at least one storage -bucket they can use for temporary data storage. User pods need to be given access to -a GCP ServiceAccount that has write permissions to this bucket. There are two ways -to do this: - -1. Provide appropriate permissions to the GCP ServiceAccount used by the node the user - pods are running on. When used with [Metadata Concealment](https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata#overview), - user pods can read / write from storage buckets. 
However, this grants the same permissions - to *all* pods on the cluster, and hence is unsuitable for clusters with multiple - hubs running for different organizations. - -2. Use the [GKE Cloud Config Connector](https://cloud.google.com/config-connector/docs/overview) to - create a GCP ServiceAccount + Storage Bucket for each hub via helm. This requires using - [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) and - is incompatible with (1). This is required for multi-tenant clusters, since users on a hub - have much tighter scoped permissions. - -Long-term, (2) is the appropriate way to do this for everyone. However, it affects the size -of the core node pool, since it runs some components in the cluster. For now, we use (1) for -single-tenant clusters, and (2) for multi-tenant clusters. If nobody wants a scratch GCS bucket, -neither option is required. +For hub users to access cloud resources like storage buckets from their user +servers, they will need to have credentials from a cloud specific service +account - like a [GCP ServiceAccount]. + +Currently for practical reasons we only provision one cloud specific service +account per hub, which makes all users' interactions appear to come from a single user. +Note that providing, for example, two cloud service accounts, one for hub admin +users and one for non-admin users, is by far an easier improvement than providing +one for each hub user. + +```{note} Technical notes +When we create a hub with access to a bucket, we create a cloud provider specific +service account for the hub via `terraform`. We then also create a [Kubernetes +ServiceAccount] via the basehub chart's templates that references the cloud +specific service account via an annotation. When this Kubernetes ServiceAccount +is mounted to the hub's user server pods, a cloud specific controller ensures +the Pod gets credentials that can be exchanged for temporary credentials to the +cloud specific service account. 
+ +[gcp serviceaccount]: https://cloud.google.com/iam/docs/service-accounts +[kubernetes serviceaccount]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +``` diff --git a/helm-charts/basehub/templates/cloud-resources/gcp/_helpers.tpl b/helm-charts/basehub/templates/cloud-resources/gcp/_helpers.tpl deleted file mode 100644 index 4e317cc625..0000000000 --- a/helm-charts/basehub/templates/cloud-resources/gcp/_helpers.tpl +++ /dev/null @@ -1,9 +0,0 @@ -{{- define "cloudResources.gcp.serviceAccountName" -}} -{{ .Release.Name }}-user-sa -{{- end }} - -{{- define "cloudResources.scratchBucket.name" -}} -{{- if eq .Values.jupyterhub.custom.cloudResources.provider "gcp" -}} -{{ .Values.jupyterhub.custom.cloudResources.gcp.projectId }}-{{ .Release.Name }}-scratch-bucket -{{- end }} -{{- end }} diff --git a/helm-charts/basehub/templates/cloud-resources/gcp/service-account.yaml b/helm-charts/basehub/templates/cloud-resources/gcp/service-account.yaml deleted file mode 100644 index 55e2b94908..0000000000 --- a/helm-charts/basehub/templates/cloud-resources/gcp/service-account.yaml +++ /dev/null @@ -1,40 +0,0 @@ -{{- if .Values.jupyterhub.custom.cloudResources.scratchBucket.enabled -}} -apiVersion: iam.cnrm.cloud.google.com/v1beta1 -kind: IAMServiceAccount -metadata: - name: {{ include "cloudResources.gcp.serviceAccountName" . }} - annotations: - cnrm.cloud.google.com/project-id : {{ .Values.jupyterhub.custom.cloudResources.gcp.projectId | quote }} -spec: - displayName: {{ .Release.Name }} hub user service account ---- -apiVersion: iam.cnrm.cloud.google.com/v1beta1 -kind: IAMPolicy -metadata: - name: workload-identity-binding - annotations: - cnrm.cloud.google.com/project-id : {{ .Values.jupyterhub.custom.cloudResources.gcp.projectId | quote }} -spec: - resourceRef: - apiVersion: iam.cnrm.cloud.google.com/v1beta1 - kind: IAMServiceAccount - name: {{ include "cloudResources.gcp.serviceAccountName" . 
}} - bindings: - - role: roles/iam.workloadIdentityUser - members: - - serviceAccount:{{ .Values.jupyterhub.custom.cloudResources.gcp.projectId }}.svc.id.goog[{{ .Release.Namespace }}/user-sa] ---- -apiVersion: iam.cnrm.cloud.google.com/v1beta1 -kind: IAMPolicyMember -metadata: - name: sa-requester-pays-binding - annotations: - cnrm.cloud.google.com/project-id : {{ .Values.jupyterhub.custom.cloudResources.gcp.projectId | quote }} -spec: - member: serviceAccount:{{ include "cloudResources.gcp.serviceAccountName" . }}@{{ .Values.jupyterhub.custom.cloudResources.gcp.projectId }}.iam.gserviceaccount.com - role: roles/serviceusage.serviceUsageConsumer - resourceRef: - apiVersion: resourcemanager.cnrm.cloud.google.com/v1beta1 - kind: Project - external: projects/{{ .Values.jupyterhub.custom.cloudResources.gcp.projectId }} -{{- end }} diff --git a/helm-charts/basehub/templates/cloud-resources/gcp/storage-bucket.yaml b/helm-charts/basehub/templates/cloud-resources/gcp/storage-bucket.yaml deleted file mode 100644 index 510df361fd..0000000000 --- a/helm-charts/basehub/templates/cloud-resources/gcp/storage-bucket.yaml +++ /dev/null @@ -1,34 +0,0 @@ -{{- if .Values.jupyterhub.custom.cloudResources.scratchBucket.enabled -}} -{{- if eq .Values.jupyterhub.custom.cloudResources.provider "gcp" -}} -apiVersion: storage.cnrm.cloud.google.com/v1beta1 -kind: StorageBucket -metadata: - annotations: - cnrm.cloud.google.com/project-id : {{ .Values.jupyterhub.custom.cloudResources.gcp.projectId | quote }} - cnrm.cloud.google.com/force-destroy: "false" - name: {{ include "cloudResources.scratchBucket.name" . 
}} -spec: - bucketPolicyOnly: true - lifecycleRule: - - action: - type: Delete - condition: - age: 7 ---- -apiVersion: iam.cnrm.cloud.google.com/v1beta1 -kind: IAMPolicyMember -metadata: - name: scratch-bucket-binding - annotations: - cnrm.cloud.google.com/project-id : {{ .Values.jupyterhub.custom.cloudResources.gcp.projectId | quote }} -spec: - member: serviceAccount:{{ include "cloudResources.gcp.serviceAccountName" . }}@{{ .Values.jupyterhub.custom.cloudResources.gcp.projectId }}.iam.gserviceaccount.com - # This gives users the ability to delete the bucket too :( - # But without this, I think you can't list objects in the bucket - role: roles/storage.admin - resourceRef: - apiVersion: storage.cnrm.cloud.google.com/v1beta1 - kind: StorageBucket - name: {{ include "cloudResources.scratchBucket.name" . }} -{{- end }} -{{- end }} diff --git a/helm-charts/basehub/values.schema.yaml b/helm-charts/basehub/values.schema.yaml index 1fdb6fe267..466a2a9936 100644 --- a/helm-charts/basehub/values.schema.yaml +++ b/helm-charts/basehub/values.schema.yaml @@ -284,7 +284,6 @@ properties: required: - singleuserAdmin - singleuser - - cloudResources - 2i2c - auth - jupyterhubConfigurator @@ -451,32 +450,6 @@ properties: additionalProperties: true extraEnv: type: object - cloudResources: - type: object - additionalProperties: false - required: - - provider - - gcp - - scratchBucket - properties: - provider: - enum: ["", gcp] - gcp: - type: object - additionalProperties: false - required: - - projectId - properties: - projectId: - type: string - scratchBucket: - type: object - additionalProperties: false - required: - - enabled - properties: - enabled: - type: boolean 2i2c: type: object additionalProperties: false diff --git a/helm-charts/basehub/values.yaml b/helm-charts/basehub/values.yaml index 2195ce12db..56e3ade80a 100644 --- a/helm-charts/basehub/values.yaml +++ b/helm-charts/basehub/values.yaml @@ -113,12 +113,6 @@ jupyterhub: - name: home mountPath: 
/home/rstudio/shared-readwrite subPath: _shared - cloudResources: - provider: "" - gcp: - projectId: "" - scratchBucket: - enabled: false 2i2c: # Should 2i2c engineering staff user IDs be injected to the admin_users # configuration of the JupyterHub's authenticator by our custom @@ -779,31 +773,7 @@ jupyterhub: return pod c.KubeSpawner.modify_pod_hook = modify_pod_hook - 03-cloud-storage-bucket: | - from z2jh import get_config - cloud_resources = get_config('custom.cloudResources') - scratch_bucket = cloud_resources['scratchBucket'] - import os - - if scratch_bucket['enabled']: - # FIXME: Support other providers too - assert cloud_resources['provider'] == 'gcp' - project_id = cloud_resources['gcp']['projectId'] - - release = os.environ['HELM_RELEASE_NAME'] - bucket_protocol = 'gcs' - bucket_name = f'{project_id}-{release}-scratch-bucket' - env = { - 'SCRATCH_BUCKET_PROTOCOL': bucket_protocol, - # Matches "daskhub.scratchBUcket.name" helm template - 'SCRATCH_BUCKET_NAME': bucket_name, - # Use k8s syntax of $(ENV_VAR) to substitute env vars dynamically in other env vars - 'SCRATCH_BUCKET': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)', - 'PANGEO_SCRATCH': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)', - } - - c.KubeSpawner.environment.update(env) - 04-2i2c-add-staff-user-ids-to-admin-users: | + 03-2i2c-add-staff-user-ids-to-admin-users: | from z2jh import get_config add_staff_user_ids_to_admin_users = get_config("custom.2i2c.add_staff_user_ids_to_admin_users", False) @@ -816,7 +786,7 @@ jupyterhub: staff_user_ids.extend(get_config("hub.config.Authenticator.admin_users", [])) c.Authenticator.admin_users = staff_user_ids - 05-per-user-disk: | + 04-per-user-disk: | # Optionally, create a PVC per user - useful for per-user databases from jupyterhub.utils import exponential_backoff from z2jh import get_config diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index d1a16f99ad..bc5c147c4e 100644 --- a/terraform/gcp/cluster.tf +++ 
b/terraform/gcp/cluster.tf @@ -300,6 +300,7 @@ resource "google_container_node_pool" "notebook" { workload_metadata_config { # Config Connector requires workload identity to be enabled (via GKE_METADATA_SERVER). + # Config Connector hasn't been used since March 2024, see https://github.com/2i2c-org/infrastructure/pull/3778. # If config connector is not necessary, we use simple metadata concealment # (https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata) # to expose the node CA to users safely. diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 7473e425eb..6a60161ae5 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -49,19 +49,26 @@ dask_nodes = { }, } -user_buckets = {} +user_buckets = { + "scratch-dask-staging" : { + "delete_after" : 7, + }, + "scratch-ohw" : { + "delete_after" : 7, + }, +} hub_cloud_permissions = { "dask-staging" : { allow_access_to_external_requester_pays_buckets : true, - bucket_admin_access : [], - hub_namespace : "dask-staging" + bucket_admin_access : ["scratch-dask-staging"], + hub_namespace : "dask-staging", }, "ohw" : { allow_access_to_external_requester_pays_buckets : true, - bucket_admin_access : [], - hub_namespace : "ohw" + bucket_admin_access : ["scratch-ohw"], + hub_namespace : "ohw", }, }