From bf3e7428ad13f2caa4cd8b99c5a6d088a9e3ab8b Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Mon, 18 Nov 2024 10:24:02 +0000 Subject: [PATCH 1/5] nasa-cryo: create hub-specific nodegroups --- eksctl/nasa-cryo.jsonnet | 60 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet index ae73e14970..4fbcb737c0 100644 --- a/eksctl/nasa-cryo.jsonnet +++ b/eksctl/nasa-cryo.jsonnet @@ -25,13 +25,65 @@ local nodeAz = "us-west-2a"; // A `node.kubernetes.io/instance-type label is added, so pods // can request a particular kind of node with a nodeSelector local notebookNodes = [ - { instanceType: "r5.xlarge", nameSuffix: "b" }, - { instanceType: "r5.4xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade - { instanceType: "r5.4xlarge", nameSuffix: "b" }, - { instanceType: "r5.16xlarge" }, + { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted + { + instanceType: "r5.xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" }, + }, + { + instanceType: "r5.xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" }, + }, + { instanceType: "r5.4xlarge" }, // FIXME: tainted + { + instanceType: "r5.4xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" }, + }, + { + instanceType: "r5.4xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" }, + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging" }, + }, + { + instanceType: "r5.16xlarge", + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod" }, + }, + { + instanceType: "g4dn.xlarge", + namePrefix: "staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { + "2i2c:hub-name": "staging", + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, { instanceType: "g4dn.xlarge", + namePrefix: "prod", + labels+: { "2i2c/hub-name": "prod" }, tags+: { + "2i2c:hub-name": "prod", "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" }, taints+: { From 70dd3059bc4f9d59fcc829f04ecdf3d01a951aad Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Mon, 18 Nov 2024 10:24:22 +0000 Subject: [PATCH 2/5] nasa-cryo: add node-purpose tags --- eksctl/nasa-cryo.jsonnet | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet index 4fbcb737c0..8894de5180 100644 --- a/eksctl/nasa-cryo.jsonnet +++ b/eksctl/nasa-cryo.jsonnet @@ -160,6 +160,7 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "core", "k8s.dask.org/node-purpose": "core" }, + tags+: { "2i2c:node-purpose": "core" }, }, ] + [ ng + { @@ -175,6 +176,7 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "user", "k8s.dask.org/node-purpose": "scheduler" }, + tags+: { "2i2c:node-purpose": "user" }, taints+: { "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule" From 6d72fb837f73077589d4771f83e1a2a506eae925 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Mon, 18 Nov 2024 10:24:37 +0000 Subject: [PATCH 3/5] nasa-cryo: cycle core nodegroup --- eksctl/nasa-cryo.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet index 8894de5180..062a4307d0 100644 --- a/eksctl/nasa-cryo.jsonnet +++ b/eksctl/nasa-cryo.jsonnet @@ -147,7 +147,7 @@ local daskNodes = [ [ ng + { namePrefix: 'core', - nameSuffix: 'b', + nameSuffix: 'a', nameIncludeInstanceType: false, availabilityZones: [nodeAz], ssh: { From e4fd28d9450632bdeb8c0e32ce5b1f061c57702a Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Mon, 18 Nov 2024 10:26:15 +0000 Subject: [PATCH 4/5] nasa-cryo: remove tainted nodegroup --- eksctl/nasa-cryo.jsonnet | 1 - 1 file changed, 1 deletion(-) diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet index 062a4307d0..5046edac8b 100644 --- a/eksctl/nasa-cryo.jsonnet +++ b/eksctl/nasa-cryo.jsonnet @@ -38,7 +38,6 @@ local notebookNodes = [ labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod" }, }, - { instanceType: "r5.4xlarge" }, // FIXME: tainted { instanceType: "r5.4xlarge", namePrefix: "nb-staging", From c33f9564d598d2cc8a084b3c3c480f16229fa645 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Mon, 18 Nov 2024 10:29:28 +0000 Subject: [PATCH 5/5] nasa-cryo: add node selectors --- config/clusters/nasa-cryo/prod.values.yaml | 2 ++ config/clusters/nasa-cryo/staging.values.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/config/clusters/nasa-cryo/prod.values.yaml b/config/clusters/nasa-cryo/prod.values.yaml index d7194fe49d..bed8f3c37d 100644 --- a/config/clusters/nasa-cryo/prod.values.yaml +++ b/config/clusters/nasa-cryo/prod.values.yaml @@ -13,6 +13,8 @@ basehub: GitHubOAuthenticator: oauth_callback_url: https://hub.cryointhecloud.com/hub/oauth_callback singleuser: + nodeSelector: + 2i2c/hub-name: prod extraEnv: SCRATCH_BUCKET: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER) PANGEO_SCRATCH: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER) diff --git a/config/clusters/nasa-cryo/staging.values.yaml b/config/clusters/nasa-cryo/staging.values.yaml index c4ed1a5da6..c4f7565a04 100644 --- a/config/clusters/nasa-cryo/staging.values.yaml +++ b/config/clusters/nasa-cryo/staging.values.yaml @@ -13,6 +13,8 @@ basehub: GitHubOAuthenticator: oauth_callback_url: https://staging.hub.cryointhecloud.com/hub/oauth_callback singleuser: + nodeSelector: + 2i2c/hub-name: staging extraEnv: SCRATCH_BUCKET: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER) PANGEO_SCRATCH: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER)