From 6a85d25502c96553cda18de5afe8eec53340992e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:11:14 +0000 Subject: [PATCH 1/7] gridsst: upgrade k8s to 1.30 --- eksctl/gridsst.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index 4dcbf2296..c45f36a74 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -73,7 +73,7 @@ local daskNodes = [ metadata+: { name: "gridsst", region: clusterRegion, - version: "1.29", + version: "1.30", tags+: { "ManagedBy": "2i2c", "2i2c.org/cluster-name": $.metadata.name, From b0d7f0e25371d59b06660167fb9e9446ef973cd9 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:11:46 +0000 Subject: [PATCH 2/7] gridsst: add hub-specific nodegroups --- eksctl/gridsst.jsonnet | 129 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 9 deletions(-) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index c45f36a74..5dba8e58e 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -31,16 +31,127 @@ local notebookNodes = [ # scale up: https://github.com/2i2c-org/infrastructure/pull/1836 # scale down: https://github.com/2i2c-org/infrastructure/pull/1844 # - { instanceType: "m5.large", minSize: 1 }, - { instanceType: "m5.xlarge", minSize: 0 }, - { instanceType: "m5.2xlarge", minSize: 0 }, - { instanceType: "m5.8xlarge", minSize: 0 }, - { instanceType: "r5.xlarge", minSize: 0 }, - { instanceType: "r5.4xlarge", minSize: 0 }, - { instanceType: "r5.16xlarge", minSize: 0 }, - { - instanceType: "g4dn.xlarge", minSize: 0, + { + instanceType: "m5.large", + minSize: 1, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "m5.large", + minSize: 1, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "m5.xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "m5.xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "m5.2xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "m5.2xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "m5.8xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "m5.8xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "r5.xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "r5.xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "r5.4xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "r5.4xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "r5.16xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { "2i2c:hub-name": "staging }, + }, + { + instanceType: "r5.16xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, + tags+: { "2i2c:hub-name": "prod }, + }, + { + instanceType: "g4dn.xlarge", + minSize: 0, + namePrefix: "nb-staging", + labels+: { "2i2c/hub-name": "staging" }, + tags+: { + "2i2c:hub-name": "staging", + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + }, + taints+: { + "nvidia.com/gpu": "present:NoSchedule" + }, + // Allow provisioning GPUs across all AZs, to prevent situation where all + // GPUs in a single AZ are in use and no new nodes can be spawned + availabilityZones: masterAzs, + }, + { + instanceType: "g4dn.xlarge", + minSize: 0, + namePrefix: "nb-prod", + labels+: { "2i2c/hub-name": "prod" }, tags+: { + "2i2c:hub-name": "prod", "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" }, taints+: { From 540fadb91a93f75c00e748a13fe351067ee0c63e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:12:02 +0000 Subject: [PATCH 3/7] gridsst: add node-purpose tags --- eksctl/gridsst.jsonnet | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index 5dba8e58e..2854bfef5 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -260,6 +260,7 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "core", "k8s.dask.org/node-purpose": "core" }, + tags+: { "2i2c:node-purpose": "core" }, }, ] + [ ng + { @@ -275,6 +276,7 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "user", "k8s.dask.org/node-purpose": "scheduler" }, + tags+: { "2i2c:node-purpose": "user" }, taints+: { "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule" From 4ebab2dcfb5f7d8d555be6817755dd90eaeac10c Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:12:11 +0000 Subject: [PATCH 4/7] gridsst: cycle core nodegroup --- eksctl/gridsst.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index 2854bfef5..1eb6078e2 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -247,7 +247,7 @@ local daskNodes = [ [ ng + { namePrefix: 'core', - nameSuffix: 'a', + nameSuffix: 'b', nameIncludeInstanceType: false, availabilityZones: [nodeAz], ssh: { From ec51ba67e866d5c60e16c32139d06e15c27d4dbb Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:12:59 +0000 Subject: [PATCH 5/7] gridsst: add node selectors --- config/clusters/gridsst/prod.values.yaml | 3 +++ config/clusters/gridsst/staging.values.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/config/clusters/gridsst/prod.values.yaml b/config/clusters/gridsst/prod.values.yaml index 2e9ce2a76..21ee4273e 100644 --- a/config/clusters/gridsst/prod.values.yaml +++ b/config/clusters/gridsst/prod.values.yaml @@ -12,3 +12,6 @@ basehub: config: GitHubOAuthenticator: oauth_callback_url: https://gridsst.2i2c.cloud/hub/oauth_callback + singleuser: + nodeSelector: + 2i2c/hub-name: prod diff --git a/config/clusters/gridsst/staging.values.yaml b/config/clusters/gridsst/staging.values.yaml index cdf619a81..a86783b55 100644 --- a/config/clusters/gridsst/staging.values.yaml +++ b/config/clusters/gridsst/staging.values.yaml @@ -12,3 +12,6 @@ basehub: config: GitHubOAuthenticator: oauth_callback_url: https://staging.gridsst.2i2c.cloud/hub/oauth_callback + singleuser: + nodeSelector: + 2i2c/hub-name: staging From faab2c35011f5c4216a5a6a7b0abf6ea51c54b00 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:15:47 +0000 Subject: [PATCH 6/7] gridsst: fix typos --- eksctl/gridsst.jsonnet | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index 1eb6078e2..fc1fb6627 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -36,98 +36,98 @@ local notebookNodes = [ minSize: 1, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "m5.large", minSize: 1, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "m5.xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "m5.xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "m5.2xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "m5.2xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "m5.8xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "m5.8xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "r5.xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "r5.xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "r5.4xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "r5.4xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "r5.16xlarge", minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, - tags+: { "2i2c:hub-name": "staging }, + tags+: { "2i2c:hub-name": "staging" }, }, { instanceType: "r5.16xlarge", minSize: 0, namePrefix: "nb-prod", labels+: { "2i2c/hub-name": "prod" }, - tags+: { "2i2c:hub-name": "prod }, + tags+: { "2i2c:hub-name": "prod" }, }, { instanceType: "g4dn.xlarge", From 29494b2ea296a0e4a160744d9c2aecaef93ead44 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 19 Nov 2024 13:44:56 +0000 Subject: [PATCH 7/7] gridsst: set the minsize of the staging nodegroup to zero --- eksctl/gridsst.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index fc1fb6627..33488487a 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -33,7 +33,7 @@ local notebookNodes = [ # { instanceType: "m5.large", - minSize: 1, + minSize: 0, namePrefix: "nb-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging" },