diff --git a/config/clusters/nasa-cryo/prod.values.yaml b/config/clusters/nasa-cryo/prod.values.yaml
index d7194fe49..bed8f3c37 100644
--- a/config/clusters/nasa-cryo/prod.values.yaml
+++ b/config/clusters/nasa-cryo/prod.values.yaml
@@ -13,6 +13,8 @@ basehub:
       GitHubOAuthenticator:
         oauth_callback_url: https://hub.cryointhecloud.com/hub/oauth_callback
     singleuser:
+      nodeSelector:
+        2i2c/hub-name: prod
       extraEnv:
         SCRATCH_BUCKET: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER)
         PANGEO_SCRATCH: s3://nasa-cryo-scratch/$(JUPYTERHUB_USER)
diff --git a/config/clusters/nasa-cryo/staging.values.yaml b/config/clusters/nasa-cryo/staging.values.yaml
index c4ed1a5da..c4f7565a0 100644
--- a/config/clusters/nasa-cryo/staging.values.yaml
+++ b/config/clusters/nasa-cryo/staging.values.yaml
@@ -13,6 +13,8 @@ basehub:
       GitHubOAuthenticator:
         oauth_callback_url: https://staging.hub.cryointhecloud.com/hub/oauth_callback
     singleuser:
+      nodeSelector:
+        2i2c/hub-name: staging
       extraEnv:
         SCRATCH_BUCKET: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER)
         PANGEO_SCRATCH: s3://nasa-cryo-scratch-staging/$(JUPYTERHUB_USER)
diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet
index ae73e1497..5046edac8 100644
--- a/eksctl/nasa-cryo.jsonnet
+++ b/eksctl/nasa-cryo.jsonnet
@@ -25,13 +25,64 @@ local nodeAz = "us-west-2a";
 // A `node.kubernetes.io/instance-type label is added, so pods
 // can request a particular kind of node with a nodeSelector
 local notebookNodes = [
-    { instanceType: "r5.xlarge", nameSuffix: "b" },
-    { instanceType: "r5.4xlarge" }, // FIXME: tainted, to be deleted when empty, replaced by equivalent during k8s upgrade
-    { instanceType: "r5.4xlarge", nameSuffix: "b" },
-    { instanceType: "r5.16xlarge" },
+    { instanceType: "r5.xlarge", nameSuffix: "b" }, // FIXME: tainted
+    {
+        instanceType: "r5.xlarge",
+        namePrefix: "nb-staging",
+        labels+: { "2i2c/hub-name": "staging" },
+        tags+: { "2i2c:hub-name": "staging" },
+    },
+    {
+        instanceType: "r5.xlarge",
+        namePrefix: "nb-prod",
+        labels+: { "2i2c/hub-name": "prod" },
+        tags+: { "2i2c:hub-name": "prod" },
+    },
+    {
+        instanceType: "r5.4xlarge",
+        namePrefix: "nb-staging",
+        labels+: { "2i2c/hub-name": "staging" },
+        tags+: { "2i2c:hub-name": "staging" },
+    },
+    {
+        instanceType: "r5.4xlarge",
+        namePrefix: "nb-prod",
+        labels+: { "2i2c/hub-name": "prod" },
+        tags+: { "2i2c:hub-name": "prod" },
+    },
+    {
+        instanceType: "r5.16xlarge",
+        namePrefix: "nb-staging",
+        labels+: { "2i2c/hub-name": "staging" },
+        tags+: { "2i2c:hub-name": "staging" },
+    },
+    {
+        instanceType: "r5.16xlarge",
+        namePrefix: "nb-prod",
+        labels+: { "2i2c/hub-name": "prod" },
+        tags+: { "2i2c:hub-name": "prod" },
+    },
+    {
+        instanceType: "g4dn.xlarge",
+        namePrefix: "staging",
+        labels+: { "2i2c/hub-name": "staging" },
+        tags+: {
+            "2i2c:hub-name": "staging",
+            "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
+        },
+        taints+: {
+            "nvidia.com/gpu": "present:NoSchedule"
+        },
+        // Allow provisioning GPUs across all AZs, to prevent situation where all
+        // GPUs in a single AZ are in use and no new nodes can be spawned
+        availabilityZones: masterAzs,
+    },
     {
         instanceType: "g4dn.xlarge",
+        namePrefix: "prod",
+        labels+: { "2i2c/hub-name": "prod" },
         tags+: {
+            "2i2c:hub-name": "prod",
             "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
         },
         taints+: {
@@ -95,7 +146,7 @@ local daskNodes = [
 [
     ng + {
         namePrefix: 'core',
-        nameSuffix: 'b',
+        nameSuffix: 'a',
         nameIncludeInstanceType: false,
         availabilityZones: [nodeAz],
         ssh: {
@@ -108,6 +159,7 @@ local daskNodes = [
"hub.jupyter.org/node-purpose": "core", "k8s.dask.org/node-purpose": "core" }, + tags+: { "2i2c:node-purpose": "core" }, }, ] + [ ng + { @@ -123,6 +175,7 @@ local daskNodes = [ "hub.jupyter.org/node-purpose": "user", "k8s.dask.org/node-purpose": "scheduler" }, + tags+: { "2i2c:node-purpose": "user" }, taints+: { "hub.jupyter.org_dedicated": "user:NoSchedule", "hub.jupyter.org/dedicated": "user:NoSchedule"