Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New AWS cluster: BICAN #3840

Merged
merged 9 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/deploy-grafana-dashboards.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
- cluster_name: 2i2c-aws-us
- cluster_name: 2i2c-uk
- cluster_name: awi-ciroh
- cluster_name: bican
- cluster_name: catalystproject-africa
- cluster_name: catalystproject-latam
- cluster_name: cloudbank
Expand Down
12 changes: 12 additions & 0 deletions config/clusters/bican/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Cluster-level configuration for the bican cluster.
name: bican
provider: aws  # https://2i2c.awsapps.com/start#/
aws:
  # Encrypted deployer credentials, stored next to this file
  key: enc-deployer-credentials.secret.json
  clusterType: eks
  clusterName: bican
  region: us-east-2
support:
  # Values files passed to the support chart; paths are relative to this file
  helm_chart_values_files:
    - support.values.yaml
    - enc-support.secret.values.yaml
# No hubs are defined on this cluster yet
hubs: []
25 changes: 25 additions & 0 deletions config/clusters/bican/enc-deployer-credentials.secret.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"AccessKey": {
"AccessKeyId": "ENC[AES256_GCM,data:DeO+KCrRK5prA+MwdYH259ZoE30=,iv:wHbKxC+nj15O6Fk4W/RYxePl1ZIhN5IiTUotCCD8RU0=,tag:RW73gvM38ZVzM+uRxMNxeA==,type:str]",
"SecretAccessKey": "ENC[AES256_GCM,data:dWhFaB2jqM1/lN4T/vsLnUI4JZ2aEvKltHz1yELlHQYZocZwe9BuKQ==,iv:OaqLfhzgyJi+XlTxyNVwF7oofk2bxEL4w/vkqoIGBJw=,tag:4SUngOLKzXJKiU+RnvJB/A==,type:str]",
"UserName": "ENC[AES256_GCM,data:SWPTr2vK0g9duv/PJnGP6+yEhajuSHE=,iv:uzCudF/Jns0rkZeTFgF3TzfAdHj+uzIaRTddxjZkIQY=,tag:TicuD0Gy4DlS1cs3v+34eg==,type:str]"
},
"sops": {
"kms": null,
"gcp_kms": [
{
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs",
"created_at": "2024-03-25T16:18:28Z",
"enc": "CiUA4OM7eI62pdzV0VlvcAweLwu3y+/mVg+D/IxPHr0JdwvDieADEkkAXoW3JvyeLJ3uoPtSm7/C3hjB9a+fehCUe/eA/2GgL4GCsWNWsCcIh8wftPsXPqVJn10DoUF/zjmiGaYTrxlsArVXBciGHip9"
}
],
"azure_kv": null,
"hc_vault": null,
"age": null,
"lastmodified": "2024-03-25T16:18:28Z",
"mac": "ENC[AES256_GCM,data:FE0dsSHtAjhgJjtUrHhax/zP7Rb6E50XjN25kFOFmDBos5He7R5F1ibIZ+iy/WAu3BRgs1IZIc+egfK1GHihvvmmAv1gPGf/KCjtxHmVm2AWRSzht76Z54Lp65z9yLwh7nvTgyI/KdeVgmfFQvbcZsdMugg+s5JnRZ1CKdcSLM4=,iv:xzSgeWcIqZJ+S1VvcX4V3Baeve3HbPL6YDClCn2UFPY=,tag:tfVCPiUb8Jg0yYZNqRejig==,type:str]",
"pgp": null,
"unencrypted_suffix": "_unencrypted",
"version": "3.8.1"
}
}
15 changes: 15 additions & 0 deletions config/clusters/bican/enc-grafana-token.secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
grafana_token: ENC[AES256_GCM,data:ycdgNGt3VqcyJGKh4F51r1E9fn53GIHhOXwF5/mCoMeGgrWZg9qNcrgpUyAzmg==,iv:qZ8j+D2xJuEd2p+1pv7KYm1txAAiOB2RwqIP8ErCzM0=,tag:ujAmi9YozXMiux9yU20OHQ==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2024-03-25T17:00:00Z"
enc: CiUA4OM7eOhriXoULTcORAf79SP0MiE+/8E7mpYi/Fld/7pHr/64EkkAXoW3JiccZbzRvAC1iWH3vgaunmsL7V8GS8bSYJpR7XoFSSyXB5aQijOObgcQGEIAAH7L3MIJrOxV4eW9f3t+fN9I1QWGy1+6
azure_kv: []
hc_vault: []
age: []
lastmodified: "2024-03-25T17:00:00Z"
mac: ENC[AES256_GCM,data:n7XUcuVcfgX37zg+ZLt5w8p9Ct5605laTBeaK809+UktFoUokByaFB5+x6/S+OZN08Q1A+fgF2E6TXNUavN+qgUNOiNal5FvQZr9HKPofgDPIWJi72851Fy1Q6rVYou7iSPC3f1oHMfh5ilPR0GmaHjQny4W+r3p88rLO6dtP/M=,iv:qSeOy1AKNAQt2hozBNGeeSCRW6JvXz2oB4IVd9uWfzo=,tag:7y8GOuq03ZI7LlTQdWpPqQ==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.8.1
22 changes: 22 additions & 0 deletions config/clusters/bican/enc-support.secret.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
prometheusIngressAuthSecret:
username: ENC[AES256_GCM,data:GzcSEBqXXvY2bxnAHq7NFM8T7OnkVdJqNdMHV6P57kjwpvQGhTBI0upMzYuWejD8OVpYgK+gywNUwrUIsIw+kw==,iv:coBbRSe2gdi8vqV5uaWDBm9oS16hEZR4Je3cSD/E7tE=,tag:4563alICoKwoXbok/h3/mQ==,type:str]
password: ENC[AES256_GCM,data:tp/AfO5x4oFV+9YiO42uyyAeJRGFPbJHB2Gtak7n77fWZuA+g1w2mpjG9UKoPcTmJXp6pv79+Clc03CeqNfXPQ==,iv:rzIYpQ4YI66CWDwm/Lc3EIxkJrz1mp+J/+SYtVZQK8k=,tag:Ho7pAmyS5fmDkqK22WvDOQ==,type:str]
grafana:
grafana.ini:
auth.github:
client_id: ENC[AES256_GCM,data:r0girp4lkSsFJxT9RYkNqATagK0=,iv:rUF0925+HkU4d3xRbsgcq8az+aLSgW2kbNGg06ZR504=,tag:/kZxOX/ZNRrfA62vdAWIZg==,type:str]
client_secret: ENC[AES256_GCM,data:EysT8taieKuhc76qFFqUo9qxoeYxofRcOo1hLrDQ4aBsKH6zIOAH5Q==,iv:31LdaFhwg7Kkn1DvyFxnUFoWDRBYlchwMeFdm819uc0=,tag:aKt+2mdtcA4uO6yiJFsPXw==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2024-03-25T16:58:36Z"
enc: CiUA4OM7eMjNppdOUODfRkFuLa6+B4w/pOw4QFFBwysbGYfDrxVhEkkAXoW3JnUAhPZeVwdvizCgUdBQpIH4O4V7SqdCmNYy+chPbvDkiyhOefoVVg7FrU4nHr9xSwhCsXVO16GOOdEukbrxL9YUiD7O
azure_kv: []
hc_vault: []
age: []
lastmodified: "2024-03-25T16:58:36Z"
mac: ENC[AES256_GCM,data:aLnsOYFwKqn97RsoHcbeoBdbQMV3djowqgaN1xWsPMhMEoyEFt4AyoFe8ZOrD2LFfPdCS7C9liGv93jkLY9HuIsoCtELxFUdTQzClBU7rzqspMDA+XQy4hGSJ+0mhg/JaxtBZ9NBuy19m2pBGM55V1nZPVzZNSTxWi/9EtGQDOY=,iv:jtdvEWfW3gqeLl/duh+vGIgFrCzWa+sBHwlcWRAmEJo=,tag:gVrCUxenY1REHnjepCY4lQ==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.8.1
34 changes: 34 additions & 0 deletions config/clusters/bican/support.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Support chart values for the bican cluster.
# The matching secrets (prometheus basic-auth credentials and the grafana
# GitHub OAuth client) live in enc-support.secret.values.yaml.
prometheusIngressAuthSecret:
  enabled: true

prometheus:
  server:
    ingress:
      enabled: true
      hosts:
        - prometheus.bican.2i2c.cloud
      tls:
        - secretName: prometheus-tls
          hosts:
            - prometheus.bican.2i2c.cloud

grafana:
  grafana.ini:
    server:
      root_url: https://grafana.bican.2i2c.cloud/
    auth.github:
      enabled: true
      allowed_organizations: 2i2c-org
  ingress:
    hosts:
      - grafana.bican.2i2c.cloud
    tls:
      - secretName: grafana-tls
        hosts:
          - grafana.bican.2i2c.cloud

cluster-autoscaler:
  enabled: true
  # clusterName and awsRegion must match the values in cluster.yaml
  autoDiscovery:
    clusterName: bican
  awsRegion: us-east-2
149 changes: 149 additions & 0 deletions eksctl/bican.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
This file is a jsonnet template of an eksctl cluster configuration file,
which is used with the eksctl CLI to both initialize and update an AWS
EKS-based cluster.

This file has in turn been generated from eksctl/template.jsonnet which is
relevant to compare with for changes over time.

To use jsonnet to generate an eksctl configuration file from this, do:

jsonnet bican.jsonnet > bican.eksctl.yaml

References:
- https://eksctl.io/usage/schema/
*/
local ng = import "./libsonnet/nodegroup.jsonnet";

// Region and availability zones for the cluster. masterAzs is used as the
// cluster-level availabilityZones; nodeAz is the single zone that node
// groups are placed in unless they override it.
local clusterRegion = 'us-east-2';
local masterAzs = [clusterRegion + zone for zone in ['a', 'b', 'c']];
local nodeAz = clusterRegion + 'a';

// Notebook node definitions. Each entry is merged into the notebook node
// group definition in nodeGroups below.
// A `node.kubernetes.io/instance-type` label is added, so pods can request
// a particular kind of node with a nodeSelector.
local notebookNodes = [
  { instanceType: machine }
  for machine in ['r5.xlarge', 'r5.4xlarge', 'r5.16xlarge']
] + [
  {
    instanceType: 'g4dn.xlarge',
    tags+: {
      'k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu': '1',
    },
    // GPU nodes may be provisioned in any of the AZs, to prevent a situation
    // where all GPUs in a single AZ are in use and no new nodes can be spawned.
    availabilityZones: masterAzs,
  },
];
// Dask worker node definitions. Each entry is merged into the dask worker
// node group definition in nodeGroups below, which uses spot instances.
// A `node.kubernetes.io/instance-type` label is set to the name of the
// *first* item in instancesDistribution.instanceTypes, to match what is done
// for notebook nodes. Pods can request a particular kind of node with a
// nodeSelector.
//
// A not yet fully established policy about using a single node pool is being
// developed, see https://github.com/2i2c-org/infrastructure/issues/2687.
local daskNodes = [
  {
    instancesDistribution+: {
      instanceTypes: ['r5.4xlarge'],
    },
  },
];


// SSH public key referenced by every node group below.
local sshAccess = { publicKeyPath: 'ssh-keys/bican.key.pub' };

// The eksctl ClusterConfig: cluster metadata, addons, and one node group per
// core / notebook / dask worker node definition.
{
  apiVersion: 'eksctl.io/v1alpha5',
  kind: 'ClusterConfig',
  metadata+: {
    name: 'bican',
    region: clusterRegion,
    version: '1.29',
  },
  availabilityZones: masterAzs,
  iam: {
    withOIDC: true,
  },
  // After adding an addon to this list, run the create addon command:
  //
  //    eksctl create addon --config-file=bican.eksctl.yaml
  //
  addons: [
    {
      // aws-ebs-csi-driver binds our PVCs to PVs backed by AWS EBS storage.
      // Without it, expect pods that mount a PVC to fail to schedule and
      // PVC resources to stay unbound.
      //
      // Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html
      //
      name: 'aws-ebs-csi-driver',
      version: 'latest',
      wellKnownPolicies: {
        ebsCSIController: true,
      },
    },
  ],
  nodeGroups: [
    // Core node group
    ng + {
      namePrefix: 'core',
      nameSuffix: 'a',
      nameIncludeInstanceType: false,
      availabilityZones: [nodeAz],
      ssh: sshAccess,
      instanceType: 'r5.xlarge',
      minSize: 1,
      maxSize: 6,
      labels+: {
        'hub.jupyter.org/node-purpose': 'core',
        'k8s.dask.org/node-purpose': 'core',
      },
    },
  ] + [
    // One notebook node group per entry in notebookNodes; fields set by the
    // entry itself take precedence because it is merged last.
    ng + {
      namePrefix: 'nb',
      availabilityZones: [nodeAz],
      minSize: 0,
      maxSize: 500,
      instanceType: nodeSpec.instanceType,
      ssh: sshAccess,
      labels+: {
        'hub.jupyter.org/node-purpose': 'user',
        'k8s.dask.org/node-purpose': 'scheduler',
      },
      taints+: {
        'hub.jupyter.org_dedicated': 'user:NoSchedule',
        'hub.jupyter.org/dedicated': 'user:NoSchedule',
      },
    } + nodeSpec
    for nodeSpec in notebookNodes
  ] + (
    // One spot-only dask worker node group per entry in daskNodes, if any.
    if daskNodes == null then
      []
    else
      [
        ng + {
          namePrefix: 'dask',
          availabilityZones: [nodeAz],
          minSize: 0,
          maxSize: 500,
          ssh: sshAccess,
          labels+: {
            'k8s.dask.org/node-purpose': 'worker',
          },
          taints+: {
            'k8s.dask.org_dedicated': 'worker:NoSchedule',
            'k8s.dask.org/dedicated': 'worker:NoSchedule',
          },
          instancesDistribution+: {
            onDemandBaseCapacity: 0,
            onDemandPercentageAboveBaseCapacity: 0,
            spotAllocationStrategy: 'capacity-optimized',
          },
        } + nodeSpec
        for nodeSpec in daskNodes
      ]
  ),
}
1 change: 1 addition & 0 deletions eksctl/ssh-keys/bican.key.pub
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAPuAmunoNk11Qr1WCfRulycEvQkbyfziEUXKxR3mUNU [email protected]
21 changes: 21 additions & 0 deletions eksctl/ssh-keys/secret/bican.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"data": "ENC[AES256_GCM,data:kII7kHiQ1oc8b49pb09PPWCEvwy5Q4+RISUGuasdIL7+FcqLT2GW/U5KHhe68ciYoobeYMMZ0KGZIcUi7dVnOK7JuMVj17Xbhnjtlz0J1WE+j0USBRwf7KDXI2IYZ/HUim4PjaUzll7lc+VGJvHpc3gNYgAidO0l4V/wpYqzDG8Pg2vqnzeONBPgGi6JXgvC/xS7YmeTRK8lJL+AUYquFCnt0K7DKFS9qq//4fgfct4NF1EzZf1wvjdCT3eIcmGxkm9z/8wLorkvQmT9rq8lKFgOknkVxLFCYmu1xOF/U2Szp4xl1jf3DAt5MFZNEe1iOdLLYs5mGc0PExbJbBY7uTRZX3t1MwAn8F5Aw35gAfaKW2pk8Lj3woFWpevkA/oNctqAW4vcD4I/NepMENu0sFHMKKFZjZ/IzOeRESQJF2hKw8beV5wGZwQSPIa0Gs7caVSBm4fVspHjOGmMbR+gVcjNezZ4+1P56IC9QqaR2Fksx3Js39v8IRjkFQJSPd6KeboTdCif01PZTr4IFPC/G15mnc9iMedL+XxO,iv:jORG54pmCYaxjkiJBvEdqDatv7jPc+3Y/y3EXtBuuOg=,tag:5FRS/BDxyNmBn5uCM3NRHQ==,type:str]",
"sops": {
"kms": null,
"gcp_kms": [
{
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs",
"created_at": "2024-03-25T15:49:18Z",
"enc": "CiUA4OM7eDXDtAIuBO2G1PQUO/+z63OzqWASErrdn4hJXKUwOAoaEkkAXoW3Jrow6VFVyfdvrPtVgv13BIJ0lStrIiu4RjceEh/FHH8J2V9Bdx4uVEm2wxNArKjLEEmwIO+TcA9+9bTQmQNh6B5ZwFZ8"
}
],
"azure_kv": null,
"hc_vault": null,
"age": null,
"lastmodified": "2024-03-25T15:49:18Z",
"mac": "ENC[AES256_GCM,data:yu9fpvR48VhcnfODFTxeEqhqyJ9+fYF1sVFzED/Ej22XL1Lz2zIPHZtIu/sv5fwr82vWo5ku5/Ld+MMR3xrVL5M1+unRpPEVJRXpUcknssug0X+rHQ4/PcemamElIF77Ra82ezxkMEgGak2vsDn5v7UqMPeGDqjwhKlqtXxRnb0=,iv:AoXpaQhhuCOCDirK+yA8xL4VijcbmFPBtHClATGBh+A=,tag:KH6Czg6OBNQQuboqIaXUYw==,type:str]",
"pgp": null,
"unencrypted_suffix": "_unencrypted",
"version": "3.8.1"
}
}
26 changes: 26 additions & 0 deletions terraform/aws/projects/bican.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Region, cluster name, and node location for the bican cluster.
# These match the region, name, and nodeAz used in eksctl/bican.jsonnet.
region                 = "us-east-2"
cluster_name           = "bican"
cluster_nodes_location = "us-east-2a"

# Buckets to create, keyed by bucket name.
# NOTE(review): delete_after is presumably an object lifetime in days —
# confirm against the terraform module's variable definition.
user_buckets = {
  "scratch-staging" = {
    delete_after = 7
  }
  "scratch" = {
    delete_after = 7
  }
}


# Cloud permissions per hub: each hub gets admin access to its own scratch
# bucket from user_buckets, and no extra IAM policy.
hub_cloud_permissions = {
  "staging" = {
    bucket_admin_access = ["scratch-staging"]
    extra_iam_policy    = ""
  }
  "prod" = {
    bucket_admin_access = ["scratch"]
    extra_iam_policy    = ""
  }
}