Skip to content

Commit

Permalink
Merge pull request #2083 from yuvipanda/nasa-veda
Browse files Browse the repository at this point in the history
NASA-Veda cluster and hub
  • Loading branch information
GeorgianaElena authored Feb 1, 2023
2 parents 0353288 + 3353e52 commit 98ca113
Show file tree
Hide file tree
Showing 19 changed files with 489 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/deploy-grafana-dashboards.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
- cluster_name: victor
- cluster_name: 2i2c-aws-us
- cluster_name: ubc-eoas
- cluster_name: nasa-veda

steps:
- name: Checkout repo
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/deploy-hubs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ jobs:
failure_victor: "${{ env.failure_victor }}"
failure_2i2c-aws-us: "${{ env.failure_2i2c-aws-us }}"
failure_ubc-eoas: "${{ env.failure_ubc-eoas }}"
failure_nasa-veda: "${{ env.failure_nasa-veda }}"

# Only run this job on pushes to the default branch and when the job output is not
# an empty list
Expand Down
38 changes: 38 additions & 0 deletions config/clusters/nasa-veda/cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: nasa-veda
provider: aws
aws:
key: enc-deployer-credentials.secret.json
clusterType: eks
clusterName: nasa-veda
region: us-west-2
support:
helm_chart_values_files:
- support.values.yaml
- enc-support.secret.values.yaml
hubs:
- name: staging
display_name: "NASA VEDA (staging)"
domain: staging.nasa-veda.2i2c.cloud
helm_chart: daskhub
auth0:
enabled: false
helm_chart_values_files:
# The order in which you list files here is the order in which they will be
# passed to the helm upgrade command, and that order has meaning. Please check
# that you intend for these files to be applied in this order.
- common.values.yaml
- staging.values.yaml
- enc-staging.secret.values.yaml
- name: prod
display_name: "NASA VEDA (prod)"
domain: nasa-veda.2i2c.cloud
helm_chart: daskhub
auth0:
enabled: false
helm_chart_values_files:
# The order in which you list files here is the order in which they will be
# passed to the helm upgrade command, and that order has meaning. Please check
# that you intend for these files to be applied in this order.
- common.values.yaml
- prod.values.yaml
- enc-prod.secret.values.yaml
60 changes: 60 additions & 0 deletions config/clusters/nasa-veda/common.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
basehub:
nfs:
pv:
# from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html
mountOptions:
- rsize=1048576
- wsize=1048576
- timeo=600
- soft # We pick soft over hard, so NFS lockups don't lead to hung processes
- retrans=2
- noresvport
serverIP: fs-029a8973da2b1ef5f.efs.us-west-2.amazonaws.com
baseShareName: /
jupyterhub:
prePuller:
continuous:
enabled: true
hook:
enabled: true
custom:
2i2c:
add_staff_user_ids_to_admin_users: true
add_staff_user_ids_of_type: "github"
homepage:
templateVars:
org:
name: "The Visualization, Exploration, and Data Analysis (VEDA) Project"
logo_url: https://www.earthdata.nasa.gov/s3fs-public/2022-08/veda_logo_lp.png
url: https://www.earthdata.nasa.gov/esds/veda
designed_by:
name: "2i2c"
url: https://2i2c.org
operated_by:
name: "2i2c"
url: https://2i2c.org
funded_by:
name: "TODO"
url: https://www.earthdata.nasa.gov/esds/veda
hub:
allowNamedServers: true
config:
Authenticator:
admin_users:
- abarciauskas-bgse
- freitagb
- j08lue
- rezuma
JupyterHub:
authenticator_class: github
GitHubOAuthenticator:
allowed_organizations:
- veda-analytics-access:all-users
- 2i2c-org:hub-access-for-2i2c-staff
scope:
- read:org
singleuser:
defaultUrl: /lab
image:
name: pangeo/pangeo-notebook
tag: "2023.01.13"
25 changes: 25 additions & 0 deletions config/clusters/nasa-veda/enc-deployer-credentials.secret.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"AccessKey": {
"AccessKeyId": "ENC[AES256_GCM,data:awdc4F+g01F2zN+7vXpZ4ZSNRT4=,iv:5qEvkF7aEXowUGpdihAZwqtvl/SbLovCeQG2IIloh2Y=,tag:kjaRIzu3b2OHxR7S+GxUdg==,type:str]",
"SecretAccessKey": "ENC[AES256_GCM,data:QfsLI4oberW76hqPiGrEfVdSnMF0VVfTJEGQiaD4bYDEDiqTmHHe0A==,iv:l29sKNS/EfLTvh+gJEuSyFGZuPoYbuW5XnMTODQVGIM=,tag:SlK/OqO5EmhdK4YEuyy5PA==,type:str]",
"UserName": "ENC[AES256_GCM,data:4PqxwNh26BqryQ7Dhym2e7uGhr18cPc=,iv:BCKSU7K7GdjqhXR3CrG5lRUyor+ARpfmWwAJaPudLzU=,tag:1bi49l6L8sOp7skJyGdrUg==,type:str]"
},
"sops": {
"kms": null,
"gcp_kms": [
{
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs",
"created_at": "2023-01-23T21:31:58Z",
"enc": "CiUA4OM7eNugcTtc8ci6wDteuLRUrDcGb5N2gSK0cz5HcUWiu9qZEkkA+0T9hfgWb9SZseFTrVI9zTVi+jXzRDP4gHUVyA7ZlDBmrZOCL+uKfJ6WQhukMtf+4Kg8v5tqMx+4FNvwly6kVDpHFzTEogLz"
}
],
"azure_kv": null,
"hc_vault": null,
"age": null,
"lastmodified": "2023-01-23T21:31:58Z",
"mac": "ENC[AES256_GCM,data:7PWUYMH/y6ufgE/99mFVu1taoLbxkX8OWXQZlcFt6LALHsByBNUXOKsAqbUpqm+dQFbDqKpmhNaJ5DEBl9RASHiOQNJtd2nPy+41SM8SpMAg+yWyYvpk5B1piqv7mKaGIRmyP47lpDngAgDQtGbHEZGj+e4wGtytwE6F3ZmHTd4=,iv:CvUosZnPEVWzF9iJhbVhDGjdvnqSSL/uyvWr2ws01sg=,tag:w/D67PtkreepKXAl3/6sGA==,type:str]",
"pgp": null,
"unencrypted_suffix": "_unencrypted",
"version": "3.7.3"
}
}
15 changes: 15 additions & 0 deletions config/clusters/nasa-veda/enc-grafana-token.secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
grafana_token: ENC[AES256_GCM,data:b3/JtIGNkIJhk7CPbKZV0DOO1xiBEMEmJQ7mZtXJWGvi+vHqd7RiDdk1ohaqhw==,iv:vrLtA/5QdU0tKRtiRef5MgtXnv/Fv2zsenTWom3LIbU=,tag:m9k0rK6SIcbjN54g+5WJRA==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2023-01-26T15:06:25Z"
enc: CiUA4OM7eBm4d/vDAZ621HhIsfEkXZVDnEIUWyxsogMfrhfTuG6oEkkA+0T9hVuBSbt2GAwg2RLtE5U2ZWhUE3QyblPCcEN2Y/EP2GQaBzDAw5yZ8MGXV7SYtjMjcReOdMENg6P+QC9T9NEuNBvXSvqI
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-01-26T15:06:25Z"
mac: ENC[AES256_GCM,data:4/ksvtJugnsABvCAADyw2BpE6oGCeU5a0WD+PZUSb5YRa1iJMrJg7WyX5FRvtaBhfBIDUK5S5Z5ZQvUNM1+YsPoA3wavXvOjIXRu95BnBcZUYzqICQRhB9dBgztlzKIH5wSDVE/89bwy//05ml7zSOyWBwnXUgpeH9Pm3CB5X7I=,iv:+JJ48BLtWv6xRLjiqxNwbwRuWEk1oOXT89XOHqyyBTQ=,tag:qDmNvo48202MPQ/sd9ihgw==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.7.3
21 changes: 21 additions & 0 deletions config/clusters/nasa-veda/enc-prod.secret.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
basehub:
jupyterhub:
hub:
config:
GitHubOAuthenticator:
client_id: ENC[AES256_GCM,data:H0O+Ha2N/Ax3K6luitZ37k6EQdk=,iv:CP5BGeDMmE6uznVB6utuW78+UeP8Ge5CnDmocZIkSXU=,tag:Q4QG+b8Nq36bbANHIAfpuA==,type:str]
client_secret: ENC[AES256_GCM,data:a0Xm3m7cHRCrBdV5q22ohFMYjdolj9dDnQhg9cG5E9W47Toqxwj2Gg==,iv:/1CHsZez3j26A8ODvlYgbjXtaRsKbGraxTzgoF2ZURs=,tag:cN59IZ+9J48QxSJ0IiqsAQ==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2023-01-26T14:56:28Z"
enc: CiUA4OM7eCKAfS822SeCDOvwyvK2He3x++XchB5U0Z2uRygrlE6PEkkA+0T9hQrtx2RjWzpuy8LDTafDdeFrNE1Ehco2aOXWi5EaMoT3PrOR9aaeynZo+a9RFNp3duwN37lnDIEBdrV0pxASZ+Ok1ct3
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-01-26T14:57:40Z"
mac: ENC[AES256_GCM,data:57DrEJDR9q4W9O7SJXo/R0XjriaoTmwTSkWtkdAKkXpxIjT37ynN6sSANlk+zi+q8WbdITbk5K6hsHgOnS3VJrKnjTi8mpo0FJ6vAxIRIIr80NSzA1vhWXKn5rvSFWPNL1eh0XJK8HLoqsKBtNLMTis9jgKmAg4lME8uHgh6yxQ=,iv:Gkr554YmGLgT9ScNyxNcHhPq62C3GYph7L+MILyKN2E=,tag:e5k1BNZRscw+IK4eRtfZrA==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.7.3
21 changes: 21 additions & 0 deletions config/clusters/nasa-veda/enc-staging.secret.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
basehub:
jupyterhub:
hub:
config:
GitHubOAuthenticator:
client_id: ENC[AES256_GCM,data:1nlulJ+UtAzwlcL8KNUcUVMcIeM=,iv:rf3nUgkIz8q3nAd4n7XjWmuQeRdPRmpMeYx1SMEc1ss=,tag:PMHa6aACsbY51LKVKFeHEg==,type:str]
client_secret: ENC[AES256_GCM,data:z0lmTJmPoEIe1K4JrOcMYAu8GzvdT8vCxZehrQjDTispUI/lsWn5Kg==,iv:UyP1kGlc5qZaC+cxLkD8Q4g6qmNo37weh5AlxsaCZB0=,tag:i54y+7FNoFevU0Xp507ftA==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2023-01-26T14:43:46Z"
enc: CiUA4OM7ePslGWEcv3OaYu9G873apD8kt5RYUre++rAAg9zHLbkxEkkA+0T9hVsary0kh5dFB0qxlQ94qktVHBQoePzx+0n+Y7teWci0FpelZF92wmIt8qzSDnQVNsv+6/GGhV50+aS8yjS20UF8yqpO
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-01-26T14:47:00Z"
mac: ENC[AES256_GCM,data:szV8nh5uNIuBllgFfeHYewVmFeE+Rm1Bs7H4t/z/t5x5CH9hV17biYguVzo2/og4owI4jY4/BuI/WEks76306pQ+0epFYwj0MdGX0k1EpCAFp9sCeMvePexHYw3wceKu660l1fdm2YOheLr1vaSGv5DBq7Ad47OoFFQ9WyuialU=,iv:sq9J70I682tGIf87OZqir60Lt8ehOwHUX5I7Bic76pQ=,tag:f2j/wVLDhvvBe3+USATzdg==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.7.3
17 changes: 17 additions & 0 deletions config/clusters/nasa-veda/enc-support.secret.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
prometheusIngressAuthSecret:
username: ENC[AES256_GCM,data:LLY0rZP1sw+xO/D9Kjddh5UyO2M/bwQAmkSm6NJmZX+Gew19diIC0Pygu2qHseQM7aAHks5GQhUXbwCubkXjSA==,iv:cWCxydhWlPY6n394pb6VXqTwqmA1xNYOYoZ96OrH8cw=,tag:uaBVVQmAoTn0L7LV2Zq39Q==,type:str]
password: ENC[AES256_GCM,data:vK/L2heF+tpPD4p6QAOK42POuYeFbR5d+wnYdv+MEqQ24W2Xjgel9fxm/UW+ZIvtjTpMfcRjdJAkQdcmjb1Vnw==,iv:d9KEfBlNdlFf/ekGtQnwObMtvVkVJhRvBfC6QMtBKHE=,tag:Q4zbJD5cUn5XfbG+xGny0g==,type:str]
sops:
kms: []
gcp_kms:
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs
created_at: "2023-01-26T13:09:45Z"
enc: CiUA4OM7eFcuROTCgKn6KrhXjRZf7uF9g0y4CJ9MJKrYJhMsD11hEkkA+0T9hXE3HKnvAKGLdOGR0DcUeyz9Zi4oH2KNfJnw5p8jslCtvnkKo0YuzzcInUoPTTxpfiCsYV/NOCgpt+O+AHSFBMfQhZd2
azure_kv: []
hc_vault: []
age: []
lastmodified: "2023-01-26T13:10:34Z"
mac: ENC[AES256_GCM,data:dvOc33v8HCWt5cCF4P/aixW7SUdzILPw112FmA8FWDRUppaYhTmJto3QqVCdJrBDMLQjzrBv0LEFHbi4mG4cDjlwTGfuezCd5YrKwX09DCextMYQwGE4AhFoW8I6o2vW1AQonCx1GZiJq0q3Wx+3wfWV9kezwrQT5EFTvsmlAtM=,iv:JnW7NmpEAgPHj3vSrzjaIvPTyOaJCys/1LTIkELDn7A=,tag:cBWvRHV0mSfGsBqqZ/bPew==,type:str]
pgp: []
unencrypted_suffix: _unencrypted
version: 3.7.3
9 changes: 9 additions & 0 deletions config/clusters/nasa-veda/prod.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
basehub:
userServiceAccount:
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::444055461661:role/nasa-veda-prod
jupyterhub:
hub:
config:
GitHubOAuthenticator:
oauth_callback_url: https://nasa-veda.2i2c.cloud/hub/oauth_callback
9 changes: 9 additions & 0 deletions config/clusters/nasa-veda/staging.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
basehub:
userServiceAccount:
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::444055461661:role/nasa-veda-staging
jupyterhub:
hub:
config:
GitHubOAuthenticator:
oauth_callback_url: https://staging.nasa-veda.2i2c.cloud/hub/oauth_callback
31 changes: 31 additions & 0 deletions config/clusters/nasa-veda/support.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
prometheusIngressAuthSecret:
enabled: true

cluster-autoscaler:
enabled: true
autoDiscovery:
clusterName: nasa-veda
awsRegion: us-west-2

grafana:
grafana.ini:
server:
root_url: https://grafana.nasa-veda.2i2c.cloud
ingress:
hosts:
- grafana.nasa-veda.2i2c.cloud
tls:
- secretName: grafana-tls
hosts:
- grafana.nasa-veda.2i2c.cloud

prometheus:
server:
ingress:
enabled: true
hosts:
- prometheus.nasa-veda.2i2c.cloud
tls:
- secretName: prometheus-tls
hosts:
- prometheus.nasa-veda.2i2c.cloud
1 change: 1 addition & 0 deletions docs/howto/troubleshoot/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ issues that may arise.
```{toctree}
:maxdepth: 2
logs.md
ssh.md
```
53 changes: 53 additions & 0 deletions docs/howto/troubleshoot/ssh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# SSH into nodes

Sometimes, you need to directly SSH into a kubernetes node to troubleshoot an
issue. This document describes how to do that on various cloud providers.

## GCP

1. Make sure you are [authenticated with gcloud](tools:gcloud:auth)

2. Set the `project` we are operating on, so `gcloud` knows where to look:

```
gcloud config set project <name-of-project>
```

```{note}
You can find the name of the project under `gcp.project` in the `cluster.yaml`
file for the cluster.
```

3. Find the name of the node you want to log in to, usually via `kubectl get node`.
You can also find out which node a specific pod is on with `kubectl get pod -o wide`.

4. SSH into the node with `gcloud compute ssh <node-name>`. This will set you up with
a user who has `sudo` permissions on the node, so you can poke around!


## AWS

1. Make sure you are logged in to the `aws` command-line tool, and authenticated as yourself
with access to the AWS organization under which this cluster lives. You can validate that
with `aws sts get-caller-identity` - the output should include your personal username,
*not* that of the hub deployer!

2. You also need the AWS [Session Manager plugin](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html)
for the `aws` command-line tool installed.

3. Get the *instance id* of the node. Unlike with GCP, on AWS the instance id is not the same
as the node-name reported by `kubectl get node` or `kubectl get pod -o wide`. The instance
id is on the kubernetes node object as a label with name `alpha.eksctl.io/instance-id`.
You can get the entire object's definition with `kubectl get node <node-name> -o yaml`, and
pick out the value of the `alpha.eksctl.io/instance-id` label from there. This is of the form `i-<some-string>`.

4. You can now ssh with:

```bash
aws ssm start-session --target <instance-id>
```

5. This will put you on the node with the `sh` shell, which is missing a lot of the features we
expect from interactive shells today. You can switch to bash by running `bash -l`.

6. You will be a user with full `sudo` access, so you can troubleshoot to your heart's content.
Loading

0 comments on commit 98ca113

Please sign in to comment.