Merge branch 'awslabs:main' into spark-op-examples
alanty authored Dec 14, 2023
2 parents 4ce70b4 + 9244219 commit 51e7912
Showing 112 changed files with 13,232 additions and 6,870 deletions.
6 changes: 6 additions & 0 deletions .codespellrc
@@ -0,0 +1,6 @@
[codespell]
skip = .git,*.pdf,*.svg,go.sum,package-lock.json,*.css,.codespellrc,*.sql,website/package-lock.json
check-hidden = true
# some embedded images and known typoed outputs
ignore-regex = ^\s*"image/\S+": ".*|.*loopback adddress.*
# ignore-words-list =
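The same check can be run locally before pushing; a minimal sketch, assuming codespell is installed from PyPI and invoked from the repository root, where it reads its defaults from .codespellrc:

    pip install codespell
    codespell        # applies skip, check-hidden and ignore-regex from the [codespell] section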
22 changes: 22 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,22 @@
---
name: Codespell

on:
push:
branches: [main]
pull_request:
branches: [main]

permissions:
contents: read

jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
- name: Codespell
uses: codespell-project/actions-codespell@v2
8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.4.0
+ rev: v4.5.0
hooks:
- id: trailing-whitespace
args: ['--markdown-linebreak-ext=md']
@@ -10,7 +10,7 @@ repos:
- id: detect-aws-credentials
args: ['--allow-missing-credentials']
- repo: https://github.com/antonbabenko/pre-commit-terraform
- rev: v1.83.2
+ rev: v1.83.5
hooks:
- id: terraform_fmt
- id: terraform_docs
Expand All @@ -33,3 +33,7 @@ repos:
- '--args=--only=terraform_workspace_remote'
- id: terraform_validate
exclude: docs
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
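With the codespell hook appended, the full chain can be exercised locally; a short sketch assuming pre-commit is installed via pip:

    pip install pre-commit
    pre-commit install            # register the git hook once per clone
    pre-commit run --all-files    # run every configured hook, including the new codespell check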
2 changes: 2 additions & 0 deletions README.md
@@ -1,5 +1,7 @@
![Data on EKS](website/static/img/doeks-logo-green.png)
# [Data on Amazon EKS (DoEKS)](https://awslabs.github.io/data-on-eks/)
(pronounce Do.eks)


[![plan-examples](https://github.com/awslabs/data-on-eks/actions/workflows/plan-examples.yml/badge.svg?branch=main)](https://github.com/awslabs/data-on-eks/actions/workflows/plan-examples.yml)

2 changes: 1 addition & 1 deletion ai-ml/emr-spark-rapids/addons.tf
@@ -158,7 +158,7 @@ module "eks_data_addons" {
#---------------------------------------------------------------
# Kubecost Add-on
#---------------------------------------------------------------
- # Note: Kubecost add-on depdends on Kube Prometheus Stack add-on for storing the metrics
+ # Note: Kubecost add-on depends on Kube Prometheus Stack add-on for storing the metrics
enable_kubecost = var.enable_kubecost
kubecost_helm_config = {
values = [templatefile("${path.module}/helm-values/kubecost-values.yaml", {})]
2 changes: 1 addition & 1 deletion ai-ml/emr-spark-rapids/eks.tf
@@ -141,7 +141,7 @@ module "eks" {
instance_types = ["m5.xlarge"] # 4 vCPU and 16GB

ebs_optimized = true
- # This bloc device is used only for root volume. Adjust volume according to your size.
+ # This block device is used only for root volume. Adjust volume according to your size.
# NOTE: Dont use this volume for Spark workloads
block_device_mappings = {
xvda = {
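The hunk above is cut off after the xvda key; for reference, a complete root-volume mapping in the terraform-aws-modules/eks managed node group schema typically looks like the sketch below (sizes and types are illustrative, not taken from this commit):

    block_device_mappings = {
      xvda = {
        device_name = "/dev/xvda"
        ebs = {
          volume_size           = 100     # root volume only; keep Spark scratch/shuffle data off this disk
          volume_type           = "gp3"
          encrypted             = true
          delete_on_termination = true
        }
      }
    }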
@@ -49,7 +49,7 @@ filter:
Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
- # CATION: Donot use `cloudwatch` plugin. This Golang Plugin is not recommnded by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance.
+ # CATION: Do not use `cloudwatch` plugin. This Golang Plugin is not recommended by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance.
# cloudWatch:
# enabled: false

2 changes: 1 addition & 1 deletion ai-ml/jark-stack/terraform/eks.tf
@@ -84,7 +84,7 @@ module "eks" {

eks_managed_node_groups = {
# It's recommended to have a Managed Node group for hosting critical add-ons
- # It's recommeded to use Karpenter to place your workloads instead of using Managed Node groups
+ # It's recommended to use Karpenter to place your workloads instead of using Managed Node groups
# You can leverage nodeSelector and Taints/tolerations to distribute workloads across Managed Node group or Karpenter nodes.
core_node_group = {
name = "core-node-group"
8 changes: 8 additions & 0 deletions ai-ml/jupyterhub/addons.tf
@@ -163,6 +163,7 @@ module "eks_blueprints_addons" {
values = [
<<-EOT
clusterName: ${module.eks.cluster_name}
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
EOT
]
}
@@ -174,6 +175,7 @@ module "eks_blueprints_addons" {
<<-EOT
name: gpu-ts
clusterName: ${module.eks.cluster_name}
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
instanceSizes: ["xlarge", "2xlarge", "4xlarge", "8xlarge", "16xlarge", "24xlarge"]
instanceFamilies: ["g5"]
taints:
@@ -194,6 +196,7 @@ module "eks_blueprints_addons" {
<<-EOT
name: gpu
clusterName: ${module.eks.cluster_name}
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
instanceSizes: ["24xlarge"]
instanceFamilies: ["p4d"]
taints:
@@ -214,6 +217,7 @@ module "eks_blueprints_addons" {
<<-EOT
name: inferentia
clusterName: ${module.eks.cluster_name}
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
instanceSizes: ["8xlarge", "24xlarge"]
instanceFamilies: ["inf2"]
taints:
@@ -237,6 +241,7 @@ module "eks_blueprints_addons" {
<<-EOT
name: trainium
clusterName: ${module.eks.cluster_name}
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
instanceSizes: ["32xlarge"]
instanceFamilies: ["trn1"]
taints:
@@ -292,7 +297,10 @@ module "eks_data_addons" {
userdata_url = try("https://${local.cognito_custom_domain}.auth.${local.region}.amazoncognito.com/oauth2/userInfo", "")
client_id = try(aws_cognito_user_pool_client.user_pool_client[0].id, "")
client_secret = try(aws_cognito_user_pool_client.user_pool_client[0].client_secret, "")
user_pool_id = try(aws_cognito_user_pool.pool[0].id, "")
identity_pool_id = try(aws_cognito_identity_pool.identity_pool[0].id, "")
jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
region = var.region
})]
}

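The karpenterRole values added in this file derive the Karpenter node IAM role name from its ARN with Terraform's split(); a quick sketch with a made-up ARN (the real value comes from module.eks_blueprints_addons.karpenter.node_iam_role_arn):

    locals {
      example_arn  = "arn:aws:iam::111122223333:role/karpenter-jupyterhub-node" # hypothetical ARN
      example_name = split("/", local.example_arn)[1]                           # "karpenter-jupyterhub-node"
    }

The last hunk also threads the Cognito user pool, identity pool, client and region values into the JupyterHub Helm values template, matching the resources introduced in cognito.tf further down.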
1 change: 1 addition & 0 deletions ai-ml/jupyterhub/cleanup.sh
@@ -6,6 +6,7 @@ targets=(
"module.eks_data_addons"
"module.eks_blueprints_addons"
"module.eks"
"module.vpc"
)

#-------------------------------------------
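The hunk extends the targets array that the cleanup script tears down one module at a time; the loop that consumes it falls outside the hunk, but a typical pattern for this kind of script is sketched below (an assumption about the script body, not lines from this commit):

    for target in "${targets[@]}"
    do
      terraform destroy -target="$target" -auto-approve
    done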
224 changes: 224 additions & 0 deletions ai-ml/jupyterhub/cognito.tf
@@ -0,0 +1,224 @@
#---------------------------------------------------------------
# Lambda function for pre token generation
#----------------------------------------------------------------

data "aws_iam_policy_document" "assume_role" {
statement {
effect = "Allow"
principals {
type = "Service"
identifiers = ["lambda.amazonaws.com", "cognito-idp.amazonaws.com"]
}
actions = ["sts:AssumeRole"]
}
}

data "aws_iam_policy" "lambda_execution_policy" {
arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
}

resource "aws_iam_role" "iam_for_lambda" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
name = "iam_for_lambda"
assume_role_policy = data.aws_iam_policy_document.assume_role.json
}

resource "aws_iam_role_policy_attachment" "lambda_policy_attachment" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
role = aws_iam_role.iam_for_lambda[0].name
policy_arn = data.aws_iam_policy.lambda_execution_policy.arn
}

data "archive_file" "lambda" {
type = "zip"
output_path = "/tmp/lambda.zip"
source {
filename = "index.mjs"
content = <<-EOF
export const handler = async (event) => {
event.response = {
claimsOverrideDetails: {
claimsToAddOrOverride: {
department: "engineering",
},
},
};
return event;
};
EOF
}
}

resource "aws_lambda_function" "pretoken_trigger" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
function_name = "pretoken-trigger-function"
filename = data.archive_file.lambda.output_path
source_code_hash = data.archive_file.lambda.output_base64sha256

runtime = "nodejs18.x"
handler = "index.handler"

role = aws_iam_role.iam_for_lambda[0].arn
}

#---------------------------------------------------------------
# Cognito pool, domain and client creation.
# This can be used
# Auth integration later.
#----------------------------------------------------------------
resource "aws_cognito_user_pool" "pool" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
name = "jupyterhub-userpool"

username_attributes = ["email"]
auto_verified_attributes = ["email"]

password_policy {
minimum_length = 6
}

lambda_config {
pre_token_generation = aws_lambda_function.pretoken_trigger[0].arn
}
}

resource "aws_cognito_user_pool_domain" "domain" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
domain = local.cognito_custom_domain
user_pool_id = aws_cognito_user_pool.pool[0].id
}

resource "aws_cognito_user_pool_client" "user_pool_client" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
name = "jupyter-client"
access_token_validity = 1
token_validity_units {
access_token = "days"
}
callback_urls = ["https://${var.jupyterhub_domain}/hub/oauth_callback"]
user_pool_id = aws_cognito_user_pool.pool[0].id
allowed_oauth_flows_user_pool_client = true
allowed_oauth_flows = ["code"]
allowed_oauth_scopes = ["openid", "email"]
generate_secret = true
supported_identity_providers = [
"COGNITO"
]

depends_on = [aws_cognito_user_pool_domain.domain]
}

#---------------------------------------------------------------
# Cognito identity pool creation.
#----------------------------------------------------------------
resource "aws_cognito_identity_pool" "identity_pool" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
identity_pool_name = "jupyterhub-identity-pool"
allow_unauthenticated_identities = false
cognito_identity_providers {
client_id = aws_cognito_user_pool_client.user_pool_client[0].id
provider_name = aws_cognito_user_pool.pool[0].endpoint
server_side_token_check = true
}

depends_on = [aws_cognito_user_pool_client.user_pool_client]
}

resource "aws_s3_bucket" "jupyterhub_bucket" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
bucket_prefix = "jupyterhub-test-bucket-"
}

resource "aws_s3_object" "engineering_object" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
bucket = aws_s3_bucket.jupyterhub_bucket[0].id
key = "engineering/"
}

resource "aws_s3_object" "legal_object" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
bucket = aws_s3_bucket.jupyterhub_bucket[0].id
key = "legal/"
}

#---------------------------------------------------------------
# IAM role for a team member from the engineering department
# In theory there would be other departments such as "legal"
#----------------------------------------------------------------
resource "aws_iam_role" "cognito_authenticated_engineering_role" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0

name = "EngineeringTeamRole"

assume_role_policy = jsonencode({
Version = "2012-10-17",
Statement = [
{
Action = ["sts:AssumeRoleWithWebIdentity", "sts:TagSession"],
Effect = "Allow",
Principal = {
Federated = "cognito-identity.amazonaws.com"
},
Condition = {
StringEquals = {
"cognito-identity.amazonaws.com:aud" = aws_cognito_identity_pool.identity_pool[0].id
},
"ForAnyValue:StringLike" : {
"cognito-identity.amazonaws.com:amr" : "authenticated"
}
}
}
]
})
}

resource "aws_iam_role_policy" "s3_cognito_engineering_policy" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
name = "s3_cognito_engineering_policy"
role = aws_iam_role.cognito_authenticated_engineering_role[0].id

policy = <<-EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": ["s3:List*"],
"Resource": "*",
"Condition": {
"StringEquals": {
"s3:prefix": "$${aws:PrincipalTag/department}"
}
}
}
]
}
EOF
}

resource "aws_cognito_identity_pool_provider_principal_tag" "example" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
identity_pool_id = aws_cognito_identity_pool.identity_pool[0].id
identity_provider_name = aws_cognito_user_pool.pool[0].endpoint
use_defaults = false
principal_tags = {
department = "department"
}
}

resource "aws_iam_policy_attachment" "s3_readonly_policy_attachment" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
name = "S3ReadOnlyAccessAttachment"
policy_arn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
roles = [aws_iam_role.cognito_authenticated_engineering_role[0].name]
}

resource "aws_cognito_identity_pool_roles_attachment" "identity_pool_roles" {
count = var.jupyter_hub_auth_mechanism == "cognito" ? 1 : 0
identity_pool_id = aws_cognito_identity_pool.identity_pool[0].id
roles = {
authenticated = aws_iam_role.cognito_authenticated_engineering_role[0].arn
}
}
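Reading the new cognito.tf end to end: the pre-token-generation Lambda stamps a department claim (hard-coded to "engineering") onto every token, the aws_cognito_identity_pool_provider_principal_tag resource maps that claim to the aws:PrincipalTag/department session tag, and the inline role policy allows s3:List* only when the requested prefix matches that tag. Note the doubled dollar sign in the heredoc policy: Terraform renders $${aws:PrincipalTag/department} as the literal IAM policy variable, so the evaluated statement contains roughly:

    "Condition": {
      "StringEquals": {
        "s3:prefix": "${aws:PrincipalTag/department}"
      }
    }

This is how the demo bucket's engineering/ and legal/ prefixes get tied to the caller's department (an illustrative reading of the configuration above, not output captured from a deployment); the separately attached AmazonS3ReadOnlyAccess managed policy grants broader read access on top of the inline policy.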
2 changes: 1 addition & 1 deletion ai-ml/jupyterhub/helm/aws-for-fluentbit/values.yaml
@@ -49,7 +49,7 @@ filter:
Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
- # CATION: Donot use `cloudwatch` plugin. This Golang Plugin is not recommnded by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance.
+ # CATION: Do not use `cloudwatch` plugin. This Golang Plugin is not recommended by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance.
# cloudWatch:
# enabled: false
