Merge branch 'awslabs:main' into sd_inf
ratnopamc authored Jan 25, 2024
2 parents a9ac96a + ab266a5 commit 2b314e4
Showing 19 changed files with 295 additions and 283 deletions.
5 changes: 3 additions & 2 deletions analytics/terraform/emr-eks-karpenter/README.md
@@ -30,7 +30,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
|------|--------|---------|
| <a name="module_amp_ingest_irsa"></a> [amp\_ingest\_irsa](#module\_amp\_ingest\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
| <a name="module_ebs_csi_driver_irsa"></a> [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 |
| <a name="module_eks"></a> [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
| <a name="module_eks"></a> [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.18 |
| <a name="module_eks_blueprints_addons"></a> [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
| <a name="module_eks_data_addons"></a> [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 |
| <a name="module_emr_containers"></a> [emr\_containers](#module\_emr\_containers) | terraform-aws-modules/emr/aws//modules/virtual-cluster | ~> 1.0 |
@@ -64,6 +64,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
| [kubernetes_secret_v1.spark_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/secret_v1) | resource |
| [kubernetes_service_account_v1.spark_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service_account_v1) | resource |
| [random_password.grafana](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password) | resource |
| [random_string.grafana](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | resource |
| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
| [aws_ecr_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecr_authorization_token) | data source |
@@ -81,7 +82,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_eks_cluster_version"></a> [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.27"` | no |
| <a name="input_eks_cluster_version"></a> [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.28"` | no |
| <a name="input_enable_amazon_prometheus"></a> [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no |
| <a name="input_enable_emr_spark_operator"></a> [enable\_emr\_spark\_operator](#input\_enable\_emr\_spark\_operator) | Enable the Spark Operator to submit jobs with EMR Runtime | `bool` | `false` | no |
| <a name="input_enable_fsx_for_lustre"></a> [enable\_fsx\_for\_lustre](#input\_enable\_fsx\_for\_lustre) | Deploys fsx for lustre addon, storage class and static FSx for Lustre filesystem for EMR | `bool` | `false` | no |
15 changes: 12 additions & 3 deletions analytics/terraform/emr-eks-karpenter/addons.tf
@@ -21,7 +21,7 @@ module "ebs_csi_driver_irsa" {
#---------------------------------------------------------------
module "eks_blueprints_addons" {
source = "aws-ia/eks-blueprints-addons/aws"
version = "~> 1.2"
version = "~> 1.2" # change this to version = 1.2.2 for oldder version of Karpenter deployment

cluster_name = module.eks.cluster_name
cluster_endpoint = module.eks.cluster_endpoint
@@ -87,11 +87,14 @@ module "eks_blueprints_addons" {
enable_karpenter = true
karpenter_enable_spot_termination = true
karpenter_node = {
iam_role_use_name_prefix = false
iam_role_name = "${local.name}-karpenter-node"
iam_role_additional_policies = {
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}
}
karpenter = {
chart_version = "v0.33.1"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
}
@@ -107,7 +110,7 @@ module "eks_blueprints_addons" {
#---------------------------------------
# Adding AWS Load Balancer Controller
#---------------------------------------
enable_aws_load_balancer_controller = true
enable_aws_load_balancer_controller = false

#---------------------------------------
# Enable FSx for Lustre CSI Driver
@@ -189,6 +192,7 @@ resource "kubectl_manifest" "spark_monitor" {

depends_on = [module.eks_blueprints_addons]
}

#---------------------------------------------------------------
# Data on EKS Kubernetes Addons
#---------------------------------------------------------------
@@ -278,9 +282,14 @@ resource "random_password" "grafana" {
override_special = "@_"
}

resource "random_string" "grafana" {
length = 4
lower = true
}

#tfsec:ignore:aws-ssm-secret-use-customer-key
resource "aws_secretsmanager_secret" "grafana" {
name = "${local.name}-grafana"
name = "${local.name}-grafana-${random_string.grafana.result}"
recovery_window_in_days = 0 # Set to zero for this example to force delete during Terraform destroy
}

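The karpenter chart bump to v0.33.1 shown above lands on the release line built around Karpenter's v1beta1 NodePool/EC2NodeClass APIs rather than the older v1alpha5 Provisioner, which is presumably why the comment earlier in this file suggests pinning the addons module to 1.2.2 for older Karpenter deployments. As a rough sketch only — the names, requirements, and limits below are illustrative and not taken from this repository — a NodePool for the Spark workloads could look like this:

apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: spark-compute-optimized          # illustrative name, not from this repo
spec:
  template:
    spec:
      nodeClassRef:
        name: spark-compute-optimized    # assumes a matching EC2NodeClass is defined separately
      requirements:
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["spot", "on-demand"]
        - key: kubernetes.io/arch
          operator: In
          values: ["amd64"]
  limits:
    cpu: 1000                            # example cap on total provisioned vCPUs
  disruption:
    consolidationPolicy: WhenEmpty
    consolidateAfter: 30s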
@@ -66,9 +66,10 @@ spec:
# spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size
# -----------------------------------------------------
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
emptyDir: {}
driver:
volumeMounts:
- name: spark-local-dir-1
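The recurring change in this and the following pod templates replaces the hostPath volume at /local1 with an emptyDir, so the Spark local directory no longer depends on a directory pre-created on the host. If you instead want shuffle data on the Karpenter-managed NVMe RAID0 mount referenced in the comment, a hostPath volume along these lines could be used — a sketch only, assuming /mnt/k8s-disks/0 actually exists on the node:

volumes:
  - name: spark-local-dir-1
    hostPath:
      path: /mnt/k8s-disks/0   # RAID0 mount Karpenter builds from NVMe instance-store disks
      type: Directory          # fails fast if the directory is missing on the node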
@@ -65,9 +65,10 @@ spec:
restartPolicy:
type: Never
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
emptyDir: {}
driver:
volumeMounts:
- name: spark-local-dir-1
@@ -5,10 +5,10 @@ metadata:
namespace: emr-data-team-a
spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
provisioner: spark-compute-optimized
@@ -39,10 +39,10 @@ mkdir -p "../input"
wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -O "../input/yellow_tripdata_2022-0.parquet"

# Making duplicate copies to increase the size of the data.
max=100
max=20
for (( i=1; i <= $max; ++i ))
do
cp -rf "../input/yellow_tripdata_2022-0.parquet" "../input/yellow_tripdata_2022-${i}.parquet"
cp -rf "../input/yellow_tripdata_2022-0.parquet" "../input/yellow_tripdata_2022-${i}.parquet"
done

aws s3 sync "../input" ${INPUT_DATA_S3_PATH} # Sync from local folder to S3 path
@@ -64,7 +64,7 @@ aws emr-containers start-job-run \
"entryPointArguments": ["'"$INPUT_DATA_S3_PATH"'",
"'"$OUTPUT_DATA_S3_PATH"'"
],
"sparkSubmitParameters": "--conf spark.executor.instances=10"
"sparkSubmitParameters": "--conf spark.executor.instances=2"
}
}' \
--configuration-overrides '{
@@ -6,10 +6,10 @@ metadata:

spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
provisioner: spark-compute-optimized
@@ -5,10 +5,10 @@ metadata:
namespace: emr-data-team-a
spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkGravitonMemoryOptimized"
@@ -5,10 +5,10 @@ metadata:
namespace: emr-data-team-a
spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkGravitonMemoryOptimized"
@@ -5,10 +5,10 @@ metadata:
namespace: emr-data-team-a
spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkMemoryOptimized"
@@ -6,10 +6,10 @@ metadata:

spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkMemoryOptimized"
@@ -40,10 +40,10 @@ metadata:
}]
spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkMemoryOptimized"
@@ -11,10 +11,10 @@ metadata:

spec:
volumes:
# This uses the temporary storage on the node.
# If you are using NVMe SSDs, Karpenter configures a RAID0 array under /mnt/k8s-disks/0 and the shuffle data is copied to this location.
- name: spark-local-dir-1
hostPath:
path: /local1
type: Directory
emptyDir: {}

nodeSelector:
NodeGroupType: "SparkMemoryOptimized"