CUMULUS-3759: Migrated ECS Autoscaling group from launch configurations to launch templates (#3880)

* CUMULUS-3759: Migrated ECS Autoscaling group from launch configurations to launch templates

* update tf resources

* add ec2 container service policy

* update userdata format

* add cumulus-std deployment config

* update autoscaling_cf_template

* delete userdata file

* rename

* instance refresh lifecycle

* no need AmazonEC2ContainerServiceforEC2Role?

* migrate fakeprovider to launch templates

* refactor

* remove variable from cumulus-std
jennyhliu authored Dec 19, 2024
1 parent fcd1c5b commit bae3a87
Showing 9 changed files with 259 additions and 166 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -35,6 +35,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Changed
 
+- **CUMULUS-3759**
+  - Migrated `tf-modules/cumulus/ecs_cluster` ECS Autoscaling group from launch configurations to launch templates
 - **CUMULUS-3955**
   - Removed `VACUUM` statements from db migrations. In cases where the PG database is very large, these queries
     can take a long time and exceed the Lambda timeout, causing failures on deployment.
5 changes: 5 additions & 0 deletions example/config.yml
@@ -8,6 +8,11 @@ cumulus-sit:
   apiUsername: jasmine
   pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider
 
+cumulus-std:
+  bucket: cumulus-sit-internal
+  apiUsername: jasmine
+  pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider
+
 cumulus-es:
   bucket: cumulus-sit-internal
   apiUsername: jasmine
2 changes: 1 addition & 1 deletion example/cumulus-tf/variables.tf
@@ -107,7 +107,7 @@ variable "data_persistence_remote_state_config" {
 }
 
 variable "s3_replicator_config" {
-  type = object({ source_bucket = string, source_prefix = string, target_bucket = string, target_prefix = string, target_region = string })
+  type = object({ source_bucket = string, source_prefix = string, target_bucket = string, target_prefix = string, target_region = optional(string) })
   default = null
   description = "Configuration for the s3-replicator module. Items with prefix of source_prefix in the source_bucket will be replicated to the target_bucket with target_prefix."
 }
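Because `target_region` is now declared as `optional(string)`, deployments may omit it, in which case Terraform sets the attribute to `null`. A minimal sketch of a tfvars entry under that assumption (the bucket names are hypothetical, and the s3-replicator module is presumed to fall back to a default region when the value is null):

s3_replicator_config = {
  source_bucket = "my-access-logs-bucket"     # hypothetical bucket name
  source_prefix = "s3_access_logs"
  target_bucket = "my-metrics-inbound-bucket" # hypothetical bucket name
  target_prefix = "input/s3_access"
  # target_region omitted; optional(string) defaults to null
}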
65 changes: 65 additions & 0 deletions example/deployments/cumulus/cumulus-std.tfvars
@@ -0,0 +1,65 @@
prefix = "cumulus-std"

buckets = {
internal = {
name = "cumulus-sit-internal"
type = "internal"
},
private = {
name = "cumulus-sit-private"
type = "private"
},
protected = {
name = "cumulus-sit-protected"
type = "protected"
},
public = {
name = "cumulus-sit-public"
type = "public"
},
protected-2 = {
name = "cumulus-sit-protected-2"
type = "protected"
},
glacier = {
name = "cumulus-sit-orca-glacier"
type = "orca"
},
dashboard = {
name = "cumulus-sit-dashboard"
type = "dashboard"
}
}

key_name = "lp"

oauth_provider = "launchpad"

saml_entity_id = "https://dashboard.cumulus.sit.earthdata.nasa.gov"
saml_assertion_consumer_service = "https://api.cumulus.sit.earthdata.nasa.gov/saml/auth"
saml_idp_login = "https://auth.launchpad-sbx.nasa.gov/affwebservices/public/saml2sso"
saml_launchpad_metadata_url = "https://auth.launchpad-sbx.nasa.gov/unauth/metadata/launchpad-sbx.idp.xml"

deploy_cumulus_distribution = false

archive_api_url = "https://api.cumulus.sit.earthdata.nasa.gov/"
private_archive_api_gateway = true

# LOG CONFIGURATION (optional)
log_api_gateway_to_cloudwatch = true

tea_distribution_url = "https://data.cumulus.sit.earthdata.nasa.gov"

s3_replicator_config = {
source_bucket = "cumulus-std-access-logs"
source_prefix = "s3_access_logs"
target_bucket = "esdis-metrics-inbound-sit-cumulus-std-distribution"
target_prefix = "input/s3_access/cumulus-stdsit"
}

api_reserved_concurrency = 14

lambda_timeouts = {
queue_granules_task_timeout: 900,
discover_granules_task_timeout: 900
}
10 changes: 10 additions & 0 deletions example/deployments/data-persistence/cumulus-std.tfvars
@@ -0,0 +1,10 @@
prefix = "cumulus-std"

elasticsearch_config = {
domain_name = "es"
instance_count = 2
instance_type = "t2.small.elasticsearch"
version = "5.3"
volume_type = "gp2"
volume_size = 10
}
30 changes: 17 additions & 13 deletions example/fake-provider-cf.yml
@@ -92,19 +92,21 @@ Resources:
           ToPort: 0
       VpcId: !Ref VpcId
 
-  LaunchConfiguration:
-    Type: AWS::AutoScaling::LaunchConfiguration
+  LaunchTemplate:
+    Type: AWS::EC2::LaunchTemplate
     Properties:
-      AssociatePublicIpAddress: false
-      IamInstanceProfile: !Ref InstanceProfile
-      ImageId: !Ref LatestAmiId
-      InstanceMonitoring: false
-      InstanceType: t3.small
-      SecurityGroups:
-        - !Ref SecurityGroup
-      UserData:
-        Fn::Base64:
-          Fn::Sub: |
+      LaunchTemplateName: "fake-provider-launch-template"
+      LaunchTemplateData:
+        IamInstanceProfile:
+          Arn: !GetAtt InstanceProfile.Arn
+        ImageId: !Ref LatestAmiId
+        Monitoring:
+          Enabled: false
+        InstanceType: t3.small
+        SecurityGroupIds:
+          - !Ref SecurityGroup
+        UserData:
+          Fn::Base64: !Sub |
             #!/bin/bash -ex
 
             TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
@@ -280,7 +282,9 @@ Resources:
         MinInstancesInService: 0
     DependsOn: S3ProviderBucket
     Properties:
-      LaunchConfigurationName: !Ref LaunchConfiguration
+      LaunchTemplate:
+        LaunchTemplateId: !Ref LaunchTemplate
+        Version: !GetAtt LaunchTemplate.LatestVersionNumber
       MinSize: "1"
      DesiredCapacity: "1"
       MaxSize: "1"
109 changes: 80 additions & 29 deletions tf-modules/cumulus/ecs_cluster.tf
@@ -220,45 +220,96 @@ data "aws_efs_mount_target" "ecs_cluster_instance" {
 }
 
 locals {
-  ecs_instance_autoscaling_cf_template_config = {
+  ecs_instance_autoscaling_user_data_config = {
     cluster_name = aws_ecs_cluster.default.name
     container_stop_timeout = var.ecs_container_stop_timeout,
     docker_hub_config = var.ecs_docker_hub_config,
     docker_volume_size = var.ecs_cluster_instance_docker_volume_size,
     docker_volume_create_size = var.ecs_cluster_instance_docker_volume_size - 1,
     efs_dns_name = var.ecs_efs_config == null ? null : data.aws_efs_mount_target.ecs_cluster_instance[0].dns_name,
     efs_mount_point = var.ecs_efs_config == null ? null : var.ecs_efs_config.mount_point,
-    image_id = var.ecs_cluster_instance_image_id,
     include_docker_cleanup_cronjob = var.ecs_include_docker_cleanup_cronjob,
-    instance_profile = aws_iam_instance_profile.ecs_cluster_instance.arn,
-    instance_type = var.ecs_cluster_instance_type,
-    key_name = var.key_name,
-    min_size = var.ecs_cluster_min_size,
-    desired_capacity = var.ecs_cluster_desired_size,
-    max_size = var.ecs_cluster_max_size,
     region = data.aws_region.current.name
-    security_group_ids = compact(concat(
-      [
-        aws_security_group.ecs_cluster_instance.id,
-        var.elasticsearch_security_group_id,
-        var.rds_security_group
-      ],
-      var.ecs_custom_sg_ids
-    ))
-    subnet_ids = var.ecs_cluster_instance_subnet_ids,
     task_reaper_object = aws_s3_bucket_object.task_reaper
   }
 
+  security_group_ids = compact(concat(
+    [
+      aws_security_group.ecs_cluster_instance.id,
+      var.elasticsearch_security_group_id,
+      var.rds_security_group
+    ],
+    var.ecs_custom_sg_ids
+  ))
 }
 
+resource "aws_launch_template" "ecs_cluster_instance" {
+  name_prefix = "${var.prefix}_ecs_cluster_template"
+  key_name = var.key_name
+  image_id = var.ecs_cluster_instance_image_id
+  instance_type = var.ecs_cluster_instance_type
+  vpc_security_group_ids = local.security_group_ids
+  block_device_mappings {
+    device_name = "/dev/xvdcz"
+    ebs {
+      delete_on_termination = true
+      encrypted = true
+      volume_size = var.ecs_cluster_instance_docker_volume_size
+    }
+  }
+
+  iam_instance_profile {
+    arn = aws_iam_instance_profile.ecs_cluster_instance.arn
+  }
+  monitoring {
+    enabled = true
+  }
+
+  user_data = base64encode(templatefile(
+    "${path.module}/ecs_cluster_instance_autoscaling_user_data.tmpl",
+    local.ecs_instance_autoscaling_user_data_config
+  ))
+}
 
-resource "aws_cloudformation_stack" "ecs_instance_autoscaling_group" {
-  name = "${aws_ecs_cluster.default.name}-autoscaling-group"
-  template_body = templatefile("${path.module}/ecs_cluster_instance_autoscaling_cf_template.yml.tmpl", local.ecs_instance_autoscaling_cf_template_config)
-  tags = var.tags
resource "aws_autoscaling_group" "ecs_cluster_instance" {
name_prefix = aws_ecs_cluster.default.name
desired_capacity = var.ecs_cluster_desired_size
max_size = var.ecs_cluster_max_size
min_size = var.ecs_cluster_min_size
vpc_zone_identifier = var.ecs_cluster_instance_subnet_ids

instance_refresh {
strategy = "Rolling"
preferences {
min_healthy_percentage = 50
}
}
launch_template {
id = aws_launch_template.ecs_cluster_instance.id
version = aws_launch_template.ecs_cluster_instance.latest_version
}
lifecycle {
create_before_destroy = true
}

tag {
key = "Name"
value = aws_ecs_cluster.default.name
propagate_at_launch = true
}

dynamic "tag" {
for_each = var.tags
content {
key = tag.key
propagate_at_launch = true
value = tag.value
}
}
}

resource "aws_autoscaling_lifecycle_hook" "ecs_instance_termination_hook" {
name = "${aws_ecs_cluster.default.name}-ecs-termination-hook"
autoscaling_group_name = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
autoscaling_group_name = aws_autoscaling_group.ecs_cluster_instance.name
default_result = "CONTINUE"
heartbeat_timeout = 150
lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"
@@ -267,8 +318,8 @@ resource "aws_autoscaling_lifecycle_hook" "ecs_instance_termination_hook" {
 # Scale in config
 
 resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_in" {
-  name = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-scale-in"
-  autoscaling_group_name = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
+  name = "${aws_autoscaling_group.ecs_cluster_instance.name}-scale-in"
+  autoscaling_group_name = aws_autoscaling_group.ecs_cluster_instance.name
   adjustment_type = "PercentChangeInCapacity"
   metric_aggregation_type = "Average"
   policy_type = "StepScaling"
@@ -280,7 +331,7 @@ resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_in" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale_in_alarm" {
-  alarm_name = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-cpu-scale-in"
+  alarm_name = "${aws_autoscaling_group.ecs_cluster_instance.name}-cpu-scale-in"
   comparison_operator = "LessThanThreshold"
   alarm_actions = [aws_autoscaling_policy.ecs_instance_autoscaling_group_scale_in.arn]
   datapoints_to_alarm = 1
@@ -298,8 +349,8 @@ resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale_in_alarm" {
 # Scale out config
 
 resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_out" {
-  name = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-scale-out"
-  autoscaling_group_name = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
+  name = "${aws_autoscaling_group.ecs_cluster_instance.name}-scale-out"
+  autoscaling_group_name = aws_autoscaling_group.ecs_cluster_instance.name
   adjustment_type = "PercentChangeInCapacity"
   metric_aggregation_type = "Average"
   policy_type = "StepScaling"
@@ -312,7 +363,7 @@ resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_out" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale_out_alarm" {
-  alarm_name = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-cpu-scale-out"
+  alarm_name = "${aws_autoscaling_group.ecs_cluster_instance.name}-cpu-scale-out"
   comparison_operator = "GreaterThanThreshold"
   alarm_actions = [aws_autoscaling_policy.ecs_instance_autoscaling_group_scale_out.arn]
   datapoints_to_alarm = 1
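Taken together, the new `aws_launch_template`/`aws_autoscaling_group` pair replaces the CloudFormation stack: because the ASG pins `version` to the template's `latest_version` and enables `instance_refresh`, any change that produces a new launch template version now triggers a rolling replacement of cluster instances. A minimal sketch of how a deployment might exercise this, assuming the standard `tf-modules/cumulus` variables (the AMI ID below is hypothetical):

# Hypothetical tfvars change for a deployment using tf-modules/cumulus:
# bumping the ECS cluster AMI creates a new launch template version, and the
# autoscaling group's instance_refresh (Rolling, min 50% healthy) replaces
# instances in place; no CloudFormation stack update is involved anymore.
ecs_cluster_instance_image_id = "ami-0abc1234def567890" # hypothetical ECS-optimized AMI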
(Diffs for the remaining changed files were not loaded.)
