nasa · jennyhliu · Dec 19, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -35,6 +35,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
+- **CUMULUS-3759**
+  - Migrated the ECS Auto Scaling group in `tf-modules/cumulus/ecs_cluster.tf` from launch configurations to launch templates.
 - **CUMULUS-3955**
   - Removed `VACUUM` statements from db migrations. In cases where the PG database is very large, these queries
     can take a long time and exceed the Lambda timeout, causing failures on deployment.

diff --git a/example/config.yml b/example/config.yml
@@ -8,6 +8,11 @@ cumulus-sit:
   apiUsername: jasmine
   pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider
 
+cumulus-std:  # cumulus-std deployment entry; points at the cumulus-sit buckets
+  bucket: cumulus-sit-internal
+  apiUsername: jasmine
+  pdrNodeNameProviderBucket: cumulus-sit-pdr-node-name-provider
+
 cumulus-es:
   bucket: cumulus-sit-internal
   apiUsername: jasmine

diff --git a/example/cumulus-tf/variables.tf b/example/cumulus-tf/variables.tf
@@ -107,7 +107,7 @@ variable "data_persistence_remote_state_config" {
 }
 
 variable "s3_replicator_config" {
-  type        = object({ source_bucket = string, source_prefix = string, target_bucket = string, target_prefix = string, target_region = optional(string) })
+  type        = object({ source_bucket = string, source_prefix = string, target_bucket = string, target_prefix = string, target_region = optional(string) }) # target_region may be omitted by callers; optional() then sets it to null
   default     = null
   description = "Configuration for the s3-replicator module. Items with prefix of source_prefix in the source_bucket will be replicated to the target_bucket with target_prefix."
 }

diff --git a/example/deployments/cumulus/cumulus-std.tfvars b/example/deployments/cumulus/cumulus-std.tfvars
@@ -0,0 +1,65 @@
+prefix = "cumulus-std" # deployment prefix
+
+buckets = { # bucket map for this deployment; every name points at a cumulus-sit bucket
+  dashboard = {
+    name = "cumulus-sit-dashboard"
+    type = "dashboard"
+  },
+  glacier = {
+    name = "cumulus-sit-orca-glacier"
+    type = "orca"
+  },
+  internal = {
+    name = "cumulus-sit-internal"
+    type = "internal"
+  },
+  private = {
+    name = "cumulus-sit-private"
+    type = "private"
+  },
+  protected = {
+    name = "cumulus-sit-protected"
+    type = "protected"
+  },
+  protected-2 = {
+    name = "cumulus-sit-protected-2"
+    type = "protected"
+  },
+  public = {
+    name = "cumulus-sit-public"
+    type = "public"
+  }
+}
+
+key_name = "lp"
+
+oauth_provider = "launchpad"
+
+saml_entity_id                  = "https://dashboard.cumulus.sit.earthdata.nasa.gov"
+saml_assertion_consumer_service = "https://api.cumulus.sit.earthdata.nasa.gov/saml/auth"
+saml_idp_login                  = "https://auth.launchpad-sbx.nasa.gov/affwebservices/public/saml2sso"
+saml_launchpad_metadata_url     = "https://auth.launchpad-sbx.nasa.gov/unauth/metadata/launchpad-sbx.idp.xml"
+
+deploy_cumulus_distribution = false
+
+archive_api_url             = "https://api.cumulus.sit.earthdata.nasa.gov/"
+private_archive_api_gateway = true
+
+# Log configuration (optional)
+log_api_gateway_to_cloudwatch = true
+
+tea_distribution_url = "https://data.cumulus.sit.earthdata.nasa.gov"
+
+s3_replicator_config = { # target_region is not set here; example/cumulus-tf/variables.tf declares it optional(string)
+  source_bucket = "cumulus-std-access-logs"
+  source_prefix = "s3_access_logs"
+  target_bucket = "esdis-metrics-inbound-sit-cumulus-std-distribution"
+  target_prefix = "input/s3_access/cumulus-stdsit"
+}
+
+api_reserved_concurrency = 14
+
+lambda_timeouts = { # per-task timeout overrides; 900 is presumably seconds - TODO confirm against the module variable
+  queue_granules_task_timeout    = 900
+  discover_granules_task_timeout = 900
+}
diff --git a/example/deployments/data-persistence/cumulus-std.tfvars b/example/deployments/data-persistence/cumulus-std.tfvars
@@ -0,0 +1,10 @@
+prefix = "cumulus-std" # deployment prefix
+
+elasticsearch_config = { # Elasticsearch settings for the cumulus-std data-persistence deployment
+  domain_name    = "es"
+  instance_count = 2
+  instance_type  = "t2.small.elasticsearch"
+  version        = "5.3" # quoted so the version stays a string
+  volume_size    = 10
+  volume_type    = "gp2"
+}
diff --git a/example/fake-provider-cf.yml b/example/fake-provider-cf.yml
@@ -92,19 +92,21 @@ Resources:
           ToPort: 0
       VpcId: !Ref VpcId
 
-  LaunchConfiguration:
-    Type: AWS::AutoScaling::LaunchConfiguration
+  LaunchTemplate:
+    Type: AWS::EC2::LaunchTemplate
     Properties:
-      AssociatePublicIpAddress: false
-      IamInstanceProfile: !Ref InstanceProfile
-      ImageId: !Ref LatestAmiId
-      InstanceMonitoring: false
-      InstanceType: t3.small
-      SecurityGroups:
-        - !Ref SecurityGroup
-      UserData:
-        Fn::Base64:
-          Fn::Sub: |
+      LaunchTemplateName: "fake-provider-launch-template"
+      LaunchTemplateData:
+        IamInstanceProfile:
+          Arn: !GetAtt InstanceProfile.Arn
+        ImageId: !Ref LatestAmiId
+        Monitoring:
+          Enabled: false
+        InstanceType: t3.small
+        SecurityGroupIds:
+          - !Ref SecurityGroup
+        UserData:
+          Fn::Base64: !Sub |
             #!/bin/bash -ex
 
             TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
@@ -280,7 +282,9 @@ Resources:
         MinInstancesInService: 0
     DependsOn: S3ProviderBucket
     Properties:
-      LaunchConfigurationName: !Ref LaunchConfiguration
+      LaunchTemplate:
+        LaunchTemplateId: !Ref LaunchTemplate
+        Version: !GetAtt LaunchTemplate.LatestVersionNumber
       MinSize: "1"
       DesiredCapacity: "1"
       MaxSize: "1"

diff --git a/tf-modules/cumulus/ecs_cluster.tf b/tf-modules/cumulus/ecs_cluster.tf
@@ -220,45 +220,96 @@ data "aws_efs_mount_target" "ecs_cluster_instance" {
 }
 
 locals {
-  ecs_instance_autoscaling_cf_template_config = {
+  ecs_instance_autoscaling_user_data_config = { # variables rendered into the instance user-data template
     cluster_name              = aws_ecs_cluster.default.name
     container_stop_timeout    = var.ecs_container_stop_timeout,
     docker_hub_config         = var.ecs_docker_hub_config,
-    docker_volume_size        = var.ecs_cluster_instance_docker_volume_size,
     docker_volume_create_size = var.ecs_cluster_instance_docker_volume_size - 1,
     efs_dns_name              = var.ecs_efs_config == null ? null : data.aws_efs_mount_target.ecs_cluster_instance[0].dns_name,
     efs_mount_point           = var.ecs_efs_config == null ? null : var.ecs_efs_config.mount_point,
-    image_id                  = var.ecs_cluster_instance_image_id,
     include_docker_cleanup_cronjob = var.ecs_include_docker_cleanup_cronjob,
-    instance_profile          = aws_iam_instance_profile.ecs_cluster_instance.arn,
-    instance_type             = var.ecs_cluster_instance_type,
-    key_name                  = var.key_name,
-    min_size                  = var.ecs_cluster_min_size,
-    desired_capacity          = var.ecs_cluster_desired_size,
-    max_size                  = var.ecs_cluster_max_size,
     region                    = data.aws_region.current.name
-    security_group_ids        = compact(concat(
-      [
-        aws_security_group.ecs_cluster_instance.id,
-        var.elasticsearch_security_group_id,
-        var.rds_security_group
-      ],
-      var.ecs_custom_sg_ids
-    ))
-    subnet_ids                = var.ecs_cluster_instance_subnet_ids,
     task_reaper_object        = aws_s3_bucket_object.task_reaper
   }
+
+  security_group_ids = compact(concat( # compact() drops null or empty IDs from the combined list
+    [
+      aws_security_group.ecs_cluster_instance.id,
+      var.elasticsearch_security_group_id,
+      var.rds_security_group
+    ],
+    var.ecs_custom_sg_ids
+  ))
+}
+
+resource "aws_launch_template" "ecs_cluster_instance" { # launch template referenced by the ECS cluster Auto Scaling group
+  name_prefix            = "${var.prefix}_ecs_cluster_template"
+  image_id               = var.ecs_cluster_instance_image_id
+  instance_type          = var.ecs_cluster_instance_type
+  key_name               = var.key_name
+  vpc_security_group_ids = local.security_group_ids
+  iam_instance_profile {
+    arn = aws_iam_instance_profile.ecs_cluster_instance.arn
+  }
+  monitoring {
+    enabled = true
+  }
+
+  block_device_mappings { # EBS volume sized by var.ecs_cluster_instance_docker_volume_size
+    device_name = "/dev/xvdcz"
+    ebs {
+      volume_size           = var.ecs_cluster_instance_docker_volume_size
+      encrypted             = true
+      delete_on_termination = true
+    }
+  }
+
+  user_data = base64encode(templatefile( # render the user-data template, then base64-encode it for the launch template
+    "${path.module}/ecs_cluster_instance_autoscaling_user_data.tmpl",
+    local.ecs_instance_autoscaling_user_data_config
+  ))
 }
 
-resource "aws_cloudformation_stack" "ecs_instance_autoscaling_group" {
-  name          = "${aws_ecs_cluster.default.name}-autoscaling-group"
-  template_body = templatefile("${path.module}/ecs_cluster_instance_autoscaling_cf_template.yml.tmpl", local.ecs_instance_autoscaling_cf_template_config)
-  tags          = var.tags
+resource "aws_autoscaling_group" "ecs_cluster_instance" { # Auto Scaling group for the ECS cluster instances
+  name_prefix         = aws_ecs_cluster.default.name
+  vpc_zone_identifier = var.ecs_cluster_instance_subnet_ids
+  min_size            = var.ecs_cluster_min_size
+  desired_capacity    = var.ecs_cluster_desired_size # NOTE(review): the scaling policies also change capacity; confirm apply should reset it
+  max_size            = var.ecs_cluster_max_size
+
+  launch_template {
+    id      = aws_launch_template.ecs_cluster_instance.id
+    version = aws_launch_template.ecs_cluster_instance.latest_version # always track the newest template version
+  }
+  instance_refresh { # rolling replacement that keeps at least 50% of instances healthy
+    strategy = "Rolling"
+    preferences {
+      min_healthy_percentage = 50
+    }
+  }
+  lifecycle {
+    create_before_destroy = true
+  }
+
+  tag {
+    key                 = "Name"
+    value               = aws_ecs_cluster.default.name
+    propagate_at_launch = true
+  }
+
+  dynamic "tag" { # one propagated tag per var.tags entry; NOTE(review): a "Name" key in var.tags would repeat the tag above
+    for_each = var.tags
+    content {
+      key                 = tag.key
+      value               = tag.value
+      propagate_at_launch = true
+    }
+  }
 }
 
 resource "aws_autoscaling_lifecycle_hook" "ecs_instance_termination_hook" {
   name                   = "${aws_ecs_cluster.default.name}-ecs-termination-hook"
-  autoscaling_group_name = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
+  autoscaling_group_name = aws_autoscaling_group.ecs_cluster_instance.name
   default_result         = "CONTINUE"
   heartbeat_timeout      = 150
   lifecycle_transition   = "autoscaling:EC2_INSTANCE_TERMINATING"
@@ -267,8 +318,8 @@ resource "aws_autoscaling_lifecycle_hook" "ecs_instance_termination_hook" {
 # Scale in config
 
 resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_in" {
-  name                    = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-scale-in"
-  autoscaling_group_name  = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
+  name                    = "${aws_autoscaling_group.ecs_cluster_instance.name}-scale-in"
+  autoscaling_group_name  = aws_autoscaling_group.ecs_cluster_instance.name
   adjustment_type         = "PercentChangeInCapacity"
   metric_aggregation_type = "Average"
   policy_type             = "StepScaling"
@@ -280,7 +331,7 @@ resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_in" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale_in_alarm" {
-  alarm_name          = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-cpu-scale-in"
+  alarm_name          = "${aws_autoscaling_group.ecs_cluster_instance.name}-cpu-scale-in"
   comparison_operator = "LessThanThreshold"
   alarm_actions       = [aws_autoscaling_policy.ecs_instance_autoscaling_group_scale_in.arn]
   datapoints_to_alarm = 1
@@ -298,8 +349,8 @@ resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale
 # Scale out config
 
 resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_out" {
-  name                    = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-scale-out"
-  autoscaling_group_name  = aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName
+  name                    = "${aws_autoscaling_group.ecs_cluster_instance.name}-scale-out"
+  autoscaling_group_name  = aws_autoscaling_group.ecs_cluster_instance.name
   adjustment_type         = "PercentChangeInCapacity"
   metric_aggregation_type = "Average"
   policy_type             = "StepScaling"
@@ -312,7 +363,7 @@ resource "aws_autoscaling_policy" "ecs_instance_autoscaling_group_scale_out" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ecs_instance_autoscaling_group_cpu_scale_out_alarm" {
-  alarm_name          = "${aws_cloudformation_stack.ecs_instance_autoscaling_group.outputs.AutoscalingGroupName}-cpu-scale-out"
+  alarm_name          = "${aws_autoscaling_group.ecs_cluster_instance.name}-cpu-scale-out"
   comparison_operator = "GreaterThanThreshold"
   alarm_actions       = [aws_autoscaling_policy.ecs_instance_autoscaling_group_scale_out.arn]
   datapoints_to_alarm = 1