Skip to content
This repository has been archived by the owner on Sep 12, 2023. It is now read-only.

Commit

Permalink
v1 (#2)
Browse files Browse the repository at this point in the history
feat: add new variable to control static monitoring vs anomaly detection
feat: add variables to control period and evaluation periods
breaking: var.xxx_threshold = -1 no longer works, set var.xxx = "off" instead
breaking: var.xxx_threshold = -2 no longer works, set var.xxx = "anomaly_detection" instead
  • Loading branch information
michaelwittig committed Mar 6, 2023
1 parent 0eb4935 commit a398e32
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 54 deletions.
14 changes: 7 additions & 7 deletions example/anomaly_detection/main.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
source = "hashicorp/aws"
version = "4.56.0"
}
}
Expand All @@ -16,7 +16,7 @@ data "aws_subnet_ids" "default" {
}

resource "aws_security_group" "alb" {
vpc_id = data.aws_vpc.default.id
vpc_id = data.aws_vpc.default.id

ingress {
from_port = 80
Expand Down Expand Up @@ -52,7 +52,7 @@ resource "aws_lb_listener" "default" {
load_balancer_arn = aws_lb.default.arn
port = "80"
protocol = "HTTP"

default_action {
type = "forward"
target_group_arn = aws_lb_target_group.default.arn
Expand All @@ -62,9 +62,9 @@ resource "aws_lb_listener" "default" {
# Example: wire the module to the ALB/target group above and monitor both
# 5XX metrics with anomaly detection instead of a static threshold.
module "test" {
  source = "../../"

  endpoint_id           = var.endpoint_id
  loadbalancer_fullname = aws_lb.default.arn_suffix
  targetgroup_fullname  = aws_lb_target_group.default.arn_suffix

  # v1 interface: the monitoring mode is a string. The former "-2" threshold
  # sentinel is no longer supported, so no *_threshold values are passed here.
  alb_5xx_count    = "anomaly_detection"
  target_5xx_count = "anomaly_detection"
}
10 changes: 5 additions & 5 deletions example/default/main.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
source = "hashicorp/aws"
version = "4.56.0"
}
}
Expand All @@ -16,7 +16,7 @@ data "aws_subnet_ids" "default" {
}

resource "aws_security_group" "alb" {
vpc_id = data.aws_vpc.default.id
vpc_id = data.aws_vpc.default.id

ingress {
from_port = 80
Expand Down Expand Up @@ -52,7 +52,7 @@ resource "aws_lb_listener" "default" {
load_balancer_arn = aws_lb.default.arn
port = "80"
protocol = "HTTP"

default_action {
type = "forward"
target_group_arn = aws_lb_target_group.default.arn
Expand All @@ -62,7 +62,7 @@ resource "aws_lb_listener" "default" {
# Example: use the module with all defaults (static alarms for every metric).
module "test" {
  source = "../../"

  # Each argument is assigned exactly once; HCL rejects repeated arguments.
  endpoint_id           = var.endpoint_id
  loadbalancer_fullname = aws_lb.default.arn_suffix
  targetgroup_fullname  = aws_lb_target_group.default.arn_suffix
}
48 changes: 24 additions & 24 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ resource "aws_cloudwatch_event_target" "monitoring_jump_start_connection" {
{
"Type": "monitoring-jump-start-tf-connection",
"Module": "alb",
"Version": "0.3.0",
"Version": "1.0.0",
"Partition": "${data.aws_partition.current.partition}",
"AccountId": "${data.aws_caller_identity.current.account_id}",
"Region": "${data.aws_region.current.name}"
Expand All @@ -134,15 +134,15 @@ resource "random_id" "id8" {

resource "aws_cloudwatch_metric_alarm" "alb_5xx_count_too_high" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.alb_5xx_count_threshold >= 0 && var.enabled) ? 1 : 0
count = (var.alb_5xx_count == "static" && var.enabled) ? 1 : 0

alarm_name = "marbot-alb-5xx-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of 5XX responses from ALB over the last minute too high. (created by marbot)"
alarm_description = "Number of 5XX responses from ALB too high. (created by marbot)"
namespace = "AWS/ApplicationELB"
metric_name = "HTTPCode_ELB_5XX_Count"
statistic = "Sum"
period = 60
evaluation_periods = 1
period = var.alb_5xx_count_period
evaluation_periods = var.alb_5xx_count_evaluation_periods
comparison_operator = "GreaterThanThreshold"
threshold = var.alb_5xx_count_threshold
alarm_actions = [local.topic_arn]
Expand All @@ -156,11 +156,11 @@ resource "aws_cloudwatch_metric_alarm" "alb_5xx_count_too_high" {

resource "aws_cloudwatch_metric_alarm" "alb_5xx_count_too_high_anomaly_detection" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.alb_5xx_count_threshold < -1.5 && var.enabled) ? 1 : 0
count = (var.alb_5xx_count == "anomaly_detection" && var.enabled) ? 1 : 0

alarm_name = "marbot-alb-5xx-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of 5XX responses from ALB over the last minute unexpected. (created by marbot)"
evaluation_periods = 1
alarm_description = "Number of 5XX responses from ALB unexpected. (created by marbot)"
evaluation_periods = var.alb_5xx_count_evaluation_periods
comparison_operator = "GreaterThanUpperThreshold"
threshold_metric_id = "e1"
alarm_actions = [local.topic_arn]
Expand All @@ -181,7 +181,7 @@ resource "aws_cloudwatch_metric_alarm" "alb_5xx_count_too_high_anomaly_detection
metric {
metric_name = "HTTPCode_ELB_5XX_Count"
namespace = "AWS/ApplicationELB"
period = 60
period = var.alb_5xx_count_period
stat = "Sum"

dimensions = {
Expand All @@ -193,15 +193,15 @@ resource "aws_cloudwatch_metric_alarm" "alb_5xx_count_too_high_anomaly_detection

resource "aws_cloudwatch_metric_alarm" "alb_rejected_connection_count_too_high" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.alb_rejected_connection_count_threshold >= 0 && var.enabled) ? 1 : 0
count = (var.alb_rejected_connection_count == "static" && var.enabled) ? 1 : 0

alarm_name = "marbot-alb-rejected-connection-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of rejected connections by ALB too high, ALB needs time to scale up. (created by marbot)"
namespace = "AWS/ApplicationELB"
metric_name = "RejectedConnectionCount"
statistic = "Sum"
period = 60
evaluation_periods = 1
period = var.alb_rejected_connection_count_period
evaluation_periods = var.alb_rejected_connection_count_evaluation_periods
comparison_operator = "GreaterThanThreshold"
threshold = var.alb_rejected_connection_count_threshold
alarm_actions = [local.topic_arn]
Expand All @@ -215,15 +215,15 @@ resource "aws_cloudwatch_metric_alarm" "alb_rejected_connection_count_too_high"

resource "aws_cloudwatch_metric_alarm" "target_5xx_count_too_high" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.target_5xx_count_threshold >= 0 && var.enabled) ? 1 : 0
count = (var.target_5xx_count == "static" && var.enabled) ? 1 : 0

alarm_name = "marbot-target-5xx-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of 5XX responses from targets over the last minute too high. (created by marbot)"
alarm_description = "Number of 5XX responses from targets too high. (created by marbot)"
namespace = "AWS/ApplicationELB"
metric_name = "HTTPCode_Target_5XX_Count"
statistic = "Sum"
period = 60
evaluation_periods = 1
period = var.target_5xx_count_period
evaluation_periods = var.target_5xx_count_evaluation_periods
comparison_operator = "GreaterThanThreshold"
threshold = var.target_5xx_count_threshold
alarm_actions = [local.topic_arn]
Expand All @@ -238,11 +238,11 @@ resource "aws_cloudwatch_metric_alarm" "target_5xx_count_too_high" {

resource "aws_cloudwatch_metric_alarm" "target_5xx_count_too_high_anomaly_detection" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.target_5xx_count_threshold < -1.5 && var.enabled) ? 1 : 0
count = (var.target_5xx_count == "anomaly_detection" && var.enabled) ? 1 : 0

alarm_name = "marbot-target-5xx-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of 5XX responses from targets over the last minute unexpected. (created by marbot)"
evaluation_periods = 1
alarm_description = "Number of 5XX responses from targets unexpected. (created by marbot)"
evaluation_periods = var.target_5xx_count_evaluation_periods
comparison_operator = "GreaterThanUpperThreshold"
threshold_metric_id = "e1"
alarm_actions = [local.topic_arn]
Expand All @@ -263,7 +263,7 @@ resource "aws_cloudwatch_metric_alarm" "target_5xx_count_too_high_anomaly_detect
metric {
metric_name = "HTTPCode_Target_5XX_Count"
namespace = "AWS/ApplicationELB"
period = 60
period = var.target_5xx_count_period
stat = "Sum"

dimensions = {
Expand All @@ -276,15 +276,15 @@ resource "aws_cloudwatch_metric_alarm" "target_5xx_count_too_high_anomaly_detect

resource "aws_cloudwatch_metric_alarm" "target_connection_error_count_too_high" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.target_connection_error_count_threshold >= 0 && var.enabled) ? 1 : 0
count = (var.target_connection_error_count == "static" && var.enabled) ? 1 : 0

alarm_name = "marbot-target-connection-error-count-too-high-${random_id.id8.hex}"
alarm_description = "Number of rejected connections from ALB to targets over the last minute too high. (created by marbot)"
alarm_description = "Number of rejected connections from ALB to targets too high. (created by marbot)"
namespace = "AWS/ApplicationELB"
metric_name = "TargetConnectionErrorCount"
statistic = "Sum"
period = 60
evaluation_periods = 1
period = var.target_connection_error_count_period
evaluation_periods = var.target_connection_error_count_evaluation_periods
comparison_operator = "GreaterThanThreshold"
threshold = var.target_connection_error_count_threshold
alarm_actions = [local.topic_arn]
Expand Down
116 changes: 98 additions & 18 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# A dedicated boolean is required here: deriving the decision from
# var.topic_arn != "" alone fails with the Terraform error
# 'The "count" value depends on resource attributes that cannot be determined
# until apply, so Terraform cannot predict how many instances will be created.'
variable "create_topic" {
  description = "Create SNS topic? If set to false you must set topic_arn as well!"
  default     = true
  type        = bool
}

# ARN of an existing SNS topic; only relevant when create_topic is false.
# The empty-string default means "no external topic supplied".
variable "topic_arn" {
  description = "Optional SNS topic ARN if create_topic := false (usually the output of the modules marbot-monitoring-basic or marbot-standalone-topic)."
  default     = ""
  type        = string
}

# marbot stage selector; per its description, callers should leave the default.
variable "stage" {
  description = "marbot stage (never change this!)."
  default     = "v1"
  type        = string
}

variable "endpoint_id" {
type = string
description = "Your marbot endpoint ID (to get this value: select a channel where marbot belongs to and send a message like this: \"@marbot show me my endpoint id\")."
Expand Down Expand Up @@ -25,45 +44,106 @@ variable "targetgroup_fullname" {
description = "The full name of the target group (last part of ARN, e.g., targetgroup/target-group-name/1234567890123456)."
}



# Monitoring mode for 5XX responses generated by the ALB itself.
#   static            - alarm against alb_5xx_count_threshold
#   anomaly_detection - alarm against the anomaly detection band
#   off               - no alarm
variable "alb_5xx_count" {
  type        = string
  description = "5XX responses from the ALB (not the targets) (static|anomaly_detection|off)."
  default     = "static"

  # The alarms in main.tf are only created when this equals "static" or
  # "anomaly_detection"; without this check a typo would silently disable
  # monitoring instead of failing the plan.
  validation {
    condition     = contains(["static", "anomaly_detection", "off"], var.alb_5xx_count)
    error_message = "The alb_5xx_count value must be one of: static, anomaly_detection, off."
  }
}

# Static threshold for the ALB 5XX alarm (only used when alb_5xx_count = "static").
variable "alb_5xx_count_threshold" {
  type        = number
  description = "The maximum number of 5XX responses from the ALB, not the targets (>= 0)."
  default     = 0

  # The former sentinels -1 (disable) and -2 (anomaly detection) are no longer
  # interpreted. The static alarm compares with GreaterThanThreshold, so a
  # negative threshold would be breached by every non-negative datapoint.
  # Reject it with a hint instead.
  validation {
    condition     = var.alb_5xx_count_threshold >= 0
    error_message = "The alb_5xx_count_threshold value must be >= 0. To disable the alarm set alb_5xx_count to \"off\"; for anomaly detection set alb_5xx_count to \"anomaly_detection\"."
  }
}

# Length, in seconds, of one evaluation period of the ALB 5XX alarm.
variable "alb_5xx_count_period" {
  type        = number
  description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
  default     = 60

  # Enforce the constraint stated in the description at plan time.
  validation {
    condition     = var.alb_5xx_count_period >= 60 && var.alb_5xx_count_period <= 86400 && var.alb_5xx_count_period % 60 == 0
    error_message = "The alb_5xx_count_period value must be a multiple of 60 between 60 and 86400."
  }
}

# Number of consecutive periods evaluated by the ALB 5XX alarm.
variable "alb_5xx_count_evaluation_periods" {
  type        = number
  description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
  default     = 1

  # Only the per-variable part of the constraint is checked here; the combined
  # period * evaluation_periods <= 86400 limit involves a second variable and
  # is left to the caller (cross-variable validation needs Terraform 1.9+).
  validation {
    condition     = var.alb_5xx_count_evaluation_periods >= 1 && floor(var.alb_5xx_count_evaluation_periods) == var.alb_5xx_count_evaluation_periods
    error_message = "The alb_5xx_count_evaluation_periods value must be a whole number >= 1."
  }
}



# Monitoring mode for connections rejected by the ALB.
#   static - alarm against alb_rejected_connection_count_threshold
#   off    - no alarm
variable "alb_rejected_connection_count" {
  type        = string
  description = "Rejected connections because the ALB had reached its maximum number of connections (static|off)."
  default     = "static"

  # The alarm in main.tf is only created when this equals "static"; reject
  # anything else so a typo cannot silently disable monitoring.
  validation {
    condition     = contains(["static", "off"], var.alb_rejected_connection_count)
    error_message = "The alb_rejected_connection_count value must be one of: static, off."
  }
}

# Static threshold for the rejected-connection alarm
# (only used when alb_rejected_connection_count = "static").
variable "alb_rejected_connection_count_threshold" {
  type        = number
  description = "The maximum number of rejected connections (>= 0)."
  default     = 0

  # The former sentinel -1 (disable) is no longer interpreted. The alarm
  # compares with GreaterThanThreshold, so a negative threshold would be
  # breached by every non-negative datapoint.
  validation {
    condition     = var.alb_rejected_connection_count_threshold >= 0
    error_message = "The alb_rejected_connection_count_threshold value must be >= 0. To disable the alarm set alb_rejected_connection_count to \"off\"."
  }
}

# Length, in seconds, of one evaluation period of the rejected-connection alarm.
variable "alb_rejected_connection_count_period" {
  type        = number
  description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
  default     = 60

  # Enforce the constraint stated in the description at plan time.
  validation {
    condition     = var.alb_rejected_connection_count_period >= 60 && var.alb_rejected_connection_count_period <= 86400 && var.alb_rejected_connection_count_period % 60 == 0
    error_message = "The alb_rejected_connection_count_period value must be a multiple of 60 between 60 and 86400."
  }
}

# Number of consecutive periods evaluated by the rejected-connection alarm.
variable "alb_rejected_connection_count_evaluation_periods" {
  type        = number
  description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
  default     = 1

  # Only the per-variable part of the constraint is checked here; the combined
  # period * evaluation_periods <= 86400 limit involves a second variable and
  # is left to the caller (cross-variable validation needs Terraform 1.9+).
  validation {
    condition     = var.alb_rejected_connection_count_evaluation_periods >= 1 && floor(var.alb_rejected_connection_count_evaluation_periods) == var.alb_rejected_connection_count_evaluation_periods
    error_message = "The alb_rejected_connection_count_evaluation_periods value must be a whole number >= 1."
  }
}



# Monitoring mode for 5XX responses generated by the targets.
#   static            - alarm against target_5xx_count_threshold
#   anomaly_detection - alarm against the anomaly detection band
#   off               - no alarm
variable "target_5xx_count" {
  type        = string
  description = "5XX responses from the targets (static|anomaly_detection|off)."
  default     = "static"

  # The alarms in main.tf are only created when this equals "static" or
  # "anomaly_detection"; without this check a typo would silently disable
  # monitoring instead of failing the plan.
  validation {
    condition     = contains(["static", "anomaly_detection", "off"], var.target_5xx_count)
    error_message = "The target_5xx_count value must be one of: static, anomaly_detection, off."
  }
}

# Static threshold for the target 5XX alarm (only used when target_5xx_count = "static").
variable "target_5xx_count_threshold" {
  type        = number
  description = "The maximum number of 5XX responses from the targets (>= 0)."
  default     = 0

  # The former sentinels -1 (disable) and -2 (anomaly detection) are no longer
  # interpreted. The static alarm compares with GreaterThanThreshold, so a
  # negative threshold would be breached by every non-negative datapoint.
  # Reject it with a hint instead.
  validation {
    condition     = var.target_5xx_count_threshold >= 0
    error_message = "The target_5xx_count_threshold value must be >= 0. To disable the alarm set target_5xx_count to \"off\"; for anomaly detection set target_5xx_count to \"anomaly_detection\"."
  }
}

variable "target_connection_error_count_threshold" {
# Length, in seconds, of one evaluation period of the target 5XX alarm.
variable "target_5xx_count_period" {
  type        = number
  description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
  default     = 60

  # Enforce the constraint stated in the description at plan time.
  validation {
    condition     = var.target_5xx_count_period >= 60 && var.target_5xx_count_period <= 86400 && var.target_5xx_count_period % 60 == 0
    error_message = "The target_5xx_count_period value must be a multiple of 60 between 60 and 86400."
  }
}

# We can not only check the var.topic_arn !="" because of the Terraform error: The "count" value depends on resource attributes that cannot be determined until apply, so Terraform cannot predict how many instances will be created.
variable "create_topic" {
type = bool
description = "Create SNS topic? If set to false you must set topic_arn as well!"
default = true
# Number of consecutive periods evaluated by the target 5XX alarm.
variable "target_5xx_count_evaluation_periods" {
  type        = number
  description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
  default     = 1

  # Only the per-variable part of the constraint is checked here; the combined
  # period * evaluation_periods <= 86400 limit involves a second variable and
  # is left to the caller (cross-variable validation needs Terraform 1.9+).
  validation {
    condition     = var.target_5xx_count_evaluation_periods >= 1 && floor(var.target_5xx_count_evaluation_periods) == var.target_5xx_count_evaluation_periods
    error_message = "The target_5xx_count_evaluation_periods value must be a whole number >= 1."
  }
}

variable "topic_arn" {


# Monitoring mode for connection errors between the ALB and its targets.
#   static - alarm against target_connection_error_count_threshold
#   off    - no alarm
variable "target_connection_error_count" {
  type        = string
  description = "Connection errors from the ALB to the targets (static|off)."
  default     = "static"

  # The alarm in main.tf is only created when this equals "static"; reject
  # anything else so a typo cannot silently disable monitoring.
  validation {
    condition     = contains(["static", "off"], var.target_connection_error_count)
    error_message = "The target_connection_error_count value must be one of: static, off."
  }
}

variable "stage" {
type = string
description = "marbot stage (never change this!)."
default = "v1"
# Static threshold for the target connection error alarm
# (only used when target_connection_error_count = "static").
variable "target_connection_error_count_threshold" {
  type        = number
  description = "The maximum number of connection errors from the ALB to the targets (>= 0)."
  default     = 0

  # The former sentinel -1 (disable) is no longer interpreted. The alarm
  # compares with GreaterThanThreshold, so a negative threshold would be
  # breached by every non-negative datapoint.
  validation {
    condition     = var.target_connection_error_count_threshold >= 0
    error_message = "The target_connection_error_count_threshold value must be >= 0. To disable the alarm set target_connection_error_count to \"off\"."
  }
}

# Length, in seconds, of one evaluation period of the target connection error alarm.
variable "target_connection_error_count_period" {
  type        = number
  description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
  default     = 60

  # Enforce the constraint stated in the description at plan time.
  validation {
    condition     = var.target_connection_error_count_period >= 60 && var.target_connection_error_count_period <= 86400 && var.target_connection_error_count_period % 60 == 0
    error_message = "The target_connection_error_count_period value must be a multiple of 60 between 60 and 86400."
  }
}

# Number of consecutive periods evaluated by the target connection error alarm.
variable "target_connection_error_count_evaluation_periods" {
  type        = number
  description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
  default     = 1

  # Only the per-variable part of the constraint is checked here; the combined
  # period * evaluation_periods <= 86400 limit involves a second variable and
  # is left to the caller (cross-variable validation needs Terraform 1.9+).
  validation {
    condition     = var.target_connection_error_count_evaluation_periods >= 1 && floor(var.target_connection_error_count_evaluation_periods) == var.target_connection_error_count_evaluation_periods
    error_message = "The target_connection_error_count_evaluation_periods value must be a whole number >= 1."
  }
}

0 comments on commit a398e32

Please sign in to comment.