From af7bf1f151ac71e5df970f94e76edcf5aa1b7b23 Mon Sep 17 00:00:00 2001
From: Matheus Fidelis
Date: Tue, 22 Aug 2023 21:34:19 -0300
Subject: [PATCH] docs(terraform): Node Termination Handler with SQS

---
Reviewer note: the blob ids on the "index" lines are those of the originally
generated files; diffstat and hunk line counts reflect the revised contents.

 docs/terraform/.gitignore                     |   5 +
 docs/terraform/README.md                      |  49 ++++++++
 docs/terraform/data.tf                        |  12 ++
 docs/terraform/event_rules.tf                 | 108 ++++++++++++++++++
 .../helm_node_termination_handler.tf          |  36 ++++++
 docs/terraform/iam.tf                         |  90 +++++++++++++++
 docs/terraform/providers.tf                   |  12 ++
 docs/terraform/sqs.tf                         |  29 +++++
 docs/terraform/variables.tf                   |   7 ++
 9 files changed, 348 insertions(+)
 create mode 100644 docs/terraform/.gitignore
 create mode 100644 docs/terraform/README.md
 create mode 100644 docs/terraform/data.tf
 create mode 100644 docs/terraform/event_rules.tf
 create mode 100644 docs/terraform/helm_node_termination_handler.tf
 create mode 100644 docs/terraform/iam.tf
 create mode 100644 docs/terraform/providers.tf
 create mode 100644 docs/terraform/sqs.tf
 create mode 100644 docs/terraform/variables.tf

diff --git a/docs/terraform/.gitignore b/docs/terraform/.gitignore
new file mode 100644
index 00000000..b8db45b1
--- /dev/null
+++ b/docs/terraform/.gitignore
@@ -0,0 +1,5 @@
+terraform.tf*
+.terraform/**
+.terraform.tfstate*
+.terraform.lock*
+.terraform
\ No newline at end of file
diff --git a/docs/terraform/README.md b/docs/terraform/README.md
new file mode 100644
index 00000000..a70a49f6
--- /dev/null
+++ b/docs/terraform/README.md
@@ -0,0 +1,49 @@
+# Setup Node Termination Handler with Terraform Providers
+
+## Helm chart provider example
+
+```hcl
+resource "helm_release" "node_termination_handler" {
+  name      = "aws-node-termination-handler"
+  namespace = "kube-system"
+
+  chart      = "aws-node-termination-handler"
+  repository = "https://aws.github.io/eks-charts/"
+  version    = "0.21.0"
+
+  set {
+    name  = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
+    value = aws_iam_role.aws_node_termination_handler_role.arn
+  }
+
+  set {
+    name  = "awsRegion"
+    value = var.aws_region
+  }
+
+  set {
+    name  = "queueURL"
+    value = aws_sqs_queue.main.url
+  }
+
+  set {
+    name  = "checkTagBeforeDraining"
+    value = false
+  }
+
+  set {
+    name  = "enableSqsTerminationDraining"
+    value = true
+  }
+}
+```
+
+## Apply Terraform
+
+```bash
+terraform init
+```
+
+```bash
+terraform apply --auto-approve
+```
\ No newline at end of file
diff --git a/docs/terraform/data.tf b/docs/terraform/data.tf
new file mode 100644
index 00000000..086b2ac2
--- /dev/null
+++ b/docs/terraform/data.tf
@@ -0,0 +1,12 @@
+# EKS cluster lookup: endpoint, CA certificate and OIDC issuer are read from it.
+data "aws_eks_cluster" "main" {
+  name = var.cluster_name
+}
+
+# Authentication token consumed by the helm provider in providers.tf.
+data "aws_eks_cluster_auth" "main" {
+  name = var.cluster_name
+}
+
+# Account ID used to build the OIDC provider ARN in iam.tf.
+data "aws_caller_identity" "current" {}
diff --git a/docs/terraform/event_rules.tf b/docs/terraform/event_rules.tf
new file mode 100644
index 00000000..493bece9
--- /dev/null
+++ b/docs/terraform/event_rules.tf
@@ -0,0 +1,108 @@
+# EventBridge rules forwarding Auto Scaling, EC2 and AWS Health events to the
+# Node Termination Handler queue (aws_sqs_queue.main).
+
+# Auto Scaling instance-terminate lifecycle actions.
+resource "aws_cloudwatch_event_rule" "node_termination_handler_instance_terminate" {
+  name        = format("%s-node-termination-handler-instance-terminate", var.cluster_name)
+  description = var.cluster_name
+
+  event_pattern = jsonencode({
+    source = ["aws.autoscaling"]
+    detail-type = [
+      "EC2 Instance-terminate Lifecycle Action"
+    ]
+  })
+}
+
+resource "aws_cloudwatch_event_target" "node_termination_handler_instance_terminate" {
+  rule      = aws_cloudwatch_event_rule.node_termination_handler_instance_terminate.name
+  target_id = "SendToSQS"
+  arn       = aws_sqs_queue.main.arn
+}
+
+
+# AWS Health scheduled changes for EC2.
+resource "aws_cloudwatch_event_rule" "node_termination_handler_scheduled_change" {
+  name        = format("%s-node-termination-handler-scheduled-change", var.cluster_name)
+  description = var.cluster_name
+
+  event_pattern = jsonencode({
+    source = ["aws.health"]
+    detail-type = [
+      "AWS Health Event"
+    ]
+    detail = {
+      service = [
+        "EC2"
+      ]
+      eventTypeCategory = [
+        "scheduledChange"
+      ]
+    }
+  })
+}
+
+resource "aws_cloudwatch_event_target" "node_termination_handler_scheduled_change" {
+  rule      = aws_cloudwatch_event_rule.node_termination_handler_scheduled_change.name
+  target_id = "SendToSQS"
+  arn       = aws_sqs_queue.main.arn
+}
+
+# Spot instance interruption warnings.
+resource "aws_cloudwatch_event_rule" "node_termination_handler_spot_termination" {
+  name        = format("%s-node-termination-handler-spot-termination", var.cluster_name)
+  description = var.cluster_name
+
+  event_pattern = jsonencode({
+    source = ["aws.ec2"]
+    detail-type = [
+      "EC2 Spot Instance Interruption Warning"
+    ]
+  })
+}
+
+resource "aws_cloudwatch_event_target" "node_termination_handler_spot_termination" {
+  rule      = aws_cloudwatch_event_rule.node_termination_handler_spot_termination.name
+  target_id = "SendToSQS"
+  arn       = aws_sqs_queue.main.arn
+}
+
+
+# Instance rebalance recommendations.
+resource "aws_cloudwatch_event_rule" "node_termination_handler_rebalance" {
+  name        = format("%s-node-termination-handler-rebalance", var.cluster_name)
+  description = var.cluster_name
+
+  event_pattern = jsonencode({
+    source = ["aws.ec2"]
+    detail-type = [
+      "EC2 Instance Rebalance Recommendation"
+    ]
+  })
+}
+
+resource "aws_cloudwatch_event_target" "node_termination_handler_rebalance" {
+  rule      = aws_cloudwatch_event_rule.node_termination_handler_rebalance.name
+  target_id = "SendToSQS"
+  arn       = aws_sqs_queue.main.arn
+}
+
+
+# Instance state-change notifications.
+resource "aws_cloudwatch_event_rule" "node_termination_handler_state_change" {
+  name        = format("%s-node-termination-handler-state-change", var.cluster_name)
+  description = var.cluster_name
+
+  event_pattern = jsonencode({
+    source = ["aws.ec2"]
+    detail-type = [
+      "EC2 Instance State-change Notification"
+    ]
+  })
+}
+
+resource "aws_cloudwatch_event_target" "node_termination_handler_state_change" {
+  rule      = aws_cloudwatch_event_rule.node_termination_handler_state_change.name
+  target_id = "SendToSQS"
+  arn       = aws_sqs_queue.main.arn
+}
diff --git a/docs/terraform/helm_node_termination_handler.tf b/docs/terraform/helm_node_termination_handler.tf
new file mode 100644
index 00000000..99f32524
--- /dev/null
+++ b/docs/terraform/helm_node_termination_handler.tf
@@ -0,0 +1,36 @@
+# Installs the aws-node-termination-handler chart with SQS termination draining
+# enabled, reading from aws_sqs_queue.main. The trust policy in iam.tf expects
+# the service account kube-system/aws-node-termination-handler.
+resource "helm_release" "node_termination_handler" {
+  name      = "aws-node-termination-handler"
+  namespace = "kube-system"
+
+  chart      = "aws-node-termination-handler"
+  repository = "https://aws.github.io/eks-charts/"
+  version    = "0.21.0"
+
+  set {
+    name  = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
+    value = aws_iam_role.aws_node_termination_handler_role.arn
+  }
+
+  set {
+    name  = "awsRegion"
+    value = var.aws_region
+  }
+
+  set {
+    name  = "queueURL"
+    value = aws_sqs_queue.main.url
+  }
+
+  set {
+    name  = "checkTagBeforeDraining"
+    value = false
+  }
+
+  set {
+    name  = "enableSqsTerminationDraining"
+    value = true
+  }
+}
diff --git a/docs/terraform/iam.tf b/docs/terraform/iam.tf
new file mode 100644
index 00000000..58152dd6
--- /dev/null
+++ b/docs/terraform/iam.tf
@@ -0,0 +1,90 @@
+# Cluster OIDC issuer without the URL scheme, as used in the provider ARN and
+# in the trust-policy condition keys below.
+locals {
+  oidc_issuer = replace(data.aws_eks_cluster.main.identity[0].oidc[0].issuer, "https://", "")
+}
+
+# Trust policy for the role assumed by the Node Termination Handler pods
+# through the cluster's OIDC provider.
+data "aws_iam_policy_document" "aws_node_termination_handler_role" {
+  statement {
+    actions = ["sts:AssumeRoleWithWebIdentity"]
+    effect  = "Allow"
+
+    principals {
+      identifiers = [
+        format("arn:aws:iam::%s:oidc-provider/%s", data.aws_caller_identity.current.account_id, local.oidc_issuer)
+      ]
+      type = "Federated"
+    }
+
+    # Without these conditions any service account in the cluster could assume
+    # the role. Assumes the chart names its service account after the release
+    # (aws-node-termination-handler); adjust if serviceAccount.name is overridden.
+    condition {
+      test     = "StringEquals"
+      variable = format("%s:sub", local.oidc_issuer)
+      values   = ["system:serviceaccount:kube-system:aws-node-termination-handler"]
+    }
+
+    condition {
+      test     = "StringEquals"
+      variable = format("%s:aud", local.oidc_issuer)
+      values   = ["sts.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role" "aws_node_termination_handler_role" {
+  assume_role_policy = data.aws_iam_policy_document.aws_node_termination_handler_role.json
+  name               = format("%s-aws-node-termination-handler", var.cluster_name)
+}
+
+
+# Permissions granted to the Node Termination Handler role.
+data "aws_iam_policy_document" "aws_node_termination_handler_policy" {
+  version = "2012-10-17"
+
+  # Auto Scaling and EC2 calls apply to all resources.
+  statement {
+    effect = "Allow"
+    actions = [
+      "autoscaling:CompleteLifecycleAction",
+      "autoscaling:DescribeAutoScalingInstances",
+      "autoscaling:DescribeTags",
+      "ec2:DescribeInstances"
+    ]
+
+    resources = [
+      "*"
+    ]
+  }
+
+  # Queue access is limited to the handler's own queue.
+  statement {
+    effect = "Allow"
+    actions = [
+      "sqs:DeleteMessage",
+      "sqs:ReceiveMessage"
+    ]
+
+    resources = [
+      aws_sqs_queue.main.arn
+    ]
+  }
+}
+
+resource "aws_iam_policy" "aws_node_termination_handler_policy" {
+  name        = format("%s-aws_node_termination_handler", var.cluster_name)
+  path        = "/"
+  description = var.cluster_name
+
+  policy = data.aws_iam_policy_document.aws_node_termination_handler_policy.json
+}
+
+# Non-exclusive attachment: aws_iam_policy_attachment would also detach the
+# policy from every role, user or group not listed in it.
+resource "aws_iam_role_policy_attachment" "aws_node_termination_handler_policy" {
+  role       = aws_iam_role.aws_node_termination_handler_role.name
+  policy_arn = aws_iam_policy.aws_node_termination_handler_policy.arn
+}
diff --git a/docs/terraform/providers.tf b/docs/terraform/providers.tf
new file mode 100644
index 00000000..b2ec83aa
--- /dev/null
+++ b/docs/terraform/providers.tf
@@ -0,0 +1,12 @@
+provider "aws" {
+  region = var.aws_region
+}
+
+# Helm connects to the EKS cluster resolved by the data sources in data.tf.
+provider "helm" {
+  kubernetes {
+    host                   = data.aws_eks_cluster.main.endpoint
+    cluster_ca_certificate = base64decode(data.aws_eks_cluster.main.certificate_authority[0].data)
+    token                  = data.aws_eks_cluster_auth.main.token
+  }
+}
diff --git a/docs/terraform/sqs.tf b/docs/terraform/sqs.tf
new file mode 100644
index 00000000..8883f702
--- /dev/null
+++ b/docs/terraform/sqs.tf
@@ -0,0 +1,29 @@
+resource "aws_sqs_queue" "main" {
+  name                       = format("%s-aws-node-termination-handler", var.cluster_name)
+  delay_seconds              = 0
+  max_message_size           = 262144 # SQS default (256 KiB); a 2 KiB cap can reject larger events such as AWS Health notifications
+  message_retention_seconds  = 86400
+  receive_wait_time_seconds  = 10
+  visibility_timeout_seconds = 60
+}
+
+resource "aws_sqs_queue_policy" "main" {
+  queue_url = aws_sqs_queue.main.id
+  policy = <