Hi, I have a Terraform script (below) whose instances consistently shut down before their status checks complete. The activity history of the Auto Scaling group shows the following for each instance:
'Launching a new EC2 instance: i-04d7eb490406c33dc. Status Reason: Instance became unhealthy while waiting for instance to be in InService state. Termination Reason: Client.InstanceInitiatedShutdown: Instance initiated shutdown
At 2024-01-12T11:41:03Z an instance was launched in response to an unhealthy instance needing to be replaced.'
Below is the script; when I launch the same AMI manually via the console, the instance works fine:
provider "aws" {
region = "eu-west-1"
}
Create a VPC
resource "aws_vpc" "my_vpc" {
cidr_block = "10.0.0.0/16"
enable_dns_support = true
enable_dns_hostnames = true
}
Create Internet Gateway
resource "aws_internet_gateway" "gw" {
vpc_id = aws_vpc.my_vpc.id
}
# Create four subnets
resource "aws_subnet" "public_subnet_1" {
  vpc_id                  = aws_vpc.my_vpc.id
  cidr_block              = "10.0.1.0/24"
  availability_zone       = "eu-west-1a"
  map_public_ip_on_launch = true
}

resource "aws_subnet" "public_subnet_2" {
  vpc_id                  = aws_vpc.my_vpc.id
  cidr_block              = "10.0.2.0/24"
  availability_zone       = "eu-west-1b"
  map_public_ip_on_launch = true
}

resource "aws_subnet" "private_subnet_1" {
  vpc_id            = aws_vpc.my_vpc.id
  cidr_block        = "10.0.3.0/24"
  availability_zone = "eu-west-1a"
}

resource "aws_subnet" "private_subnet_2" {
  vpc_id            = aws_vpc.my_vpc.id
  cidr_block        = "10.0.4.0/24"
  availability_zone = "eu-west-1b"
}

# Create Route Table for Public Subnets
resource "aws_route_table" "public_rt" {
  vpc_id = aws_vpc.my_vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.gw.id
  }
}

# Associate Route Table with Public Subnets
resource "aws_route_table_association" "public_rt_assoc1" {
  subnet_id      = aws_subnet.public_subnet_1.id
  route_table_id = aws_route_table.public_rt.id
}

resource "aws_route_table_association" "public_rt_assoc2" {
  subnet_id      = aws_subnet.public_subnet_2.id
  route_table_id = aws_route_table.public_rt.id
}
# Create a Security Group for the EC2 instances
resource "aws_security_group" "ec2_sg" {
  vpc_id = aws_vpc.my_vpc.id

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# Create a Load Balancer
resource "aws_lb" "my_lb" {
  name                       = "my-lb"
  internal                   = false
  load_balancer_type         = "application"
  security_groups            = [aws_security_group.ec2_sg.id]
  subnets                    = [aws_subnet.public_subnet_1.id, aws_subnet.public_subnet_2.id]
  enable_deletion_protection = false
}

resource "aws_lb_listener" "front_end" {
  load_balancer_arn = aws_lb.my_lb.arn
  port              = "80"
  protocol          = "HTTP"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.my_tg.arn
  }
}

resource "aws_lb_target_group" "my_tg" {
  name     = "my-tg"
  port     = 80
  protocol = "HTTP"
  vpc_id   = aws_vpc.my_vpc.id

  health_check {
    enabled             = true
    interval            = 30
    path                = "/"
    port                = "traffic-port"
    protocol            = "HTTP"
    healthy_threshold   = 3
    unhealthy_threshold = 3
    timeout             = 5
    matcher             = "200-299"
  }
}
# Define Launch Template
resource "aws_launch_template" "my_lt" {
  name_prefix   = "lt-"
  image_id      = "ami-05d4867ed58f446d9"
  instance_type = "t2.micro"

  block_device_mappings {
    device_name = "/dev/sda1"

    ebs {
      volume_size           = 20
      volume_type           = "gp3"
      throughput            = 125
      delete_on_termination = true
      encrypted             = false
    }
  }

  network_interfaces {
    associate_public_ip_address = true
    security_groups             = [aws_security_group.ec2_sg.id]
  }
}
# Create Auto Scaling Group
resource "aws_autoscaling_group" "my_asg" {
  vpc_zone_identifier = [aws_subnet.public_subnet_1.id, aws_subnet.public_subnet_2.id]
  max_size            = 8
  min_size            = 2
  desired_capacity    = 2

  launch_template {
    id      = aws_launch_template.my_lt.id
    version = "$Latest"
  }

  target_group_arns = [aws_lb_target_group.my_tg.arn]

  tag {
    key                 = "Name"
    value               = "my-instance"
    propagate_at_launch = true
  }

  # With "ELB", the ASG also treats a failing target group health check as unhealthy,
  # on top of the EC2 instance status checks
  health_check_type         = "ELB"
  health_check_grace_period = 900
  force_delete              = true
}
# Auto Scaling Policy for scale up
resource "aws_autoscaling_policy" "scale_up" {
  name                   = "scale-up-on-high-cpu"
  scaling_adjustment     = 1
  adjustment_type        = "ChangeInCapacity"
  cooldown               = 300
  autoscaling_group_name = aws_autoscaling_group.my_asg.name
}

# CloudWatch Metric Alarm for high CPU
resource "aws_cloudwatch_metric_alarm" "high_cpu_alarm" {
  alarm_name          = "high-cpu-usage"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 2
  metric_name         = "CPUUtilization"
  namespace           = "AWS/EC2"
  period              = 300
  statistic           = "Average"
  threshold           = 65
  alarm_description   = "This alarm monitors EC2 CPU usage and triggers scaling up."

  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.my_asg.name
  }

  alarm_actions = [aws_autoscaling_policy.scale_up.arn]
}
I changed the health check type from ELB to EC2, but the same issue persists.
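For reference, the change I made is a one-line edit in the ASG block (sketch of the relevant lines only; with EC2 checks the ASG looks only at instance status, not target group health):

resource "aws_autoscaling_group" "my_asg" {
  # ... other settings unchanged ...

  # Changed from "ELB": only EC2 instance status checks now determine health
  health_check_type         = "EC2"
  health_check_grace_period = 900
}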
Try removing that line completely from your TF and reapplying.
Also take a look at your ASG config in the console to make sure the change actually took effect.
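Something like this (a sketch; with health_check_type omitted, the AWS provider falls back to the default, which is EC2):

resource "aws_autoscaling_group" "my_asg" {
  # ... other settings unchanged ...

  # health_check_type omitted on purpose: defaults to "EC2",
  # so only instance status checks determine health
  health_check_grace_period = 900
  force_delete              = true
}

You can also run terraform state show aws_autoscaling_group.my_asg to confirm what is actually recorded in state after the apply.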
Was this resolved?