I have an ECS cluster with tasks running on EC2, but the instance is not "using" the task role.
Despite spending a lot of time debugging — I have verified that ecs.config
contains the correct values, and I am using an ECS-optimized image — the IAM console shows that the task role has never been used. This blocks the container from running: I have role-based authentication set up with MongoDB, so the process exits because it cannot connect to the database.
The same setup works with Fargate, not with EC2. What am I missing?
Setup:
Instance profile:
# IAM Role for ECS Instances
# This is the INSTANCE role used by the host / ECS agent — distinct from the
# task role further below, which is what the application containers should
# actually receive credentials from.
resource "aws_iam_role" "ecs_instance_role" {
  name = "ecs-instance-role"

  # Trust policy: only the EC2 service may assume this role (via the
  # instance profile attached to the launch template).
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
}
# AWS-managed policy that lets the ECS agent register the instance with the
# cluster, poll for tasks, and report telemetry.
resource "aws_iam_role_policy_attachment" "ecs_instance_policy" {
  role       = aws_iam_role.ecs_instance_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
}
# Attach the AmazonSSMManagedInstanceCore policy for SSM access
# (enables Session Manager shell access to the container instances).
resource "aws_iam_role_policy_attachment" "ecs_ssm_policy" {
  role       = aws_iam_role.ecs_instance_role.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}
# Instance profile wrapping the instance role so EC2 can deliver it to the
# host via IMDS.
# NOTE(review): the launch template references the profile by a variable
# (var.instance_profile_name) — confirm it is set to "ecs-instance-profile".
resource "aws_iam_instance_profile" "ecs_instance_profile" {
  name = "ecs-instance-profile"
  role = aws_iam_role.ecs_instance_role.name
}
EC2:
# ECS Optimized AMI
# Resolves the most recent Amazon Linux 2 ECS-optimized AMI (x86_64, EBS).
# NOTE(review): AL2 is the older ECS AMI family; the AL2023 image or the
# recommended-AMI SSM parameter may be preferable — confirm before changing.
data "aws_ami" "ecs_optimized" {
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-ecs-hvm-*-x86_64-ebs"]
  }

  owners = ["amazon"]
}
# Launch Configuration for ECS Instances
resource "aws_launch_template" "ecs_launch_template" {
  name                   = "${var.app_name}-ecs-ec2-lt-${var.env}-${var.region}"
  image_id               = data.aws_ami.ecs_optimized.id
  instance_type          = var.instance_type
  update_default_version = true

  # NOTE(review): confirm var.instance_profile_name matches the name of the
  # instance profile created for the ECS instance role; a mismatch here would
  # put a different (or no) profile on the instances.
  iam_instance_profile {
    name = var.instance_profile_name
  }

  # Joins the instance to the cluster and opts in to task IAM roles.
  # NOTE(review): ECS_ENABLE_TASK_IAM_ROLE(_NETWORK_HOST) only apply to tasks
  # in bridge/host network mode; the task definition in this setup uses
  # awsvpc, where the agent vends task-role credentials via the container
  # credentials endpoint regardless of these flags — so they are not the
  # mechanism that delivers the task role here.
  user_data = base64encode(
    <<-EOF
#!/bin/bash
echo ECS_ENABLE_TASK_IAM_ROLE=true >> /etc/ecs/ecs.config
echo ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST=true >> /etc/ecs/ecs.config
echo ECS_CLUSTER=${var.ecs_cluster_name} >> /etc/ecs/ecs.config
EOF
  )

  vpc_security_group_ids = var.sg_ids
}
# Auto Scaling Group for ECS Instances
resource "aws_autoscaling_group" "ecs_asg" {
  desired_capacity    = var.min_capacity
  max_size            = var.max_capacity
  min_size            = var.min_capacity
  vpc_zone_identifier = data.aws_subnets.subnets.ids

  launch_template {
    id      = aws_launch_template.ecs_launch_template.id
    version = "$Latest"
  }

  health_check_type         = "EC2"
  health_check_grace_period = 300

  # Required by the ECS capacity provider that wraps this ASG: with
  # managed_termination_protection = "ENABLED", the ASG itself must have
  # instance scale-in protection enabled, otherwise the capacity provider
  # fails validation on create.
  protect_from_scale_in = true

  # ECS managed scaling adjusts desired_capacity at runtime; without this,
  # every subsequent plan would try to reset it back to var.min_capacity.
  lifecycle {
    ignore_changes = [desired_capacity]
  }

  # Marks the ASG as managed by an ECS capacity provider and propagates the
  # tag to launched instances, as recommended when managed scaling is on.
  tag {
    key                 = "AmazonECSManaged"
    value               = true
    propagate_at_launch = true
  }
}
# Capacity provider that lets ECS drive the ASG's size to keep registered
# instance utilization at the configured target.
# NOTE(review): managed_termination_protection = "ENABLED" requires the
# wrapped ASG to have protect_from_scale_in = true — verify the ASG sets it,
# otherwise creating this resource fails validation.
resource "aws_ecs_capacity_provider" "ecs_capacity_provider" {
  name = "${var.app_name}-capacity-provider-${var.env}-${var.region}"

  auto_scaling_group_provider {
    auto_scaling_group_arn         = aws_autoscaling_group.ecs_asg.arn
    managed_termination_protection = "ENABLED"
    managed_draining               = "ENABLED"

    managed_scaling {
      maximum_scaling_step_size = 2
      minimum_scaling_step_size = 1
      status                    = "ENABLED"
      # Target utilization (%) of the ASG's instances before scaling out.
      target_capacity = var.target_capacity_percentage
    }
  }
}
ECS service:
resource "aws_ecs_task_definition" "task_def" {
  family = "${var.app_name}-task-def-${var.env}-${var.region}"

  # awsvpc: each task gets its own ENI and security group. In this mode the
  # agent vends task-role credentials via the container credentials endpoint
  # (AWS_CONTAINER_CREDENTIALS_RELATIVE_URI), and hostPort must equal
  # containerPort (satisfied below).
  network_mode = "awsvpc"

  # task_role_arn: role the application's AWS SDK calls run as.
  # execution_role_arn: role the ECS agent uses to pull the image and
  # write logs — the app never sees it.
  task_role_arn      = var.task_role_arn
  execution_role_arn = var.exec_role_arn

  cpu    = var.cpu
  memory = var.memory

  runtime_platform {
    cpu_architecture        = "X86_64"
    operating_system_family = "LINUX"
  }

  # NOTE(review): "awslogs-create-group" = "true" requires logs:CreateLogGroup
  # on the EXECUTION role — confirm one of its attached policies allows it.
  # NOTE(review): the ":latest" image tag makes deployments non-reproducible;
  # presumably CodeDeploy supplies pinned task definitions — confirm.
  container_definitions = jsonencode([
    {
      name         = "${var.app_name}-container-${var.env}-${var.region}"
      image        = "${var.ecr_repository_url}:latest"
      cpu          = var.cpu
      memory       = var.memory
      essential    = true
      mountPoints  = []
      volumesFrom  = []
      portMappings = [
        {
          containerPort = var.port
          hostPort      = var.port
          protocol      = "tcp"
        },
      ]
      environment = var.envvars
      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-create-group"  = "true"
          "awslogs-group"         = "${var.app_name}-task-def-${var.env}-${var.region}"
          "awslogs-region"        = "${var.region}"
          "awslogs-stream-prefix" = "ecs"
        }
      }
    },
  ])
}
resource "aws_ecs_service" "service" {
  # CodeDeploy owns blue/green deployments and mutates these attributes
  # outside of Terraform; ignore them so plans don't fight the deployer.
  lifecycle {
    ignore_changes = [
      task_definition,
      load_balancer,
      capacity_provider_strategy
    ]
  }

  cluster                           = var.ecs_cluster_arn
  name                              = "${var.app_name}-service-${var.env}-${var.region}"
  task_definition                   = aws_ecs_task_definition.task_def.arn
  health_check_grace_period_seconds = 30

  load_balancer {
    target_group_arn = var.blue_target_arn
    container_name   = "${var.app_name}-container-${var.env}-${var.region}"
    container_port   = var.port
  }

  capacity_provider_strategy {
    capacity_provider = var.capacity_provider_name
    weight            = 100
  }

  scheduling_strategy = "REPLICA"

  deployment_controller {
    type = "CODE_DEPLOY"
  }

  # awsvpc on EC2: task ENIs never receive a public IP (assign_public_ip is
  # not supported on EC2 launch type).
  # NOTE(review): confirm these subnets have an outbound route (NAT gateway
  # or VPC endpoints) so the app can reach MongoDB/STS — without one, the
  # task role shows as "never used" simply because no signed call ever
  # leaves the VPC, which matches the Fargate-works/EC2-doesn't symptom if
  # the two launch types use different subnets.
  network_configuration {
    subnets         = data.aws_subnets.subnets.ids
    security_groups = [var.sg_id]
  }

  desired_count = var.min_capacity
}
# Auto Scaling for ECS Service
# Registers the service's DesiredCount as a scalable target for the
# target-tracking policies below.
resource "aws_appautoscaling_target" "scalable_target" {
  service_namespace  = "ecs"
  resource_id        = "service/${var.ecs_cluster_name}/${aws_ecs_service.service.name}"
  scalable_dimension = "ecs:service:DesiredCount"
  min_capacity       = var.min_capacity
  max_capacity       = var.max_capacity
}
# Target-tracking policy: scales the service's task count to hold average
# CPU utilization at var.cpu_scale_threshold.
resource "aws_appautoscaling_policy" "cpu_scaling_policy" {
  name               = "${var.app_name}-cpu-scaling-policy-${var.env}-${var.region}"
  service_namespace  = "ecs"
  resource_id        = "service/${var.ecs_cluster_name}/${aws_ecs_service.service.name}"
  scalable_dimension = "ecs:service:DesiredCount"
  policy_type        = "TargetTrackingScaling"

  target_tracking_scaling_policy_configuration {
    target_value = var.cpu_scale_threshold
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
    # Seconds to wait after a scaling action before allowing another.
    scale_out_cooldown = 300
    scale_in_cooldown  = 300
    disable_scale_in   = false
  }
}
# Target-tracking policy: scales the service's task count to hold average
# memory utilization at var.memory_scale_threshold.
resource "aws_appautoscaling_policy" "memory_scaling_policy" {
  name               = "${var.app_name}-memory-scaling-policy-${var.env}-${var.region}"
  service_namespace  = "ecs"
  resource_id        = "service/${var.ecs_cluster_name}/${aws_ecs_service.service.name}"
  scalable_dimension = "ecs:service:DesiredCount"
  policy_type        = "TargetTrackingScaling"

  target_tracking_scaling_policy_configuration {
    target_value = var.memory_scale_threshold
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageMemoryUtilization"
    }
    # Seconds to wait after a scaling action before allowing another.
    scale_out_cooldown = 300
    scale_in_cooldown  = 300
    disable_scale_in   = false
  }
}
Task and exec roles:
# Task role: the role the application containers run as at runtime.
# The trust principal is ecs-tasks.amazonaws.com (correct for task roles on
# both EC2 and Fargate) — it is ECS, not EC2, that assumes this role on
# behalf of the task.
resource "aws_iam_role" "ecs_task_role" {
  name = "${var.app_name}-ecsTaskRole-${var.env}-${var.region}"
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Sid    = ""
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      },
    ]
  })
}
# Attaches the caller-supplied permission policies (one attachment per ARN)
# to the task role.
resource "aws_iam_role_policy_attachment" "attach_policies_to_task_role" {
  for_each   = toset(var.policy_arns)
  role       = aws_iam_role.ecs_task_role.name
  policy_arn = each.value
}
# Policy document denying CreateLogStream/PutLogEvents on the API log group
# to every principal EXCEPT the task role (Deny + not_principals).
# NOTE(review): this data source is not referenced by any resource visible
# here — confirm it is attached elsewhere (e.g. as a CloudWatch Logs
# resource policy); otherwise it has no effect.
data "aws_iam_policy_document" "only_allow_api_to_put_logs" {
  statement {
    effect = "Deny"
    actions = [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]
    resources = [var.api_log_group_arn]
    not_principals {
      type        = "AWS"
      identifiers = [aws_iam_role.ecs_task_role.arn]
    }
  }
}
# Execution role: used by the ECS agent (not the application) to pull the
# container image and write logs on the task's behalf.
resource "aws_iam_role" "ecs_exec_role" {
  name = "${var.app_name}-ecsExecRole-${var.env}-${var.region}"
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Sid    = ""
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      },
    ]
  })
}
# Caller-supplied policy granting the execution role write access to the
# relevant log groups.
resource "aws_iam_role_policy_attachment" "attach_log_groups_write_to_exec_role" {
  role       = aws_iam_role.ecs_exec_role.name
  policy_arn = var.log_groups_write_policy_arn
}
# Standard AWS-managed execution-role policy (ECR pull + CloudWatch Logs).
resource "aws_iam_role_policy_attachment" "attach_ecs_task_exec_to_exec_role" {
  role       = aws_iam_role.ecs_exec_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}
# Allows AWS Fault Injection Simulator experiments to target the tasks.
resource "aws_iam_role_policy_attachment" "attach_fault_injection_simulator_to_exec_role" {
  role       = aws_iam_role.ecs_exec_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSFaultInjectionSimulatorECSAccess"
}
Putting them together:
# Application image repository.
module "ecr" {
  source   = "../../ecr"
  app_name = var.app_name
  env      = var.env
}

# CloudWatch log group for the API container.
module "api_log_group" {
  source   = "../../log-group"
  app_name = var.app_name
  env      = var.env
  region   = var.region
}

# Task + execution roles; their ARNs feed the service module below.
module "roles" {
  source                      = "../ecs-roles"
  app_name                    = var.app_name
  env                         = var.env
  region                      = var.region
  policy_arns                 = var.task_policy_arns
  api_log_group_arn           = module.api_log_group.log_group_arn
  log_groups_write_policy_arn = var.log_groups_write_policy_arn
}
# Blue/green ALB target groups + listener consumed by CodeDeploy.
module "lb_targets" {
  providers = {
    aws                = aws
    aws.prod_us_east_1 = aws.prod_us_east_1
  }
  source            = "../blue-green-alb"
  app_name          = var.app_name
  env               = var.env
  region            = var.region
  lb_arn            = var.lb_arn
  lb_dns            = var.lb_dns
  lb_zone_id        = var.lb_zone_id
  health_check_path = var.health_check_path
  domain            = var.domain
  url               = var.url
  port              = var.port
}
# EC2 capacity: launch template + ASG + capacity provider.
# NOTE(review): instance_profile_name is passed through as a raw string —
# confirm it equals the name of the instance profile actually created for
# the instance role (ideally wire a module output instead); a mismatch
# silently puts the wrong/no profile on the instances.
# NOTE(review): target_capacity_percentage reuses the service CPU scaling
# threshold — confirm that coupling is intentional.
module "ec2" {
  source                     = "../ec2"
  app_name                   = var.app_name
  env                        = var.env
  region                     = var.region
  ecs_cluster_name           = var.ecs_cluster_name
  instance_profile_name      = var.instance_profile_name
  sg_ids                     = [var.sg_id]
  instance_type              = var.instance_type
  min_capacity               = var.min_capacity
  max_capacity               = var.max_capacity
  target_capacity_percentage = var.cpu_scale_threshold
}
# Task definition + ECS service + service auto scaling.
# NOTE(review): a desired_count input is passed here, but the service
# resource shown derives desired_count from min_capacity — confirm this
# input is actually consumed by the module.
module "service" {
  source                 = "../ecs-service"
  app_name               = var.app_name
  env                    = var.env
  envvars                = var.envvars
  region                 = var.region
  task_role_arn          = module.roles.task_role_arn
  exec_role_arn          = module.roles.exec_role_arn
  blue_target_arn        = module.lb_targets.blue_target_arn
  port                   = var.port
  sg_id                  = var.sg_id
  ecs_cluster_arn        = var.ecs_cluster_arn
  ecs_cluster_name       = var.ecs_cluster_name
  ecr_repository_url     = module.ecr.ecr_repository_url
  capacity_provider_name = module.ec2.capacity_provider_name
  cpu                    = var.cpu
  memory                 = var.memory
  cpu_scale_threshold    = var.cpu_scale_threshold
  memory_scale_threshold = var.memory_scale_threshold
  min_capacity           = var.min_capacity
  max_capacity           = var.max_capacity
  desired_count          = var.min_capacity
}
# CodeDeploy application/deployment group driving blue/green deployments
# between the two target groups on this service.
module "codedeploy" {
  source            = "../codedeploy"
  app_name          = var.app_name
  env               = var.env
  region            = var.region
  listener_arn      = module.lb_targets.listener_arn
  ecs_cluster_name  = var.ecs_cluster_name
  ecs_service_name  = module.service.ecs_service_name
  blue_target_name  = module.lb_targets.blue_target_name
  green_target_name = module.lb_targets.green_target_name
}
It is doing that; it's just that role creation and the task definition live in separate modules, and I pass the ARNs from the role module to the task-definition module as variables. In the management console the roles are properly assigned to the task definition, so this is not the solution to the problem.
Edited the post to clarify the connections between tf modules.