By using AWS re:Post, you agree to the AWS re:Post Terms of Use

EC2 container not using ECS Task role

0

I have an ECS cluster with tasks running on EC2, but the instance is not "using" the task role.

Despite spending a lot of time debugging, confirming that ecs.config contains the correct values, and using an ECS-optimized image, the task role has never been used according to the IAM console. This blocks the container from running: I have role-based authentication set up with MongoDB, so the process exits because it cannot connect to the database. The same setup works with Fargate, but not with EC2. What am I missing?

Setup:

Instance profile:

# Role assumed by the EC2 container instances that back the ECS cluster.
resource "aws_iam_role" "ecs_instance_role" {
  name = "ecs-instance-role"

  # Trust policy: only the EC2 service is allowed to assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "ec2.amazonaws.com" }
      },
    ]
  })
}

# Managed policy attached to the instance role for ECS container instances.
resource "aws_iam_role_policy_attachment" "ecs_instance_policy" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
  role       = aws_iam_role.ecs_instance_role.name
}

# Managed policy attached to the instance role for SSM access to the instances.
resource "aws_iam_role_policy_attachment" "ecs_ssm_policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ecs_instance_role.name
}

# Instance profile that wraps the ECS instance role so it can be attached to EC2 instances.
resource "aws_iam_instance_profile" "ecs_instance_profile" {
  role = aws_iam_role.ecs_instance_role.name
  name = "ecs-instance-profile"
}

EC2:


# Looks up the newest Amazon-owned AMI whose name matches the
# "amzn2-ami-ecs-hvm-*-x86_64-ebs" pattern.
data "aws_ami" "ecs_optimized" {
  owners      = ["amazon"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-ecs-hvm-*-x86_64-ebs"]
  }
}

# Launch template for the ECS container instances.
# Uses the AMI found by data.aws_ami.ecs_optimized and the instance profile
# whose name is passed in through var.instance_profile_name.
resource "aws_launch_template" "ecs_launch_template" {
  name                   = "${var.app_name}-ecs-ec2-lt-${var.env}-${var.region}"
  update_default_version = true

  image_id               = data.aws_ami.ecs_optimized.id
  instance_type          = var.instance_type
  vpc_security_group_ids = var.sg_ids

  iam_instance_profile {
    name = var.instance_profile_name
  }

  # Boot script: appends the task-IAM-role switches and the cluster name to
  # /etc/ecs/ecs.config, the file the question says holds the ECS settings.
  user_data = base64encode(
    <<-EOF
      #!/bin/bash
      echo ECS_ENABLE_TASK_IAM_ROLE=true >> /etc/ecs/ecs.config
      echo ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST=true >> /etc/ecs/ecs.config
      echo ECS_CLUSTER=${var.ecs_cluster_name} >> /etc/ecs/ecs.config
    EOF
  )
}

# Auto Scaling group that supplies EC2 container instances to the ECS
# capacity provider (aws_ecs_capacity_provider.ecs_capacity_provider).
resource "aws_autoscaling_group" "ecs_asg" {
  min_size            = var.min_capacity
  max_size            = var.max_capacity
  desired_capacity    = var.min_capacity
  vpc_zone_identifier = data.aws_subnets.subnets.ids

  health_check_type         = "EC2"
  health_check_grace_period = 300

  # The capacity provider sets managed_termination_protection = "ENABLED",
  # which requires instance scale-in protection on the group itself.
  protect_from_scale_in = true

  launch_template {
    id      = aws_launch_template.ecs_launch_template.id
    version = "$Latest"
  }

  # ECS adds this tag automatically once the group is attached to a capacity
  # provider; declaring it here prevents a perpetual diff on every plan.
  tag {
    key                 = "AmazonECSManaged"
    value               = true
    propagate_at_launch = true
  }

  # ECS managed scaling adjusts the desired capacity at runtime; do not let
  # Terraform reset it back to var.min_capacity on each apply.
  lifecycle {
    ignore_changes = [desired_capacity]
  }
}

# Capacity provider that lets ECS scale the Auto Scaling group
# aws_autoscaling_group.ecs_asg, with managed draining and managed
# termination protection both switched on.
resource "aws_ecs_capacity_provider" "ecs_capacity_provider" {
  name = "${var.app_name}-capacity-provider-${var.env}-${var.region}"

  auto_scaling_group_provider {
    auto_scaling_group_arn         = aws_autoscaling_group.ecs_asg.arn
    managed_draining               = "ENABLED"
    managed_termination_protection = "ENABLED"

    managed_scaling {
      status                    = "ENABLED"
      target_capacity           = var.target_capacity_percentage
      minimum_scaling_step_size = 1
      maximum_scaling_step_size = 2
    }
  }
}

ECS service:


# Task definition for the application container.
# - task_role_arn: role whose credentials the application code should receive.
# - execution_role_arn: role passed in as var.exec_role_arn.
# Both ARNs are supplied by the caller of this module.
resource "aws_ecs_task_definition" "task_def" {
  family       = "${var.app_name}-task-def-${var.env}-${var.region}"
  network_mode = "awsvpc"

  task_role_arn      = var.task_role_arn
  execution_role_arn = var.exec_role_arn

  cpu    = var.cpu
  memory = var.memory

  runtime_platform {
    operating_system_family = "LINUX"
    cpu_architecture        = "X86_64"
  }

  # Single essential container; its name must match the container_name used
  # in the service's load_balancer block.
  container_definitions = jsonencode([
    {
      name        = "${var.app_name}-container-${var.env}-${var.region}"
      image       = "${var.ecr_repository_url}:latest"
      essential   = true
      cpu         = var.cpu
      memory      = var.memory
      environment = var.envvars
      mountPoints = []
      volumesFrom = []
      portMappings = [
        {
          containerPort = var.port
          hostPort      = var.port
          protocol      = "tcp"
        },
      ]
      logConfiguration = {
        logDriver = "awslogs"
        options = {
          "awslogs-create-group"  = "true"
          "awslogs-group"         = "${var.app_name}-task-def-${var.env}-${var.region}"
          # Plain reference instead of the deprecated interpolation-only form.
          "awslogs-region"        = var.region
          "awslogs-stream-prefix" = "ecs"
        }
      }
    },
  ])
}

# ECS service for the application, deployed through CodeDeploy and placed on
# the capacity provider named by var.capacity_provider_name.
resource "aws_ecs_service" "service" {
  name                = "${var.app_name}-service-${var.env}-${var.region}"
  cluster             = var.ecs_cluster_arn
  task_definition     = aws_ecs_task_definition.task_def.arn
  scheduling_strategy = "REPLICA"

  # Initial task count only; afterwards Application Auto Scaling owns it
  # (see ignore_changes below).
  desired_count = var.min_capacity

  health_check_grace_period_seconds = 30

  deployment_controller {
    type = "CODE_DEPLOY"
  }

  capacity_provider_strategy {
    capacity_provider = var.capacity_provider_name
    weight            = 100
  }

  load_balancer {
    target_group_arn = var.blue_target_arn
    container_name   = "${var.app_name}-container-${var.env}-${var.region}"
    container_port   = var.port
  }

  # NOTE(review): the task definition uses awsvpc networking. On EC2 launch
  # type, task ENIs reportedly get no public IP, so these subnets would need a
  # NAT gateway or VPC endpoints for outbound calls — confirm.
  network_configuration {
    subnets         = data.aws_subnets.subnets.ids
    security_groups = [var.sg_id]
  }

  lifecycle {
    ignore_changes = [
      # Changed outside Terraform by CodeDeploy deployments.
      task_definition,
      load_balancer,
      capacity_provider_strategy,
      # Changed outside Terraform by the app-autoscaling policies; without
      # this every apply would reset the running task count.
      desired_count,
    ]
  }
}

# Registers the ECS service's DesiredCount as a scalable target bounded by
# var.min_capacity and var.max_capacity.
resource "aws_appautoscaling_target" "scalable_target" {
  min_capacity = var.min_capacity
  max_capacity = var.max_capacity

  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${var.ecs_cluster_name}/${aws_ecs_service.service.name}"
}
# Target-tracking policy that scales the service on average CPU utilization,
# aiming for var.cpu_scale_threshold.
resource "aws_appautoscaling_policy" "cpu_scaling_policy" {
  name        = "${var.app_name}-cpu-scaling-policy-${var.env}-${var.region}"
  policy_type = "TargetTrackingScaling"

  # Read the identifiers from the registered scalable target rather than
  # rebuilding them, so Terraform creates the target before this policy.
  service_namespace  = aws_appautoscaling_target.scalable_target.service_namespace
  resource_id        = aws_appautoscaling_target.scalable_target.resource_id
  scalable_dimension = aws_appautoscaling_target.scalable_target.scalable_dimension

  target_tracking_scaling_policy_configuration {
    target_value       = var.cpu_scale_threshold
    scale_out_cooldown = 300
    scale_in_cooldown  = 300
    disable_scale_in   = false

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }
  }
}
# Target-tracking policy that scales the service on average memory
# utilization, aiming for var.memory_scale_threshold.
resource "aws_appautoscaling_policy" "memory_scaling_policy" {
  name        = "${var.app_name}-memory-scaling-policy-${var.env}-${var.region}"
  policy_type = "TargetTrackingScaling"

  # Read the identifiers from the registered scalable target rather than
  # rebuilding them, so Terraform creates the target before this policy.
  service_namespace  = aws_appautoscaling_target.scalable_target.service_namespace
  resource_id        = aws_appautoscaling_target.scalable_target.resource_id
  scalable_dimension = aws_appautoscaling_target.scalable_target.scalable_dimension

  target_tracking_scaling_policy_configuration {
    target_value       = var.memory_scale_threshold
    scale_out_cooldown = 300
    scale_in_cooldown  = 300
    disable_scale_in   = false

    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageMemoryUtilization"
    }
  }
}

Task and exec roles:

# Task role for the application; trusted by ecs-tasks.amazonaws.com only.
resource "aws_iam_role" "ecs_task_role" {
  name = "${var.app_name}-ecsTaskRole-${var.env}-${var.region}"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = ""
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "ecs-tasks.amazonaws.com" }
      },
    ]
  })
}
# Attaches every policy ARN in var.policy_arns to the task role, one
# attachment per distinct ARN.
resource "aws_iam_role_policy_attachment" "attach_policies_to_task_role" {
  for_each = toset(var.policy_arns)

  policy_arn = each.value
  role       = aws_iam_role.ecs_task_role.name
}

# Policy document that denies logs:CreateLogStream and logs:PutLogEvents on
# var.api_log_group_arn to every principal except the task role.
# NOTE(review): nothing in the visible code consumes this document — confirm
# where (or whether) it is attached.
# NOTE(review): the task definition's awslogs driver may write under a
# different identity than the task role (e.g. the execution role, or an
# assumed-role session ARN) — verify this Deny does not block log delivery.
data "aws_iam_policy_document" "only_allow_api_to_put_logs" {
  statement {
    effect = "Deny"

    actions = [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]

    resources = [var.api_log_group_arn]

    # Everyone NOT listed here is subject to the Deny above.
    not_principals {
      type = "AWS"
      identifiers = [aws_iam_role.ecs_task_role.arn]
    }
  }
}
# Task execution role; trusted by ecs-tasks.amazonaws.com only.
resource "aws_iam_role" "ecs_exec_role" {
  name = "${var.app_name}-ecsExecRole-${var.env}-${var.region}"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = ""
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "ecs-tasks.amazonaws.com" }
      },
    ]
  })
}

# Caller-supplied log-group write policy, attached to the execution role.
resource "aws_iam_role_policy_attachment" "attach_log_groups_write_to_exec_role" {
  policy_arn = var.log_groups_write_policy_arn
  role       = aws_iam_role.ecs_exec_role.name
}

# AWS-managed task execution policy, attached to the execution role.
resource "aws_iam_role_policy_attachment" "attach_ecs_task_exec_to_exec_role" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs_exec_role.name
}

# AWS-managed Fault Injection Simulator ECS access policy, attached to the
# execution role.
resource "aws_iam_role_policy_attachment" "attach_fault_injection_simulator_to_exec_role" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSFaultInjectionSimulatorECSAccess"
  role       = aws_iam_role.ecs_exec_role.name
}

Putting them together:

# ECR module; its ecr_repository_url output feeds the service module.
module "ecr" {
  source = "../../ecr"

  env      = var.env
  app_name = var.app_name
}

# Log group module; its log_group_arn output is consumed by the roles module.
module "api_log_group" {
  source = "../../log-group"

  region   = var.region
  env      = var.env
  app_name = var.app_name
}

# Roles module; exposes task_role_arn and exec_role_arn, which are passed on
# to the service module.
module "roles" {
  source = "../ecs-roles"

  region   = var.region
  env      = var.env
  app_name = var.app_name

  policy_arns                 = var.task_policy_arns
  log_groups_write_policy_arn = var.log_groups_write_policy_arn
  api_log_group_arn           = module.api_log_group.log_group_arn
}

# Blue/green load balancer module; provides the target group and listener
# outputs used by the service and codedeploy modules.
module "lb_targets" {
  source = "../blue-green-alb"

  # Forward both the default provider and the prod_us_east_1 alias.
  providers = {
    aws                = aws
    aws.prod_us_east_1 = aws.prod_us_east_1
  }

  region   = var.region
  env      = var.env
  app_name = var.app_name

  lb_arn     = var.lb_arn
  lb_dns     = var.lb_dns
  lb_zone_id = var.lb_zone_id

  domain            = var.domain
  url               = var.url
  port              = var.port
  health_check_path = var.health_check_path
}

# EC2 capacity module; its capacity_provider_name output is consumed by the
# service module.
module "ec2" {
  source = "../ec2"

  region   = var.region
  env      = var.env
  app_name = var.app_name

  ecs_cluster_name      = var.ecs_cluster_name
  instance_profile_name = var.instance_profile_name
  instance_type         = var.instance_type
  sg_ids                = [var.sg_id]

  min_capacity = var.min_capacity
  max_capacity = var.max_capacity

  # NOTE(review): the capacity target is fed from the CPU scaling threshold —
  # confirm this coupling is intentional rather than a copy/paste slip.
  target_capacity_percentage = var.cpu_scale_threshold
}

# ECS service module, wired to the role ARNs, blue target group, ECR
# repository and capacity provider produced by the sibling modules.
module "service" {
  source = "../ecs-service"

  region   = var.region
  env      = var.env
  app_name = var.app_name
  envvars  = var.envvars

  # Outputs of sibling modules.
  task_role_arn          = module.roles.task_role_arn
  exec_role_arn          = module.roles.exec_role_arn
  blue_target_arn        = module.lb_targets.blue_target_arn
  ecr_repository_url     = module.ecr.ecr_repository_url
  capacity_provider_name = module.ec2.capacity_provider_name

  # Cluster and networking.
  ecs_cluster_arn  = var.ecs_cluster_arn
  ecs_cluster_name = var.ecs_cluster_name
  sg_id            = var.sg_id
  port             = var.port

  # Sizing and scaling.
  cpu                    = var.cpu
  memory                 = var.memory
  cpu_scale_threshold    = var.cpu_scale_threshold
  memory_scale_threshold = var.memory_scale_threshold
  min_capacity           = var.min_capacity
  max_capacity           = var.max_capacity
  desired_count          = var.min_capacity
}

# CodeDeploy module, given the listener, the blue and green target group
# names, and the ECS cluster and service names.
module "codedeploy" {
  source = "../codedeploy"

  region   = var.region
  env      = var.env
  app_name = var.app_name

  ecs_cluster_name = var.ecs_cluster_name
  ecs_service_name = module.service.ecs_service_name

  listener_arn      = module.lb_targets.listener_arn
  blue_target_name  = module.lb_targets.blue_target_name
  green_target_name = module.lb_targets.green_target_name
}
1 Answer
0

Your Terraform code is not referencing the role but a variable.

Update these 2 lines:

task_role_arn = aws_iam_role.ecs_task_role.arn
execution_role_arn = aws_iam_role.ecs_exec_role.arn

From

resource "aws_ecs_task_definition" "task_def" {
  family = "${var.app_name}-task-def-${var.env}-${var.region}"
  network_mode = "awsvpc"
  task_role_arn = var.task_role_arn
  execution_role_arn = var.exec_role_arn
  cpu = var.cpu
  memory = var.memory
  runtime_platform {
    cpu_architecture = "X86_64"
    operating_system_family = "LINUX"
  }

to

resource "aws_ecs_task_definition" "task_def" {
  family = "${var.app_name}-task-def-${var.env}-${var.region}"
  network_mode = "awsvpc"
  task_role_arn = aws_iam_role.ecs_task_role.arn             #CHANGED HERE
  execution_role_arn = aws_iam_role.ecs_exec_role.arn        #CHANGED HERE
  cpu = var.cpu
  memory = var.memory
  runtime_platform {
    cpu_architecture = "X86_64"
    operating_system_family = "LINUX"
  }
profile picture
EXPERT
answered 25 days ago
  • It is doing that, it's just the role creation and task definition are in separate modules, and from role creation I'm passing the arns as variables to the task definition. In the management console they are properly assigned to the task definition, so this is not the solution to the problem.

  • Edited the post to clarify the connections between tf modules.

You are not logged in. Log in to post an answer.

A good answer clearly answers the question and provides constructive feedback and encourages professional growth in the question asker.

Guidelines for Answering Questions