CloudFormation - Auto Scaling: how to get the sum (not the average) of the metrics from all nodes?

0

I set my thresholds to scale out when CPU usage is above 80% and to scale in when it is below 70%. The problem is that (AFAIK) the average value across the Auto Scaling group is used. Why is that a problem? Example situation:

  1. There is one node, and I put it under 100% CPU load
  2. Alarm is triggered, another instance is created
  3. Now the metric is averaged over 2 nodes, so (100% + 0%) / 2 = 50%, which is below 70% -> the scale-in alarm is triggered and one node is destroyed, even though one node is still loaded at 100%.

Ideally, for scaling in I would use not the average but the SUM of the loads on all nodes. There is the AWS::CloudWatch::Alarm/Properties/Statistic setting, which accepts Average or Sum, but it seems these apply to the evaluation periods, not to the number of instances in the given dimension?

https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-cw-alarm.html#cfn-cloudwatch-alarms-statistic

My template:

{
    "AWSTemplateFormatVersion":"2010-09-09",
    "Description" : "Creates Autoscaling group. Used securitygroup ids and subnets ids are hardcoded.",
    "Parameters" : {
        "myprojectAmiId": {
            "Description": "New AMI ID which will be used to create/update autoscaling group",
            "Type": "AWS::EC2::Image::Id"
        },
        "myprojectNodesDefaultQuantity":{
            "Type": "Number",
            "MinValue" : "1"
        }
    },

    "Resources" : {
        "myprojectLaunchTemplate":{
            "Type":"AWS::EC2::LaunchTemplate",
            "Properties":{
                "LaunchTemplateData":{
                    "IamInstanceProfile":{
                        "Arn": "arn:aws:iam::censored6:instance-profile/myproject-ec2"
                    },
                    "ImageId": { "Ref":"myprojectAmiId" },
                    "InstanceType" : "t3a.small",
                    "KeyName" : "my-ssh-key",
                    "SecurityGroupIds" : [ "sg-censored", "sg-censored", "sg-censored5", "sg-censored" ]
                }
            }
        },
        "myprojectAutoScalingGroup": {
            "Type":"AWS::AutoScaling::AutoScalingGroup",
            "UpdatePolicy" : {
                "AutoScalingRollingUpdate" : {
                    "MaxBatchSize" : "1",
                    "MinInstancesInService" : "1",
                    "PauseTime" : "PT5M",
                    "WaitOnResourceSignals": true
                }
            },
            "Properties": {
                "MinSize":{ "Ref":"myprojectNodesDefaultQuantity" },
                "MaxSize":"3",
                "HealthCheckGracePeriod":300,
                "LaunchTemplate": {
                    "LaunchTemplateId": { "Ref":"myprojectLaunchTemplate" },
                    "Version":{
                        "Fn::GetAtt":[
                            "myprojectLaunchTemplate",
                            "LatestVersionNumber"
                        ]
                    }
                },
                "VPCZoneIdentifier" : [ "subnet-censored", "subnet-0censoredc" ],
                "TargetGroupARNs" : [ "arn:aws:elasticloadbalancing:us-west-2:censored:targetgroup/autoscaling-tests-targetgroup/censored" ],
                "Tags" : [
                    {"Key" : "Name", "Value" : "myproject-cloudformation-ascaling-tests", "PropagateAtLaunch" : true},
                    {"Key" : "Stack", "Value" : "dev-staging","PropagateAtLaunch" : true},
                    {"Key" : "CreatedBy", "Value" : "cloudformation", "PropagateAtLaunch" : true}
                ]
            }
        },
        "myprojectScaleUpPolicy":{
            "Type" : "AWS::AutoScaling::ScalingPolicy",
            "Properties" : {
                "AdjustmentType" : "ChangeInCapacity",
                "AutoScalingGroupName" : { "Ref" : "myprojectAutoScalingGroup" },
                "Cooldown" : "60",
                "ScalingAdjustment" : 1
            }
        },
        "myprojectScaleDownPolicy":{
            "Type" : "AWS::AutoScaling::ScalingPolicy",
            "Properties" : {
                "AdjustmentType" : "ChangeInCapacity",
                "AutoScalingGroupName" : { "Ref" : "myprojectAutoScalingGroup" },
                "Cooldown" : "60",
                "ScalingAdjustment" : -1
            }
        },
        "myprojectCPUAlarmHigh": {
            "Type" : "AWS::CloudWatch::Alarm",
            "Properties" : {
                "AlarmActions" : [ { "Ref" : "myprojectScaleUpPolicy" } ],
                "AlarmDescription" : "Scale-up if CPU > 80% for 5 minutes",
                "ComparisonOperator" : "GreaterThanThreshold",
                "Dimensions" : [
                    { "Name": "AutoScalingGroupName", "Value": { "Ref" : "myprojectAutoScalingGroup" }}
                ],
                "EvaluationPeriods" : 1,
                "MetricName" : "CPUUtilization",
                "Namespace" : "AWS/EC2",
                "Period" : 300,
                "Statistic" : "Average",
                "Threshold" : 80
            }
        },
        "myprojectCPUAlarmLow": {
            "Type" : "AWS::CloudWatch::Alarm",
            "Properties" : {
                "AlarmActions" : [ { "Ref" : "myprojectScaleDownPolicy" } ],
                "AlarmDescription" : "Scale-down if CPU < 70% for 10 minutes",
                "ComparisonOperator" : "LessThanThreshold",
                "Dimensions" : [
                    { "Name": "AutoScalingGroupName", "Value": { "Ref" : "myprojectAutoScalingGroup" }}
                ],
                "EvaluationPeriods" : 2,
                "MetricName" : "CPUUtilization",
                "Namespace" : "AWS/EC2",
                "Period" : 300,
                "Statistic" : "Average",
                "Threshold" : 70
            }
        }
    }
}
asked 2 years ago149 views
No Answers

You are not logged in. Log in to post an answer.

A good answer clearly answers the question and provides constructive feedback and encourages professional growth in the question asker.

Guidelines for Answering Questions