Hi everybody. I'm stuck when calling describe_auto_ml_job_v2 method.
Can't find the best Candidate because of a KeyError. Seems like when I print the method the following keys fail after sm.describe_auto_ml_job_v2(AutoMLJobName=auto_ml_job_name):
{'AutoMLJobName': 'automl-test-v220241201-01-02',
'AutoMLJobArn': 'arn:aws:sagemaker:us-east-x:automl-job/automl-test-v220241201-01-02',
'AutoMLJobInputDataConfig': [{'ChannelType': 'training',
'ContentType': 'text/csv;header=present',
'CompressionType': 'None',
'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
'S3Uri': 's3://machine-leanring/Dataset24.csv'}}}],
'OutputDataConfig': {'S3OutputPath': 's3://machine-leanring/models/autopilot'},
'RoleArn': 'arn:aws:iam::x:role/service-role/AmazonSageMaker-ExecutionRole-x',
'AutoMLJobObjective': {'MetricName': 'Accuracy'},
'AutoMLProblemTypeConfig': {'TabularJobConfig': {'CandidateGenerationConfig': {'AlgorithmsConfig': [{'AutoMLAlgorithms': ['xgboost', 'lightgbm', 'randomforest']}]},
'CompletionCriteria': {'MaxCandidates': 1,
'MaxRuntimePerTrainingJobInSeconds': 5,
'MaxAutoMLJobRuntimeInSeconds': 1600},
'Mode': 'ENSEMBLING',
'GenerateCandidateDefinitionsOnly': False,
'ProblemType': 'BinaryClassification',
'TargetAttributeName': 'flag',
'SampleWeightAttributeName': 'flag'}},
'AutoMLProblemTypeConfigName': 'Tabular',
'CreationTime': datetime.datetime(2024, 12, 1, 1, 3, 1, 867000, tzinfo=tzlocal()),
'EndTime': datetime.datetime(2024, 12, 1, 1, 37, 28, 49000, tzinfo=tzlocal()),
'LastModifiedTime': datetime.datetime(2024, 12, 1, 1, 37, 28, 99000, tzinfo=tzlocal()),
'FailureReason': 'Internal Server Error. Please try again later.',
'AutoMLJobStatus': 'Failed',
'AutoMLJobSecondaryStatus': 'Failed',
'ModelDeployConfig': {'AutoGenerateEndpointName': False,
'EndpointName': 'automl-test-v2-endpoint1320241201-01-02'},
'DataSplitConfig': {'ValidationFraction': 0.20000000298023224},
'AutoMLComputeConfig': {'EmrServerlessComputeConfig': {'ExecutionRoleARN': 'arn:aws:iam::x:role/service-role/AmazonSageMaker-ExecutionRole-x'}},
'ResponseMetadata': {'RequestId': 'x',
'HTTPStatusCode': 200,
'HTTPHeaders': {'x-amzn-requestid': 'x',
'content-type': 'application/x-amz-json-1.1',
'content-length': '1657',
'date': 'Sun, 01 Dec 2024 02:03:36 GMT'},
'RetryAttempts': 0}}
I'm still trying to figuring it out... if its beacuase of the CompletionCriteria key in the job config.
Share my code to have more details, hope someone can help me find the human error o if I need to add some extra configuration. Thanks a lot.
import boto3
from sagemaker import get_execution_role
import sagemaker
import datetime
from time import sleep
sess = sagemaker.Session()
bucket = 's3://machine-leanring'
dataset_location = 's3://machine-leanring/Dataset24.csv'
role = sagemaker.get_execution_role()
region = boto3.Session().region_name
sm = boto3.Session().client(service_name='sagemaker', region_name=region)
target = "flag"
input_data_config =[{
'DataSource': {
'S3DataSource': {
'S3DataType': 'S3Prefix',
'S3Uri': dataset_location
}
},
'ChannelType': 'training',
}]
output_data_config = {
'S3OutputPath': f'{bucket}/models/autopilot'
}
timestamp_suffix = strftime("%Y%m%d-%H-%M", gmtime())
auto_ml_job_name = "automl-test-v2" + timestamp_suffix
auto_ml_job_endpoint_name = "automl-test-endpoint-v2" + timestamp_suffix
job_config = {
'TabularJobConfig': {
'CandidateGenerationConfig': {
'AlgorithmsConfig': [
{
'AutoMLAlgorithms': [
'xgboost', 'lightgbm', 'randomforest'
]
},
]
},
'CompletionCriteria': {
'MaxCandidates': 1,
'MaxRuntimePerTrainingJobInSeconds': 5,
'MaxAutoMLJobRuntimeInSeconds':1600
},
"TargetAttributeName": target,
'Mode': 'ENSEMBLING',
'GenerateCandidateDefinitionsOnly': False,
'ProblemType': 'BinaryClassification',
'SampleWeightAttributeName': target
}
}
sm.create_auto_ml_job_v2(
AutoMLJobName=auto_ml_job_name,
AutoMLJobInputDataConfig=input_data_config,
OutputDataConfig=output_data_config,
AutoMLProblemTypeConfig=job_config,
RoleArn=role,
AutoMLJobObjective={
'MetricName': 'Accuracy'
},
ModelDeployConfig={
'AutoGenerateEndpointName': False,
'EndpointName': auto_ml_job_endpoint_name
},
DataSplitConfig={
'ValidationFraction': 0.2
},
AutoMLComputeConfig={
'EmrServerlessComputeConfig': {
'ExecutionRoleARN': role
}
}
)
describe_response = sm.describe_auto_ml_job_v2(AutoMLJobName=auto_ml_job_name)
job_run_status = describe_response["AutoMLJobStatus"]
while job_run_status not in ("Failed", "Completed", "Stopped"):
describe_response = sm.describe_auto_ml_job_v2(AutoMLJobName=auto_ml_job_name)
job_run_status = describe_response["AutoMLJobStatus"]
print(
datetime.datetime.now(), describe_response["AutoMLJobStatus"] + " - " + describe_response["AutoMLJobSecondaryStatus"]
)
sleep(80)
best_candidate = sm.describe_auto_ml_job_v2(AutoMLJobName=auto_ml_job_name)['BestCandidate']