I used the training script from https://sagemaker.readthedocs.io/en/stable/frameworks/xgboost/using_xgboost.html and am trying to train the model. Here is the code I used to configure the model:
` sess = sagemaker.Session()
# Configure an XGBoost estimator that runs script.py and start a training job
# on the train/validation data stored in S3.
# NOTE(review): sess is not referenced again in this snippet (it is not passed
# to the estimator below) - confirm whether it is needed.
# Hyperparameters handed to the estimator; the failing command quoted in the
# error output shows them forwarded to script.py as "--name value" arguments.
hyperparams = {
"max_depth": "5",
"eta": "0.2",
"gamma": "4",
"min_child_weight": "6",
"subsample": "0.7",
"objective": "reg:squarederror",
"num_round": "50",
"verbosity": "2"}
instance_type = "ml.m5.2xlarge"
# bucket is defined outside this snippet.
output_path = f's3://{bucket}/Model-Resource-Production/dr_training_folder'
# NOTE(review): content_type is never used below, and its value ("libsvm")
# disagrees with the content_type='csv' declared on both input channels.
content_type = "libsvm"
# Create XGBoost estimator
# (role is defined outside this snippet.)
xgboost_estimator = XGBoost(
entry_point="script.py",
framework_version="1.7-1", # Note: framework_version is mandatory
hyperparameters=hyperparams,
role=role,
instance_count=1,
instance_type=instance_type,
output_path=output_path,)
# Same S3 prefix as output_path, reused to locate the input data.
file_path = 'Model-Resource-Production/dr_training_folder'
# Point the train and validation channels at their S3 locations; both are
# declared as CSV.
training_input_config = TrainingInput(s3_data=f's3://{bucket}/{file_path}/DR_train', content_type='csv')
validation_input_config = TrainingInput(s3_data=f's3://{bucket}/{file_path}/DR_valid', content_type='csv')
# Fit the model; the channels are named 'train' and 'validation'.
xgboost_estimator.fit({'train': training_input_config, 'validation': validation_input_config})`
However, it returns an error during "Training in progress". Here is the error:
` sess = sagemaker.Session()
# Configure an XGBoost estimator that runs script.py and start a training job
# on the train/validation data stored in S3.
# NOTE(review): sess is not referenced again in this snippet (it is not passed
# to the estimator below) - confirm whether it is needed.
# Hyperparameters handed to the estimator; the failing command quoted in the
# error output shows them forwarded to script.py as "--name value" arguments.
hyperparams = {
"max_depth": "5",
"eta": "0.2",
"gamma": "4",
"min_child_weight": "6",
"subsample": "0.7",
"objective": "reg:squarederror",
"num_round": "50",
"verbosity": "2"}
instance_type = "ml.m5.2xlarge"
# bucket is defined outside this snippet.
output_path = f's3://{bucket}/Model-Resource-Production/dr_training_folder'
# NOTE(review): content_type is never used below, and its value ("libsvm")
# disagrees with the content_type='csv' declared on both input channels.
content_type = "libsvm"
# Create XGBoost estimator
# (role is defined outside this snippet.)
xgboost_estimator = XGBoost(
entry_point="script.py",
framework_version="1.7-1", # Note: framework_version is mandatory
hyperparameters=hyperparams,
role=role,
instance_count=1,
instance_type=instance_type,
output_path=output_path,)
# Same S3 prefix as output_path, reused to locate the input data.
file_path = 'Model-Resource-Production/dr_training_folder'
# Point the train and validation channels at their S3 locations; both are
# declared as CSV.
training_input_config = TrainingInput(s3_data=f's3://{bucket}/{file_path}/DR_train', content_type='csv')
validation_input_config = TrainingInput(s3_data=f's3://{bucket}/{file_path}/DR_valid', content_type='csv')
# Fit the model; the channels are named 'train' and 'validation'.
xgboost_estimator.fit({'train': training_input_config, 'validation': validation_input_config})`
`---------------------------------------------------------------------------
UnexpectedStatusException Traceback (most recent call last)
Cell In[4], line 71
68 validation_input_config = TrainingInput(s3_data=f's3://{bucket}/{file_path}/DR_valid', content_type='csv')
70 # Fit the model
---> 71 xgboost_estimator.fit({'train': training_input_config, 'validation': validation_input_config})
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/workflow/pipeline_context.py:284, in runnable_by_pipeline.<locals>.wrapper(*args, **kwargs)
280 return context
282 return _StepArguments(retrieve_caller_name(self_instance), run_func, *args, **kwargs)
--> 284 return run_func(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/estimator.py:1198, in EstimatorBase.fit(self, inputs, wait, logs, job_name, experiment_config)
1196 self.jobs.append(self.latest_training_job)
1197 if wait:
-> 1198 self.latest_training_job.wait(logs=logs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/estimator.py:2344, in _TrainingJob.wait(self, logs)
2342 # If logs are requested, call logs_for_jobs.
2343 if logs != "None":
-> 2344 self.sagemaker_session.logs_for_job(self.job_name, wait=True, log_type=logs)
2345 else:
2346 self.sagemaker_session.wait_for_job(self.job_name)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/session.py:4756, in Session.logs_for_job(self, job_name, wait, poll, log_type)
4753 last_profiler_rule_statuses = profiler_rule_statuses
4755 if wait:
-> 4756 self._check_job_status(job_name, description, "TrainingJobStatus")
4757 if dot:
4758 print()
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/sagemaker/session.py:4263, in Session._check_job_status(self, job, desc, status_key_name)
4257 if "CapacityError" in str(reason):
4258 raise exceptions.CapacityError(
4259 message=message,
4260 allowed_statuses=["Completed", "Stopped"],
4261 actual_status=status,
4262 )
-> 4263 raise exceptions.UnexpectedStatusException(
4264 message=message,
4265 allowed_statuses=["Completed", "Stopped"],
4266 actual_status=status,
4267 )
UnexpectedStatusException: Error for Training job sagemaker-xgboost-2023-04-18-13-58-00-099: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/miniconda3/bin/python3 -m script --eta 0.2 --gamma 4 --max_depth 5 --min_child_weight 6 --num_round 50 --objective reg:squarederror --subsample 0.7 --verbosity 2", exit code: 1`
It seems there is a problem with how the hyperparameters are passed, but I don't know what is actually going on. Thanks so much in advance.