Error for training job catboost-classification-model, ErrorMessage "TypeError: Cannot convert 'xxx' to float"

0

When I performed the following AWS tutorial, I got an error when training the model. https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/lightgbm_catboost_tabular/Amazon_Tabular_Classification_LightGBM_CatBoost.ipynb

The error that occurred is

UnexpectedStatusException: Error for Training job jumpstart-catboost-classification-model-2022-07-22-07-33-18-038: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
ExitCode 1 ErrorMessage "TypeError: Cannot convert 'b'BROOKLYN'' to float

These are all the files that I have uploaded to the S3 bucket: Amazon S3 --> Buckets ---> R-sandbox-sagemaker ---> ml/ ---> train/. In the train folder, 'data.csv' and 'categorical_index.json' are uploaded as described in the tutorial mentioned above. The data point "BROOKLYN" is in a categorical column, and its index is already included in the JSON file to tell CatBoost that it is categorical data. The data has 55 categorical columns; only two of them are integers, and all the others are strings.

Could you give me some advice on how to solve it?

Here is all the code and the full traceback of the issue:

!pip install sagemaker ipywidgets --upgrade –quiet
import sagemaker, boto3, json
from sagemaker import get_execution_role

aws_role = get_execution_role()
aws_region = boto3.Session().region_name
sess = sagemaker.Session()

## 2.1 Retrieve Training Artifacts
# Look up the three artifacts needed for training the selected tabular model:
# the training docker image, the training script bundle, and the model tarball.
# A model_version of "*" requests the latest available version.
from sagemaker import image_uris, model_uris, script_uris

train_model_id = "catboost-classification-model"
train_model_version = "*"
train_scope = "training"
training_instance_type = "ml.m5.xlarge"

# Docker image URI for the training container.
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
)

# S3 URI of the training script bundle.
train_source_uri = script_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    script_scope=train_scope,
)

# S3 URI of the model tarball passed to the Estimator as `model_uri`.
train_model_uri = model_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    model_scope=train_scope,
)
## 2.2 Set Training Parameters
# Input side: bucket and key prefix that make up the training channel's S3 path.
training_data_bucket = "R-sandbox-sagemaker"
training_data_prefix = "ml"
training_dataset_s3_path = "s3://{}/{}".format(
    training_data_bucket, training_data_prefix
)

# Output side: artifacts go under a fixed prefix in the session's default bucket.
output_bucket = sess.default_bucket()
output_prefix = "jumpstart-example-tabular-training"
s3_output_location = "s3://{}/{}/output".format(output_bucket, output_prefix)

from sagemaker import hyperparameters

# Fetch the default hyperparameters for the selected model.
# NOTE: this rebinds the name `hyperparameters` from the imported module to
# the returned mapping, so the module is no longer reachable under this name.
hyperparameters = hyperparameters.retrieve_default(
    model_id=train_model_id,
    model_version=train_model_version,
)

# [Optional] Override defaults with custom values; CatBoost calls the
# number-of-rounds hyperparameter "iterations".
hyperparameters["iterations"] = "500"
print(hyperparameters)

## 2.3. Train with Automatic Model Tuning
from sagemaker.tuner import ContinuousParameter, IntegerParameter, HyperparameterTuner

use_amt = True

# Search space per supported model id. `hyperparameter_ranges` is left
# undefined when `train_model_id` matches neither of the two ids below.
if train_model_id == "lightgbm-classification-model":
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(0.0001, 1, scaling_type="Logarithmic"),
        "num_boost_round": IntegerParameter(2, 30),
        "early_stopping_rounds": IntegerParameter(2, 30),
        "num_leaves": IntegerParameter(10, 50),
        "feature_fraction": ContinuousParameter(0, 1),
        "bagging_fraction": ContinuousParameter(0, 1),
        "bagging_freq": IntegerParameter(1, 10),
        "max_depth": IntegerParameter(5, 30),
        "min_data_in_leaf": IntegerParameter(5, 50),
    }
elif train_model_id == "catboost-classification-model":
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(1e-5, 0.1, scaling_type="Logarithmic"),
        "iterations": IntegerParameter(50, 1000),
        "early_stopping_rounds": IntegerParameter(1, 10),
        "depth": IntegerParameter(1, 10),
        "l2_leaf_reg": IntegerParameter(1, 10),
        "random_strength": ContinuousParameter(0.01, 10, scaling_type="Logarithmic"),
    }
## 2.4. Start Training
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base

# Build the job name from the selected model id rather than a hard-coded copy
# of it, so the name follows `train_model_id` if the model is changed.
training_job_name = name_from_base(f"jumpstart-{train_model_id}-training")

# Create the SageMaker Estimator instance.
tabular_estimator = Estimator(
    role=aws_role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    # Forward the retrieved (and overridden) hyperparameters. With this
    # argument left out the training job receives an empty hyperparameter
    # set and the "iterations" override has no effect.
    hyperparameters=hyperparameters,
    output_path=s3_output_location,
)

# Launch the training job, passing the S3 path of the "training" channel.
tabular_estimator.fit(
    {"training": training_dataset_s3_path},
    logs=True,
    job_name=training_job_name,
)
2022-07-22 07:33:18 Starting - Starting the training job...
2022-07-22 07:33:46 Starting - Preparing the instances for trainingProfilerReport-1658475198: InProgress
2022-07-22 07:35:06 Downloading - Downloading input data...
2022-07-22 07:35:46 Training - Downloading the training image...
2022-07-22 07:36:11 Training - Training image download completed. Training in progress..bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2022-07-22 07:36:14,025 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
2022-07-22 07:36:14,027 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2022-07-22 07:36:14,036 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2022-07-22 07:36:14,041 sagemaker_pytorch_container.training INFO     Invoking user training script.
2022-07-22 07:36:15,901 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:
/opt/conda/bin/python3.8 -m pip install -r requirements.txt
Processing ./catboost/tenacity-8.0.1-py3-none-any.whl
Processing ./catboost/plotly-5.1.0-py2.py3-none-any.whl
Processing ./catboost/graphviz-0.17-py3-none-any.whl
Processing ./catboost/catboost-1.0.1-cp38-none-manylinux1_x86_64.whl
Processing ./sagemaker_jumpstart_script_utilities-1.0.0-py2.py3-none-any.whl
Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from plotly==5.1.0->-r requirements.txt (line 2)) (1.16.0)
Requirement already satisfied: numpy>=1.16.0 in /opt/conda/lib/python3.8/site-packages (from catboost==1.0.1->-r requirements.txt (line 4)) (1.19.1)
Requirement already satisfied: scipy in /opt/conda/lib/python3.8/site-packages (from catboost==1.0.1->-r requirements.txt (line 4)) (1.7.1)
Requirement already satisfied: matplotlib in /opt/conda/lib/python3.8/site-packages (from catboost==1.0.1->-r requirements.txt (line 4)) (3.4.3)
Requirement already satisfied: pandas>=0.24.0 in /opt/conda/lib/python3.8/site-packages (from catboost==1.0.1->-r requirements.txt (line 4)) (1.2.4)
Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.24.0->catboost==1.0.1->-r requirements.txt (line 4)) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.24.0->catboost==1.0.1->-r requirements.txt (line 4)) (2021.3)
Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from matplotlib->catboost==1.0.1->-r requirements.txt (line 4)) (8.3.2)
Requirement already satisfied: pyparsing>=2.2.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->catboost==1.0.1->-r requirements.txt (line 4)) (2.4.7)
Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.8/site-packages (from matplotlib->catboost==1.0.1->-r requirements.txt (line 4)) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.8/site-packages (from matplotlib->catboost==1.0.1->-r requirements.txt (line 4)) (1.3.2)
tenacity is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
Installing collected packages: plotly, graphviz, sagemaker-jumpstart-script-utilities, catboost
Attempting uninstall: plotly
Found existing installation: plotly 5.3.1
Uninstalling plotly-5.3.1:
Successfully uninstalled plotly-5.3.1
Successfully installed catboost-1.0.1 graphviz-0.17 plotly-5.1.0 sagemaker-jumpstart-script-utilities-1.0.0
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
2022-07-22 07:36:32,568 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2022-07-22 07:36:32,604 sagemaker-training-toolkit INFO     Invoking user script
Training Env:
{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "model": "/opt/ml/input/data/model",
        "training": "/opt/ml/input/data/training"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_pytorch_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {},
    "input_config_dir": "/opt/ml/input/config",
    "input_data_config": {
        "model": {
            "ContentType": "application/x-sagemaker-model",
            "TrainingInputMode": "File",
            "S3DistributionType": "FullyReplicated",
            "RecordWrapperType": "None"
        },
        "training": {
            "TrainingInputMode": "File",
            "S3DistributionType": "FullyReplicated",
            "RecordWrapperType": "None"
        }
    },
    "input_dir": "/opt/ml/input",
    "is_master": true,
    "job_name": "jumpstart-catboost-classification-model-2022-07-22-07-33-18-038",
    "log_level": 20,
    "master_hostname": "algo-1",
    "model_dir": "/opt/ml/model",
    "module_dir": "s3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/catboost/transfer_learning/classification/v1.1.3/sourcedir.tar.gz",
    "module_name": "transfer_learning",
    "network_interface_name": "eth0",
    "num_cpus": 4,
    "num_gpus": 0,
    "output_data_dir": "/opt/ml/output/data",
    "output_dir": "/opt/ml/output",
    "output_intermediate_dir": "/opt/ml/output/intermediate",
    "resource_config": {
        "current_host": "algo-1",
        "current_instance_type": "ml.m5.xlarge",
        "current_group_name": "homogeneousCluster",
        "hosts": [
            "algo-1"
        ],
        "instance_groups": [
            {
                "instance_group_name": "homogeneousCluster",
                "instance_type": "ml.m5.xlarge",
                "hosts": [
                    "algo-1"
                ]
            }
        ],
        "network_interface_name": "eth0"
    },
    "user_entry_point": "transfer_learning.py"
}
Environment variables:
SM_HOSTS=["algo-1"]
SM_NETWORK_INTERFACE_NAME=eth0
SM_HPS={}
SM_USER_ENTRY_POINT=transfer_learning.py
SM_FRAMEWORK_PARAMS={}
SM_RESOURCE_CONFIG={"current_group_name":"homogeneousCluster","current_host":"algo-1","current_instance_type":"ml.m5.xlarge","hosts":["algo-1"],"instance_groups":[{"hosts":["algo-1"],"instance_group_name":"homogeneousCluster","instance_type":"ml.m5.xlarge"}],"network_interface_name":"eth0"}
SM_INPUT_DATA_CONFIG={"model":{"ContentType":"application/x-sagemaker-model","RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"},"training":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}}
SM_OUTPUT_DATA_DIR=/opt/ml/output/data
SM_CHANNELS=["model","training"]
SM_CURRENT_HOST=algo-1
SM_MODULE_NAME=transfer_learning
SM_LOG_LEVEL=20
SM_FRAMEWORK_MODULE=sagemaker_pytorch_container.training:main
SM_INPUT_DIR=/opt/ml/input
SM_INPUT_CONFIG_DIR=/opt/ml/input/config
SM_OUTPUT_DIR=/opt/ml/output
SM_NUM_CPUS=4
SM_NUM_GPUS=0
SM_MODEL_DIR=/opt/ml/model
SM_MODULE_DIR=s3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/catboost/transfer_learning/classification/v1.1.3/sourcedir.tar.gz
SM_TRAINING_ENV={"additional_framework_parameters":{},"channel_input_dirs":{"model":"/opt/ml/input/data/model","training":"/opt/ml/input/data/training"},"current_host":"algo-1","framework_module":"sagemaker_pytorch_container.training:main","hosts":["algo-1"],"hyperparameters":{},"input_config_dir":"/opt/ml/input/config","input_data_config":{"model":{"ContentType":"application/x-sagemaker-model","RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"},"training":{"RecordWrapperType":"None","S3DistributionType":"FullyReplicated","TrainingInputMode":"File"}},"input_dir":"/opt/ml/input","is_master":true,"job_name":"jumpstart-catboost-classification-model-2022-07-22-07-33-18-038","log_level":20,"master_hostname":"algo-1","model_dir":"/opt/ml/model","module_dir":"s3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/catboost/transfer_learning/classification/v1.1.3/sourcedir.tar.gz","module_name":"transfer_learning","network_interface_name":"eth0","num_cpus":4,"num_gpus":0,"output_data_dir":"/opt/ml/output/data","output_dir":"/opt/ml/output","output_intermediate_dir":"/opt/ml/output/intermediate","resource_config":{"current_group_name":"homogeneousCluster","current_host":"algo-1","current_instance_type":"ml.m5.xlarge","hosts":["algo-1"],"instance_groups":[{"hosts":["algo-1"],"instance_group_name":"homogeneousCluster","instance_type":"ml.m5.xlarge"}],"network_interface_name":"eth0"},"user_entry_point":"transfer_learning.py"}
SM_USER_ARGS=[]
SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate
SM_CHANNEL_MODEL=/opt/ml/input/data/model
SM_CHANNEL_TRAINING=/opt/ml/input/data/training
PYTHONPATH=/opt/ml/code:/opt/conda/bin:/opt/conda/lib/python38.zip:/opt/conda/lib/python3.8:/opt/conda/lib/python3.8/lib-dynload:/opt/conda/lib/python3.8/site-packages
Invoking script with the following command:
/opt/conda/bin/python3.8 transfer_learning.py
INFO:root:Validation data is not found. 20.0% of training data is randomly selected as validation data. The seed for random sampling is 200.
Traceback (most recent call last):
  File "_catboost.pyx", line 2167, in _catboost.get_float_feature
File "_catboost.pyx", line 1125, in _catboost._FloatOrNan
  File "_catboost.pyx", line 949, in _catboost._FloatOrNanFromString
TypeError: Cannot convert 'b'BROOKLYN'' to float
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "transfer_learning.py", line 221, in <module>
ru
```
  • File "transfer_learning.py", line 221, in <module> run_with_args(args) File "transfer_learning.py", line 182, in run_with_args cat_train, cat_eval = Pool(data=X_train, label=y_train, cat_features=cat_features), Pool( File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 628, in init self._init(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 1171, in _init self._init_pool(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "_catboost.pyx", line 3755, in _catboost._PoolBase._init_pool File "_catboost.pyx", line 3803, in _catboost._PoolBase._init_pool File "_catboost.pyx", line 3638, in _catboost._PoolBase._init_features_order_layout_pool File "_catboost.pyx", line 2664, in _catboost._set_features_order_data_pd_data_frame File "_catboost.pyx", line 2208, in _catboost.create_num_factor_data File "_catboost.pyx", line 2169, in _catboost.get_float_feature _catboost.CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=2]="BROOKLYN": Cannot convert 'b'BROOKLYN'' to float 2022-07-22 07:36:34,967 sagemaker-training-toolkit ERROR Reporting training FAILURE 2022-07-22 07:36:34,967 sagemaker-training-toolkit E

  • ExitCode 1 ErrorMessage "TypeError: Cannot convert 'b'BROOKLYN'' to float During handling of the above exception, another exception occurred: Traceback (most recent call last): File "transfer_learning.py", line 221, in <module> run_with_args(args) File "transfer_learning.py", line 182, in run_with_args cat_train, cat_eval = Pool(data=X_train, label=y_train, cat_features=cat_features), Pool( File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 628, in init self._init(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 1171, in _init self._init_pool(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "_catboost.pyx", line 3755, in _catboost._PoolBase._init_pool File "_catboost.pyx", line 3803, in _catboost._PoolBase._init_pool File "_catboost.pyx", line 3638, in _catboost._PoolBase._init_features_order_layout_pool File "_catboost.pyx", line 2664, in _catboost._set_features_order_data_pd_data_frame File "_catboost.pyx", line 2208, in _catboost.create_num_factor_data File "_catboost.pyx", line 2169

  • in _catboost.get_float_feature _catboost.CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=2]="BROOKLYN": Cannot convert 'b'BROOKLYN'' to float" Command "/opt/conda/bin/python3.8 transfer_learning.py" 2022-07-22 07:36:34,967 sagemaker-training-toolkit ERROR Encountered exit_code 1

    2022-07-22 07:36:47 Uploading - Uploading generated training model 2022-07-22 07:36:47 Failed - Training job failed

    UnexpectedStatusException Traceback (most recent call last) /tmp/ipykernel_13950/1137348215.py in <cell line: 23>() 21 22 # Launch a SageMaker Training job by passing s3 path of the training data ---> 23 tabular_estimator.fit( 24 {"training": training_dataset_s3_path}, logs=True, job_name=training_job_name 25 ) ~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name, experiment_config) 953 ): 954 """Train a model using the input training dataset. --> 955 956 The API calls the Amazon SageMaker CreateTrainingJob API to start 957 model training. The API uses configuration you provided to create the ~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/estimator.py in wait(self, logs) 1954 logger.debug( 1955 "Selecting TrainingInput's input_mode (%s) for TrainingInputMode.",

  • 1958 train_args["input_mode"] = inputs.config["InputMode"]

    ~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/session.py in logs_for_job(self, job_name, wait, poll, log_type) 3796 color_wrap, 3797 ) -> 3798 if state == LogState.COMPLETE: 3799 break 3800

    ~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/session.py in _check_job_status(self, job, desc, status_key_name) 3334 3335 Args: -> 3336 name (str): Name of the Amazon SageMaker batch transform job. 3337 3338 Raises:

    UnexpectedStatusException: Error for Training job jumpstart-catboost-classification-model-2022-07-22-07-33-18-038: Failed. Reason: AlgorithmError: ExecuteUserScriptError: ExitCode 1 ErrorMessage "TypeError: Cannot convert 'b'BROOKLYN'' to float

  • UnexpectedStatusException: Error for Training job jumpstart-catboost-classification-model-2022-07-22-07-33-18-038: Failed. Reason: AlgorithmError: ExecuteUserScriptError: ExitCode 1 ErrorMessage "TypeError: Cannot convert 'b'BROOKLYN'' to float During handling of the above exception, another exception occurred: Traceback (most recent call last): File "transfer_learning.py", line 221, in <module> run_with_args(args) File "transfer_learning.py", line 182, in run_with_args cat_train, cat_eval = Pool(data=X_train, label=y_train, cat_features=cat_features), Pool( File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 628, in init self._init(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "/opt/conda/lib/python3.8/site-packages/catboost/core.py", line 1171, in _init self._init_pool(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, thread_count) File "_catboost.pyx", lin

M
asked 2 years ago1158 views
2 Answers
0

Hello - can you remove the categorical_index.json inside Amazon S3 --> Buckets---> R-sandbox-sagemaker--->ml/---> train/ and place it in the directory above it, i.e. Amazon S3 --> Buckets---> R-sandbox-sagemaker--->ml/. So only data.csv is in Amazon S3 --> Buckets---> R-sandbox-sagemaker--->ml/---> train/. Then re-run. Please let me know if this works.

AWS
answered 2 years ago
  • @AWS-User-9634033 please see the error

0

@AWS-User-9634033 I did what you suggested, and now I get the error below:

2022-08-19 14:43:30 Uploading - Uploading generated training model
2022-08-19 14:43:30 Failed - Training job failed
ProfilerReport-1660920003: Stopping

---------------------------------------------------------------------------
UnexpectedStatusException                 Traceback (most recent call last)
/tmp/ipykernel_9559/1137348215.py in <cell line: 23>()
     21 
     22 # Launch a SageMaker Training job by passing s3 path of the training data
---> 23 tabular_estimator.fit(
     24         {"training": training_dataset_s3_path}, logs=True, job_name=training_job_name
     25     )

~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/workflow/pipeline_context.py in wrapper(*args, **kwargs)
    246             return self_instance.sagemaker_session.context
    247 
--> 248         return run_func(*args, **kwargs)
    249 
    250     return wrapper

~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name, experiment_config)
   1062         self.jobs.append(self.latest_training_job)
   1063         if wait:
-> 1064             self.latest_training_job.wait(logs=logs)
   1065 
   1066     def _compilation_job_name(self):

~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/estimator.py in wait(self, logs)
   2145         # If logs are requested, call logs_for_jobs.
   2146         if logs != "None":
-> 2147             self.sagemaker_session.logs_for_job(self.job_name, wait=True, log_type=logs)
   2148         else:
   2149             self.sagemaker_session.wait_for_job(self.job_name)

~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/session.py in logs_for_job(self, job_name, wait, poll, log_type)
   3851 
   3852         if wait:
-> 3853             self._check_job_status(job_name, description, "TrainingJobStatus")
   3854             if dot:
   3855                 print()

~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/session.py in _check_job_status(self, job, desc, status_key_name)
   3389                     actual_status=status,
   3390                 )
-> 3391             raise exceptions.UnexpectedStatusException(
   3392                 message=message,
   3393                 allowed_statuses=["Completed", "Stopped"],

UnexpectedStatusException: Error for Training job jumpstart-catboost-classification-model-2022-08-19-14-40-03-772: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
ExitCode 1
ErrorMessage ""
Command "/opt/conda/bin/python3.8 transfer_learning.py", exit code: 1
M
answered 2 years ago

You are not logged in. Log in to post an answer.

A good answer clearly answers the question and provides constructive feedback and encourages professional growth in the question asker.

Guidelines for Answering Questions