I am trying to build two SageMaker pipelines: one for training and one for inference. The training pipeline works and successfully registers my trained model in the model registry; in it, I trained a scikit-learn model with my own training script. In my inference pipeline, I load the model, retrieving both model_url and image_uri, as follows:
# Re-create the registered model for serving: both the container image URI and
# the model artifact location come from the previous pipeline step's outputs.
_serving_env = {
    "SAGEMAKER_PROGRAM": "train_new.py",
    "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
    "SAGEMAKER_REGION": "eu-central-1",
}
training_model = Model(
    image_uri=step_latest_model_fetch.properties.Outputs["ImageUri"],
    model_data=step_latest_model_fetch.properties.Outputs["ModelUrl"],
    entry_point="./train_new.py",
    role=role,
    sagemaker_session=pipeline_session,
    env=_serving_env,
)

# Register a CreateModel step so downstream steps can reference the model name.
step_model_creation = ModelStep(
    name="ModelCreationStep",
    step_args=training_model.create(instance_type="ml.m5.xlarge"),
)
Then I run the batch transform as follows:
# Batch-transform configuration: run the model created above on a single
# ml.m5.xlarge instance and write the predictions back to S3.
transformer = Transformer(
    model_name=step_model_creation.properties.ModelName,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://{}/sagemaker/batch_transform_results".format(bucket),
)

step_transform = TransformStep(
    name="TransformStep",
    inputs=TransformInput(data=batch_data),
    transformer=transformer,
)
My CloudWatch error log is shown below:
Traceback (most recent call last):
File "/miniconda3/bin/serve", line 8, in <module>
sys.exit(serving_entrypoint())
File "/miniconda3/lib/python3.8/site-packages/sagemaker_sklearn_container/serving.py", line 146, in serving_entrypoint
server.start(env.ServingEnv().framework_module)
File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_server.py", line 86, in start
_modules.import_module(env.module_dir, env.module_name)
File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_modules.py", line 253, in import_module
_files.download_and_extract(uri, _env.code_dir)
File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_files.py", line 129, in download_and_extract
s3_download(uri, dst)
File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_files.py", line 165, in s3_download
s3.Bucket(bucket).download_file(key, dst)
File "/miniconda3/lib/python3.8/site-packages/boto3/s3/inject.py", line 277, in bucket_download_file
return self.meta.client.download_file(
File "/miniconda3/lib/python3.8/site-packages/boto3/s3/inject.py", line 190, in download_file
return transfer.download_file(
File "/miniconda3/lib/python3.8/site-packages/boto3/s3/transfer.py", line 320, in download_file
future.result()
File "/miniconda3/lib/python3.8/site-packages/s3transfer/futures.py", line 103, in result
return self._coordinator.result()
File "/miniconda3/lib/python3.8/site-packages/s3transfer/futures.py", line 266, in result
raise self._exception
File "/miniconda3/lib/python3.8/site-packages/s3transfer/tasks.py", line 269, in _main
self._submit(transfer_future=transfer_future, **kwargs)
File "/miniconda3/lib/python3.8/site-packages/s3transfer/download.py", line 354, in _submit
response = client.head_object(
File "/miniconda3/lib/python3.8/site-packages/botocore/client.py", line 508, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/miniconda3/lib/python3.8/site-packages/botocore/client.py", line 915, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
Please note that I am using only one script for training/serving, named "train_new.py", which already contains the functions below:
# inference functions ---------------
def model_fn(model_dir):
    """Load the fitted scikit-learn estimator serialized by the training job."""
    artifact_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(artifact_path)
def input_fn(input_data, content_type):
    """Deserialize the request payload.

    Only CSV input is handled here: the payload is parsed as a single
    headerless column named "lemma" and returned as an array of strings.
    (``content_type`` is accepted for the serving-container contract but
    is not inspected.)
    """
    frame = pd.read_csv(StringIO(input_data), header=None, names=["lemma"])
    return frame["lemma"].astype(str).values
def predict_fn(input_data, model):
    """Run the loaded estimator on the deserialized input; return raw predictions."""
    return model.predict(input_data)
def output_fn(prediction, accept):
    """Format the prediction output for the response.

    The default accept/content-type between containers for serial inference
    is JSON; CSV is supported as well. The response mimetype is set to the
    requested ``accept`` value so the next container can read the payload
    correctly.

    Raises:
        RuntimeError: if ``accept`` is neither JSON nor CSV.
    """
    if accept == "application/json":
        json_output = {"instances": prediction.tolist()}
        return worker.Response(json.dumps(json_output), mimetype=accept)
    elif accept == "text/csv":
        return worker.Response(encoders.encode(prediction, accept), mimetype=accept)
    else:
        # BUG FIX: the original raised ``RuntimeException``, which is not a
        # Python builtin — the branch itself would fail with NameError.
        raise RuntimeError(
            "{} accept type is not supported by this script.".format(accept)
        )
I would highly appreciate your help; I suspect something is wrong with my model container configuration.