InvalidS3ObjectException using boto3 and textract

0

import boto3
from sagemaker.amazon.amazon_estimator import get_image_uri

# Script overview: list the objects in an S3 bucket, take the first key,
# and ask Amazon Textract to detect text in that object.

# Region of the default boto3 session. Not referenced again in this script.
region = boto3.Session().region_name

# IAM role ARN placeholder. Not referenced again in this script.
role = 'arn:aws:iam::xxxx'

# Name of the bucket that holds the document.
bucket = 'xxxxx'
# HTTPS URL built for the bucket in us-west-1. Not referenced again in this script.
bucket_path = "https://s3-{}.amazonaws.com/{}".format('us-west-1', 'xxxxx')

# S3 client pinned to us-west-1 with explicit (placeholder) credentials.
client = boto3.client(
    's3',
    region_name = 'us-west-1',
    aws_access_key_id='xxxx',
    aws_secret_access_key='xxxx'
)

# List the objects in the bucket.
res = client.list_objects(Bucket=bucket)

# Key of the first object in the listing.
# NOTE(review): presumably fails if the bucket has no objects -- confirm.
file = res['Contents'][0]['Key']

# Replace every space in the key with an underscore.
# NOTE(review): the rewritten string is what gets passed to Textract below as
# the object name; if the original key contained spaces, this no longer
# matches the key stored in the bucket. The reported InvalidS3ObjectException
# ("Check object key, region and/or access permissions") is consistent with that.
filename = '_'.join(file.split(' '))

#Amazon Textract client
# NOTE(review): unlike the S3 client above, no region or credentials are passed
# here, so this client presumably uses the default session configuration --
# confirm it resolves to the same region as the bucket and has access to it.
textract = boto3.client('textract')

# Call Amazon Textract
# The document is referenced by bucket name and object name, not uploaded inline.
response = textract.detect_document_text(
    Document={
        'S3Object': {
            'Bucket': bucket,
            'Name': filename
        }
    })

# Dump the raw response returned by Textract.
print(response)



---------------------------------------------------------------------------
InvalidS3ObjectException                  Traceback (most recent call last)
/var/folders/d0/gnksqzwn2fn46fjgrkp6045c0000gn/T/ipykernel_87475/2286579661.py in <module>
     26 
     27 # Call Amazon Textract
---> 28 response = textract.detect_document_text(
     29     Document={
     30         'S3Object': {

/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
    389                     "%s() only accepts keyword arguments." % py_operation_name)
    390             # The "self" in this scope is referring to the BaseClient.
--> 391             return self._make_api_call(operation_name, kwargs)
    392 
    393         _api_call.__name__ = str(py_operation_name)

/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
    717             error_code = parsed_response.get("Error", {}).get("Code")
    718             error_class = self.exceptions.from_code(error_code)
--> 719             raise error_class(parsed_response, operation_name)
    720         else:
    721             return parsed_response

InvalidS3ObjectException: An error occurred (InvalidS3ObjectException) when calling the DetectDocumentText operation: Unable to get object metadata from S3. Check object key, region and/or access permissions.
asked a year ago · 547 views
1 Answer
1

I think the problem is that you're rewriting the filename (the object key) to have underscores in place of spaces. Once you do this, the name you pass to detect_document_text() is no longer the key of the object in the bucket, which is why Textract can't find the object.

If you remove this line, it should work:

filename = '_'.join(file.split(' '))
profile picture AWS
EXPERT
James_S
answered a year ago

You are not logged in. Log in to post an answer.

A good answer clearly answers the question and provides constructive feedback and encourages professional growth in the question asker.

Guidelines for Answering Questions