upload file to s3 and read file

0

I am trying to upload excel (.xlsx) file to s3 and read and extract data to save in DB In my local machine code is working fine when I deployed to lambda then this error is raised raise BadZipFile("Bad magic number for central directory")

API tool: fastapi language: python to read file openpyxl library is used

code:

def upload_file_to_s3(file: UploadFile, S3_BUCKET, S3_KEY):
    try:
        s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)

        # Create a BytesIO object from the file contents
        # file_contents = file.file.read()
        # file_like_object = BytesIO(file_contents)

        # Upload the file to S3 bucket
        response = s3.upload_fileobj(file.file, S3_BUCKET, S3_KEY)
        return response
    except NoCredentialsError:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail='No AWS credentials found')


@route.post("/uploadfile/", tags=["contact details"], dependencies=[Depends(check_active)])
def create_upload_file(file: UploadFile, promotionName:str = Form(...),promotionId:str = Form(...)):
    try:
        # Initialize Boto3 S3 client
        upload_file_to_s3(file=file, S3_BUCKET="catalog-naveen", S3_KEY=file.filename)
        key = file.filename
        # Load the Excel file from S3
        s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
        obj = s3.get_object(Bucket="catalog-naveen", Key=key)
        excel_data = obj['Body'].read()

        # Create a BytesIO object from the Excel data
        # file_like_object = BytesIO(excel_data)
        # print("######",file_like_object)

        wb = load_workbook(filename=BytesIO(excel_data), read_only=True, data_only=True)
        # Get the first worksheet
        sheet = wb.active
        rows = []
        # Iterate over each row in the worksheet
        for row in sheet.iter_rows(values_only=True):
            rows.append(row)
        batch_size = len(rows)+1
        # print("#############",len(rows)+1)
        for i in range(1, len(rows), batch_size):
            batch = rows[i:i+batch_size]
            # guest = contactDetailsModel( guestName=i[1], guestNumber=i[2], promotionName=event)
            guest = [contactDetailsModel(contactName=row[1], contactNumber=row[2], promotionName=promotionName, promotionId=promotionId) for row in batch]
            db.bulk_save_objects(guest)
            db.commit()

            # print("#######", i[1])
        # Return the extracted rows
        return {"created"}
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail= e)

complete error from CloudWatch

Traceback (most recent call last):
  File "/var/task/routes/event.py", line 71, in create_upload_file
    wb = load_workbook(filename=file_like_object, read_only=True, data_only=True)
  File "/var/task/openpyxl/reader/excel.py", line 344, in load_workbook
    reader = ExcelReader(filename, read_only, keep_vba,
  File "/var/task/openpyxl/reader/excel.py", line 123, in __init__
    self.archive = _validate_archive(fn)
  File "/var/task/openpyxl/reader/excel.py", line 95, in _validate_archive
    archive = ZipFile(filename, 'r')
  File "/var/lang/lib/python3.10/zipfile.py", line 1269, in __init__
    self._RealGetContents()
  File "/var/lang/lib/python3.10/zipfile.py", line 1366, in _RealGetContents
    raise BadZipFile("Bad magic number for central directory")
zipfile.BadZipFile: Bad magic number for central directory

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/var/task/starlette/middleware/exceptions.py", line 68, in __call__
    await self.app(scope, receive, sender)
  File "/var/task/fastapi/middleware/asyncexitstack.py", line 20, in __call__
    raise e
  File "/var/task/fastapi/middleware/asyncexitstack.py", line 17, in __call__
    await self.app(scope, receive, send)
  File "/var/task/starlette/routing.py", line 718, in __call__
    await route.handle(scope, receive, send)
  File "/var/task/starlette/routing.py", line 276, in handle
    await self.app(scope, receive, send)
  File "/var/task/starlette/routing.py", line 66, in app
    response = await func(request)
  File "/var/task/fastapi/routing.py", line 241, in app
    raw_response = await run_endpoint_function(
  File "/var/task/fastapi/routing.py", line 169, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "/var/task/starlette/concurrency.py", line 41, in run_in_threadpool
    return await anyio.to_thread.run_sync(func, *args)
  File "/var/task/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/var/task/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/var/task/anyio/_backends/_asyncio.py", line 807, in run
    result = context.run(func, *args)
  File "/var/task/routes/event.py", line 92, in create_upload_file
    raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail= e)
fastapi.exceptions.HTTPException

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/var/task/mangum/protocols/http.py", line 58, in run
    await app(self.scope, self.receive, self.send)
  File "/var/task/fastapi/applications.py", line 290, in __call__
    await super().__call__(scope, receive, send)
  File "/var/task/starlette/applications.py", line 122, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/var/task/starlette/middleware/errors.py", line 184, in __call__
    raise exc
  File "/var/task/starlette/middleware/errors.py", line 162, in __call__
    await self.app(scope, receive, _send)
  File "/var/task/starlette/middleware/cors.py", line 83, in __call__
    await self.app(scope, receive, send)
  File "/var/task/starlette/middleware/exceptions.py", line 88, in __call__
    response = await handler(request, exc)
  File "/var/task/fastapi/exception_handlers.py", line 15, in http_exception_handler
    return JSONResponse(
  File "/var/task/starlette/responses.py", line 196, in __init__
    super().__init__(content, status_code, headers, media_type, background)
  File "/var/task/starlette/responses.py", line 55, in __init__
    self.body = self.render(content)
  File "/var/task/starlette/responses.py", line 199, in render
    return json.dumps(
  File "/var/lang/lib/python3.10/json/__init__.py", line 238, in dumps
    **kw).encode(obj)
  File "/var/lang/lib/python3.10/json/encoder.py", line 199, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/var/lang/lib/python3.10/json/encoder.py", line 257, in iterencode
    return _iterencode(o, 0)
  File "/var/lang/lib/python3.10/json/encoder.py", line 179, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type BadZipFile is not JSON serializable
  • It would help to see the code here; the error message is not entirely helpful - it looks like you're passing an object (probably the Excel file) into something which is expecting JSON.

  • I have updated the question with code

  • I have a same issue. Have you solved?

3 Antworten
0

I know the instructions in the re:Post article you've linked to talk about building the packages for alternate architectures; but as a just in case: Have you tried creating your Lambda ZIP file on an Amazon Linux instance that matches the architecture of the Lambda function (x64/arm)? Checking before I spend some time to try and recreate the issue that you're having.

profile pictureAWS
EXPERTE
beantwortet vor einem Jahr
  • Hi, I have forget to mention that I have different APIs in lambda and those are working fine, only issue with upload API

  • Ok - I'm not sure (as in confused) as to what problem you're trying to troubleshoot here.

  • when I am trying to upload a file with https API getting internal server error there error in reading xlsx file from s3

  • So are you getting the error when uploading (writing - PutObject) or when downloading (reading - GetObject)?

  • while reading

0

Hi, is the code of the Lambda deployed via zip file (given the error message) ? If yes, I would check this zip file: it may be corrupted and prevent proper deployment of the lambda. Best, Didier

profile pictureAWS
EXPERTE
beantwortet vor einem Jahr
0

What might be happening is that you're reading the file originally as an ASCII (i.e. non-binary) file and that might be messing with the conversion during the upload; which means that the file is corrupted and therefore the download fails.

In the original read (which is pseudocode here), ensure that you are doing a binary read; something like file = open('filename.xlsx', 'rb')

profile pictureAWS
EXPERTE
beantwortet vor einem Monat

Du bist nicht angemeldet. Anmelden um eine Antwort zu veröffentlichen.

Eine gute Antwort beantwortet die Frage klar, gibt konstruktives Feedback und fördert die berufliche Weiterentwicklung des Fragenstellers.

Richtlinien für die Beantwortung von Fragen