I am trying to upload excel (.xlsx) file to s3 and read and extract data to save in DB
In my local machine code is working fine when I deployed to lambda then this error is raised raise BadZipFile("Bad magic number for central directory")
API tool: fastapi
language: python
to read file openpyxl library is used
code:
def upload_file_to_s3(file: UploadFile, S3_BUCKET, S3_KEY):
try:
s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
# Create a BytesIO object from the file contents
# file_contents = file.file.read()
# file_like_object = BytesIO(file_contents)
# Upload the file to S3 bucket
response = s3.upload_fileobj(file.file, S3_BUCKET, S3_KEY)
return response
except NoCredentialsError:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail='No AWS credentials found')
@route.post("/uploadfile/", tags=["contact details"], dependencies=[Depends(check_active)])
def create_upload_file(file: UploadFile, promotionName:str = Form(...),promotionId:str = Form(...)):
try:
# Initialize Boto3 S3 client
upload_file_to_s3(file=file, S3_BUCKET="catalog-naveen", S3_KEY=file.filename)
key = file.filename
# Load the Excel file from S3
s3 = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
obj = s3.get_object(Bucket="catalog-naveen", Key=key)
excel_data = obj['Body'].read()
# Create a BytesIO object from the Excel data
# file_like_object = BytesIO(excel_data)
# print("######",file_like_object)
wb = load_workbook(filename=BytesIO(excel_data), read_only=True, data_only=True)
# Get the first worksheet
sheet = wb.active
rows = []
# Iterate over each row in the worksheet
for row in sheet.iter_rows(values_only=True):
rows.append(row)
batch_size = len(rows)+1
# print("#############",len(rows)+1)
for i in range(1, len(rows), batch_size):
batch = rows[i:i+batch_size]
# guest = contactDetailsModel( guestName=i[1], guestNumber=i[2], promotionName=event)
guest = [contactDetailsModel(contactName=row[1], contactNumber=row[2], promotionName=promotionName, promotionId=promotionId) for row in batch]
db.bulk_save_objects(guest)
db.commit()
# print("#######", i[1])
# Return the extracted rows
return {"created"}
except Exception as e:
db.rollback()
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail= e)
complete error from CloudWatch
Traceback (most recent call last):
File "/var/task/routes/event.py", line 71, in create_upload_file
wb = load_workbook(filename=file_like_object, read_only=True, data_only=True)
File "/var/task/openpyxl/reader/excel.py", line 344, in load_workbook
reader = ExcelReader(filename, read_only, keep_vba,
File "/var/task/openpyxl/reader/excel.py", line 123, in __init__
self.archive = _validate_archive(fn)
File "/var/task/openpyxl/reader/excel.py", line 95, in _validate_archive
archive = ZipFile(filename, 'r')
File "/var/lang/lib/python3.10/zipfile.py", line 1269, in __init__
self._RealGetContents()
File "/var/lang/lib/python3.10/zipfile.py", line 1366, in _RealGetContents
raise BadZipFile("Bad magic number for central directory")
zipfile.BadZipFile: Bad magic number for central directory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/task/starlette/middleware/exceptions.py", line 68, in __call__
await self.app(scope, receive, sender)
File "/var/task/fastapi/middleware/asyncexitstack.py", line 20, in __call__
raise e
File "/var/task/fastapi/middleware/asyncexitstack.py", line 17, in __call__
await self.app(scope, receive, send)
File "/var/task/starlette/routing.py", line 718, in __call__
await route.handle(scope, receive, send)
File "/var/task/starlette/routing.py", line 276, in handle
await self.app(scope, receive, send)
File "/var/task/starlette/routing.py", line 66, in app
response = await func(request)
File "/var/task/fastapi/routing.py", line 241, in app
raw_response = await run_endpoint_function(
File "/var/task/fastapi/routing.py", line 169, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
File "/var/task/starlette/concurrency.py", line 41, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
File "/var/task/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/var/task/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/var/task/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/var/task/routes/event.py", line 92, in create_upload_file
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail= e)
fastapi.exceptions.HTTPException
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/task/mangum/protocols/http.py", line 58, in run
await app(self.scope, self.receive, self.send)
File "/var/task/fastapi/applications.py", line 290, in __call__
await super().__call__(scope, receive, send)
File "/var/task/starlette/applications.py", line 122, in __call__
await self.middleware_stack(scope, receive, send)
File "/var/task/starlette/middleware/errors.py", line 184, in __call__
raise exc
File "/var/task/starlette/middleware/errors.py", line 162, in __call__
await self.app(scope, receive, _send)
File "/var/task/starlette/middleware/cors.py", line 83, in __call__
await self.app(scope, receive, send)
File "/var/task/starlette/middleware/exceptions.py", line 88, in __call__
response = await handler(request, exc)
File "/var/task/fastapi/exception_handlers.py", line 15, in http_exception_handler
return JSONResponse(
File "/var/task/starlette/responses.py", line 196, in __init__
super().__init__(content, status_code, headers, media_type, background)
File "/var/task/starlette/responses.py", line 55, in __init__
self.body = self.render(content)
File "/var/task/starlette/responses.py", line 199, in render
return json.dumps(
File "/var/lang/lib/python3.10/json/__init__.py", line 238, in dumps
**kw).encode(obj)
File "/var/lang/lib/python3.10/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/var/lang/lib/python3.10/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/var/lang/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type BadZipFile is not JSON serializable
It would help to see the code here; the error message is not entirely helpful - it looks like you're passing an object (probably the Excel file) into something which is expecting JSON.
I have updated the question with code