Please forgive me; I am a student who is still learning this process.
I have a Lambda function and I've created all the necessary layers, configured the environment and buckets.
However, every test run has returned statusCode 200 no matter what I change, and I now see that the Lambda test isn't actually writing the 'cleaned' data to the S3 bucket (de2023-on-youtube-cleansed-useast1-dev).
Any clues on where to start looking?
LAMBDA FUNCTION
import awswrangler as wr
import pandas as pd
import urllib.parse
import os
# Deployment settings, read once at import time from the Lambda environment.
# Every variable is mandatory: a missing one raises KeyError on import.
(
    os_input_s3_cleansed_layer,
    os_input_glue_catalog_db_name,
    os_input_glue_catalog_table_name,
    os_input_write_data_operation,
) = [
    os.environ[variable_name]
    for variable_name in (
        's3_cleansed_layer',
        'glue_catalog_db_name',
        'glue_catalog_table_name',
        'write_data_operation',
    )
]
def lambda_handler(event, context):
    """Convert the JSON object named in an S3 event into a Parquet dataset.

    The handler reads the bucket name and object key from the first entry of
    ``event['Records']``, loads that object with ``wr.s3.read_json``, flattens
    its ``items`` column with ``pd.json_normalize`` and writes the result with
    ``wr.s3.to_parquet`` using the module-level ``os_input_*`` settings.

    NOTE: only ``event['Records'][0]`` is processed; any further records in
    the same event are ignored.

    Args:
        event: Invocation payload. Must contain
            ``Records[0].s3.bucket.name`` and ``Records[0].s3.object.key``.
        context: Lambda context object; not used.

    Returns:
        Whatever ``wr.s3.to_parquet`` returns for the write.

    Raises:
        KeyError, IndexError: If ``event`` does not have the structure above
            (for example, a test event that is not an S3 notification).
        Exception: Any error raised while reading, transforming or writing is
            printed together with the stage that failed and then re-raised
            unchanged.
    """
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys arrive URL-encoded in the event (e.g. '+' for spaces).
    key = urllib.parse.unquote_plus(s3_info['object']['key'], encoding='utf-8')

    # Stage 1: read the source object. Kept in its own try block so that the
    # "error getting object" hint is only printed for genuine read failures.
    try:
        df_raw = wr.s3.read_json('s3://{}/{}'.format(bucket, key))
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise

    # Stage 2: flatten the records and write them to the cleansed layer.
    # Failures here (missing 'items' column, write permissions, Glue catalog
    # problems, ...) are reported against the destination, not the source.
    try:
        df_step_1 = pd.json_normalize(df_raw['items'])
        wr_response = wr.s3.to_parquet(
            df=df_step_1,
            path=os_input_s3_cleansed_layer,
            dataset=True,
            database=os_input_glue_catalog_db_name,
            table=os_input_glue_catalog_table_name,
            mode=os_input_write_data_operation,
        )
    except Exception as e:
        print(e)
        print('Error transforming object {} from bucket {} or writing it to {} (Glue table {}.{}).'.format(
            key,
            bucket,
            os_input_s3_cleansed_layer,
            os_input_glue_catalog_db_name,
            os_input_glue_catalog_table_name,
        ))
        raise

    return wr_response
ENVIRONMENT VARIABLES
glue_catalog_db_name = db_youtube_cleaned
glue_catalog_table_name = cleaned_statistics_reference_data
s3_cleansed_layer = s3://de2023-on-youtube-cleansed-useast1-dev/youtube
write_data_operation = append