Hello Friends,
I am working on a project that reads data from the Dremio data lakehouse, and I am trying to read data from one of its schemas. AWS Glue does not ship with a native Dremio connector, so I had to build a custom JDBC connector.
Here is my code:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkConf, SparkContext
from pyspark.sql import SparkSession
from awsglue.context import GlueContext
from awsglue.job import Job
# Resolve the arguments the Glue runtime passes to the script; only JOB_NAME
# is required here.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])

# Spark properties that register an Iceberg catalog named "glue_catalog",
# using Iceberg's GlueCatalog implementation and S3FileIO, with its warehouse
# rooted at the S3 path below.
#
# NOTE: "--datalake-formats" is an AWS Glue *job parameter*, not a Spark
# property, so setting it on SparkConf has no effect. Configure it on the job
# itself (Job details -> Job parameters: key "--datalake-formats",
# value "iceberg").
conf = (
    SparkConf()
    .set("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .set("spark.sql.catalog.glue_catalog", "org.apache.iceberg.spark.SparkCatalog")
    .set("spark.sql.catalog.glue_catalog.warehouse", "s3://dev-smt-data-cache/stageZone_iceberg/iceber_repo/smt-data/")
    .set("spark.sql.catalog.glue_catalog.catalog-impl", "org.apache.iceberg.aws.glue.GlueCatalog")
    .set("spark.sql.catalog.glue_catalog.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
)

# Build the contexts on top of that configuration; the Spark session obtained
# from the GlueContext carries the settings above.
sc = SparkContext(conf=conf)
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Initialise the Glue job with its name and resolved arguments.
job = Job(glueContext)
job.init(args['JOB_NAME'], args)
# Read ten FileID values from Dremio through the Glue connection
# "Dremio-Stage".
#
# The query must be the bare SQL text. Previously it was wrapped in an extra
# pair of single quotes (plus padding spaces) inside the Python string, so the
# source received a quoted string literal instead of a SELECT statement.
#
# NOTE(review): if "Dremio-Stage" was created from a custom JDBC connector,
# the documented connection_type is "custom.jdbc" rather than "jdbc" --
# confirm against the connection definition.
#
# The variable name is kept as-is because the statements that follow
# reference it; be aware it would shadow awsglue's DynamicFrame class if that
# class were ever imported into this script.
DynamicFrame = glueContext.create_dynamic_frame.from_options(
    connection_type="jdbc",
    connection_options={
        "query": 'SELECT FileID FROM "mp2-appsrvspace".ESG.CTG."USG_2000_Transaction" LIMIT 10',
        "inferSchema": True,
        # Alternative to "query" (use one or the other, not both):
        # "dbtable": '"mp2-appsrvspace".ESG.CTG."USG_2000_Transaction"',
        "connectionName": "Dremio-Stage",
    },
)
# Project the frame read from Dremio onto the columns this job keeps.
# The mapping previously named a placeholder column ("field1") that the query
# does not select; the only column the query returns is FileID.
applyformat = ApplyMapping.apply(
    frame=DynamicFrame,
    mappings=[
        # (source column, source type, target column, target type)
        # assumes FileID is a string in Dremio -- TODO confirm and adjust
        ("FileID", "string", "FileID", "string"),
    ],
    transformation_ctx="applyformat",
)
# Convert the Glue DynamicFrame to a Spark DataFrame, expose it to Spark SQL
# as "temp_table", and print its first five rows.
#
# createOrReplaceTempView() returns None, so it must not be chained into the
# assignment: doing so left `dynamicFrame` as None and made the head() call
# fail with AttributeError.
dynamicFrame = DynamicFrame.toDF()
dynamicFrame.createOrReplaceTempView("temp_table")
print(dynamicFrame.head(5))
I keep getting the error below. I have tried several different approaches, but none of them has worked:
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o115.getDynamicFrame.
: java.lang.UnsupportedOperationException: empty.reduceLeft
I would appreciate any guidance or hints on how to fix this problem.