Hi, I am a security architect, not a programmer, so I am the blind squirrel looking for a nut in this situation and could use some help. We have a Glue job failing, and the error alternates between "Py4JJavaError: An error occurred while calling o117.pyWriteDynamicFrame. java.lang.reflect.InvocationTargetException" and "JavaError: An error occurred while calling o116.pyWriteDynamicFrame." I believe the full error is the one below. Any help would be awesome and greatly appreciated.
"Event": "GlueETLJobExceptionEvent",
"Timestamp": 1666211480954,
"Failure Reason": "Traceback (most recent call last):\n File \"/tmp/governed_tables.py\", line 68, in <module>\n sink.writeFrame(dy_df)\n File \"/opt/amazon/lib/python3.6/site-packages/awsglue/data_sink.py\", line 32, in writeFrame\n
return DynamicFrame(self._jsink.pyWriteDynamicFrame(dynamic_frame._jdf, callsite(), info), dynamic_frame.glue_ctx, dynamic_frame.name + \"_errors\")\n File \"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py\", line 1305, in __call__\n answer, self.gateway_client, self.target_id, self.name)\n File \"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py\", line 111, in deco\n
return f(*a, **kw)\n File \"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py\", line 328, in get_return_value\n format(target_id, \".\", name), value)\npy4j.protocol.Py4JJavaError: An error occurred while calling o117.pyWriteDynamicFrame.\n: java.lang.reflect.InvocationTargetException\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n\tat
java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n\tat com.amazonaws.services.glue.remote.MichiganAWSCredentialProviderProxy$.get(MichiganAWSCredentialProviderProxy.scala:14)\n\tat com.amazonaws.services.glue.util.LakeformationClientWrapper.$anonfun$lakeformationUpdateTableObjectsInternal$1(LakeformationGovernedWrapper.scala:126)\n\tat scala.collection.immutable.List.map(List.scala:282)\n\tat com.amazonaws.services.glue.util.LakeformationClientWrapper.lakeformationUpdateTableObjectsInternal(LakeformationGovernedWrapper.scala:115)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native
Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat com.amazonaws.services.glue.GlueUtility$.callLakeformationMethod(GlueUtility.scala:44)\n\tat com.amazonaws.services.glue.sinks.HadoopDataSink.lakeformationUpdateTableObjects(HadoopDataSink.scala:344)\n\tat com.amazonaws.services.glue.sinks.HadoopDataSink.$anonfun$writeDynamicFrame$2(HadoopDataSink.scala:313)\n\tat com.amazonaws.services.glue.util.FileSchemeWrapper.$anonfun$executeWithQualifiedScheme$1(FileSchemeWrapper.scala:90)\n\tat
com.amazonaws.services.glue.util.FileSchemeWrapper.executeWith(FileSchemeWrapper.scala:83)\n\tat com.amazonaws.services.glue.util.FileSchemeWrapper.executeWithQualifiedScheme(FileSchemeWrapper.scala:90)\n\tat com.amazonaws.services.glue.sinks.HadoopDataSink.$anonfun$writeDynamicFrame$1(HadoopDataSink.scala:158)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)\n\tat com.amazonaws.services.glue.sinks.HadoopDataSink.writeDynamicFrame(HadoopDataSink.scala:152)\n\tat com.amazonaws.services.glue.DataSink.pyWriteDynamicFrame(DataSink.scala:72)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:750)\nCaused by:
java.lang.RuntimeException: class com.amazonaws.services.gluejobexecutor.model.EntityNotFoundException:Database null not found. (Service: AWSLakeFormation; Status Code: 400; Error Code: EntityNotFoundException; Request ID: 37ecd5ab-d47a-4c25-ad7a-80a070ed1558; Proxy: null) (Service: AWSGlueJobExecutor; Status Code: 400; Error Code: EntityNotFoundException; Request ID: b3862a0e-3bb4-440e-9801-452716b82337; Proxy: null)\n\tat com.amazonaws.services.glue.remote.LakeformationCredentialsProvider.refresh(LakeformationCredentialsProvider.scala:50)\n\tat com.amazonaws.services.glue.remote.LakeformationCredentialsProvider.<init>(LakeformationCredentialsProvider.scala:77)\n\t...
33 more\n",
"Stack Trace": [
{
"Declaring Class": "get_return_value",
"Method Name": "format(target_id, \".\", name), value)",
"File Name": "/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py",
"Line Number": 328
},
{
"Declaring Class": "deco",
"Method Name": "return f(*a, **kw)",
"File Name": "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py",
"Line Number": 111
},
{
"Declaring Class": "__call__",
"Method Name": "answer, self.gateway_client, self.target_id, self.name)",
"File Name": "/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py",
"Line Number": 1305
},
{
"Declaring Class": "writeFrame",
"Method Name": "return DynamicFrame(self._jsink.pyWriteDynamicFrame(dynamic_frame._jdf, callsite(), info), dynamic_frame.glue_ctx, dynamic_frame.name + \"_errors\")",
"File Name": "/opt/amazon/lib/python3.6/site-packages/awsglue/data_sink.py",
"Line Number": 32
},
{
"Declaring Class": "<module>",
"Method Name": "sink.writeFrame(dy_df)",
"File Name": "/tmp/governed_tables.py",
"Line Number": 68
}
],
"Last Executed Line number": 68,
"script": "governed_tables.py"
I'm getting the exact same error right now. My Governed Table ETL was working fine but then suddenly started failing. Have you found a resolution for this? Thanks!
Did you manage to solve your issue? I'm having the same problem: I've given every permission I can think of on the database and still get "Database null not found" using:

database_name = "poc_lakeformation"
table_name = "example_json"
txId1 = glueContext.start_transaction(read_only=False)
sink = glueContext.getSink(connection_type="s3", path = f"s3://my-bucket/data/{table_name}", enableUpdateCatalog=True,transactionId = txId1)
sink.setFormat("glueparquet")
sink.setCatalogInfo( catalogDatabase=database_name, catalogTableName=table_name)
try:
    sink.writeFrame(dynF)
    glueContext.commit_transaction(txId1)
except:
    glueContext.cancel_transaction(txId1)
    raise
The S3 data location is registered, and the role that executes the Glue job has permissions on the database.
Has anyone had any luck resolving this?