@@ -23,28 +23,67 @@ def bq_import_csv(table_id, gcs_path, table_schema, ckan_conf):
2323 job_config .source_format = bigquery .SourceFormat .CSV
2424 # overwrite a Table
2525 job_config .write_disposition = bigquery .WriteDisposition .WRITE_TRUNCATE
26- # set 'True' for schema autodetect but turning it off since we define schema in explicitly when publishing data using datapub
27- # job_config.autodetect = True
2826 load_job = client .load_table_from_uri (
2927 gcs_path , table_id , job_config = job_config
3028 )
3129
3230 load_job .result () # Waits for table load to complete.
3331 destination_table = client .get_table (table_id )
3432 except Exception as e :
35- job_config = bigquery .LoadJobConfig ()
36-
37- job_config .skip_leading_rows = 1
38- job_config .source_format = bigquery .SourceFormat .CSV
39- # overwrite a Table
40- job_config .write_disposition = bigquery .WriteDisposition .WRITE_TRUNCATE
41- # set 'True' for schema autodetect but turning it off since we define schema in explicitly when publishing data using datapub
42- # job_config.autodetect = True
43- load_job = client .load_table_from_uri (
44- gcs_path , table_id , job_config = job_config
33+ logging .info (e )
34+ # Use a list to build the string components efficiently.
35+ error_lines = []
36+ error_lines .append (
37+ "BigQuery Load Job Failed with a BadRequest."
4538 )
46- load_job .result () # Waits for table load to complete.
47- destination_table = client .get_table (table_id )
39+ error_lines .append (f"Original API message: { e } " )
40+
41+ # The key part: iterate through the load_job.errors list and append each entry to our list.
42+ if load_job .errors :
43+ error_lines .append ("\n --- Detailed Error Breakdown ---" )
44+ logging .info (load_job .errors )
45+ for i , error in enumerate (load_job .errors ):
46+ # Format each error dictionary into a readable line.
47+ line = (
48+ f"Error { i + 1 } : "
49+ f"Reason: { error .get ('reason' , 'N/A' )} , "
50+ f"Location: { error .get ('location' , 'N/A' )} , "
51+ f"Message: { error .get ('message' , 'N/A' )} "
52+ )
53+ error_lines .append (line )
54+ else :
55+ error_lines .append ("No detailed errors were provided in the exception." )
56+
57+ # Join the list of lines into a single string with newlines.
58+ error_report_string = "\n " .join (error_lines )
59+ logging .info (error_report_string )
60+ status_dict = {
61+ 'res_id' : ckan_conf .get ('resource_id' ),
62+ 'state' : 'failed' ,
63+ 'message' : error_report_string ,
64+ 'dag_run_id' : ckan_conf .get ('dag_run_id' )
65+ }
66+ aircan_status_update (ckan_conf .get ('site_url' ), ckan_conf .get ('api_key' ), status_dict )
67+ raise AirflowCKANException ('Data ingestion has failed.' , str (e ))
68+ #status_dict = {
69+ # 'res_id': ckan_conf.get('resource_id'),
70+ # 'state': 'progress',
71+ # 'message': 'Data ingestion using provided schema failed, trying to autodetect schema.',
72+ # 'dag_run_id': ckan_conf.get('dag_run_id')
73+ #}
74+ #aircan_status_update(ckan_conf.get('site_url'), ckan_conf.get('api_key'), status_dict)
75+ #job_config = bigquery.LoadJobConfig()
76+ #job_config.autodetect = True
77+
78+ #job_config.skip_leading_rows = 1
79+ #job_config.source_format = bigquery.SourceFormat.CSV
80+ ## overwrite a Table
81+ #job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
82+ #load_job = client.load_table_from_uri(
83+ # gcs_path, table_id, job_config=job_config
84+ #)
85+ #load_job.result() # Waits for table load to complete.
86+ #destination_table = client.get_table(table_id)
4887 status_dict = {
4988 'res_id' : ckan_conf .get ('resource_id' ),
5089 'state' : 'progress' ,
0 commit comments