Skip to content

Commit c7b376b

Browse files
BigQuery copy method can convert dict column to JSON string
Without this change, a dict column will by default cause the copy method to fail.
1 parent 710532a commit c7b376b

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

parsons/google/google_bigquery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22
import logging
3+
import json
34
import pickle
45
import random
56
import uuid
@@ -745,6 +746,7 @@ def copy(
745746
allow_jagged_rows: bool = True,
746747
quote: Optional[str] = None,
747748
schema: Optional[List[dict]] = None,
749+
convert_dict_columns_to_json: bool = True,
748750
**load_kwargs,
749751
):
750752
"""
@@ -774,6 +776,8 @@ def copy(
774776
template_table: str
775777
Table name to be used as the load schema. Load operation will use the same
776778
columns and data types as the template table.
779+
convert_dict_columns_to_json: bool
780+
If set to True, will convert any dict columns (which cannot by default be successfully loaded to BigQuery) to JSON strings
777781
**load_kwargs: kwargs
778782
Arguments to pass to the underlying load_table_from_uri call on the BigQuery
779783
client.
@@ -796,6 +800,19 @@ def copy(
796800
else:
797801
csv_delimiter = ","
798802

803+
if convert_dict_columns_to_json:
804+
# Convert dict columns to JSON strings
805+
for field in tbl.get_columns_type_stats():
806+
if "dict" in field["type"]:
807+
new_petl = tbl.table.addfield(
808+
field["name"] + "_replace", lambda row: json.dumps(row[field["name"]])
809+
)
810+
new_tbl = Table(new_petl)
811+
new_tbl.remove_column(field["name"])
812+
new_tbl.rename_column(field["name"] + "_replace", field["name"])
813+
new_tbl.materialize()
814+
tbl = new_tbl
815+
799816
job_config = self._process_job_config(
800817
job_config=job_config,
801818
destination_table_name=table_name,

0 commit comments

Comments
 (0)