Skip to content

Commit 2c91104

Browse files
BigQuery copy method can convert dict column to JSON string
Without this change, a dict column will by default cause the copy method to fail.
1 parent f8e29c0 commit 2c91104

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

parsons/google/google_bigquery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22
import logging
3+
import json
34
import pickle
45
import random
56
import uuid
@@ -756,6 +757,7 @@ def copy(
756757
allow_jagged_rows: bool = True,
757758
quote: Optional[str] = None,
758759
schema: Optional[List[dict]] = None,
760+
convert_dict_columns_to_json: bool = True,
759761
**load_kwargs,
760762
):
761763
"""
@@ -786,6 +788,8 @@ def copy(
786788
template_table: str
787789
Table name to be used as the load schema. Load operation will use the same
788790
columns and data types as the template table.
791+
convert_dict_columns_to_json: bool
792+
If set to True, will convert any dict columns (which cannot by default be successfully loaded to BigQuery) to JSON strings.
789793
**load_kwargs: kwargs
790794
Arguments to pass to the underlying load_table_from_uri call on the BigQuery
791795
client.
@@ -812,6 +816,19 @@ def copy(
812816
else:
813817
csv_delimiter = ","
814818

819+
if convert_dict_columns_to_json:
820+
# Convert dict columns to JSON strings
821+
for field in tbl.get_columns_type_stats():
822+
if "dict" in field["type"]:
823+
new_petl = tbl.table.addfield(
824+
field["name"] + "_replace", lambda row: json.dumps(row[field["name"]])
825+
)
826+
new_tbl = Table(new_petl)
827+
new_tbl.remove_column(field["name"])
828+
new_tbl.rename_column(field["name"] + "_replace", field["name"])
829+
new_tbl.materialize()
830+
tbl = new_tbl
831+
815832
job_config = self._process_job_config(
816833
job_config=job_config,
817834
destination_table_name=table_name,

0 commit comments

Comments
 (0)