Skip to content

Commit c7b376b

Browse files
BigQuery copy method can convert dict column to JSON string
Without this change, a dict column will by default cause the copy method to fail.
1 parent 710532a commit c7b376b

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

parsons/google/google_bigquery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22
import logging
3+
import json
34
import pickle
45
import random
56
import uuid
@@ -745,6 +746,7 @@ def copy(
745746
allow_jagged_rows: bool = True,
746747
quote: Optional[str] = None,
747748
schema: Optional[List[dict]] = None,
749+
convert_dict_columns_to_json: bool = True,
748750
**load_kwargs,
749751
):
750752
"""
@@ -774,6 +776,8 @@ def copy(
774776
template_table: str
775777
Table name to be used as the load schema. Load operation will use the same
776778
columns and data types as the template table.
779+
convert_dict_columns_to_json: bool
780+
If set to True, will convert any dict columns (which cannot by default be successfully loaded to BigQuery) to JSON strings
777781
**load_kwargs: kwargs
778782
Arguments to pass to the underlying load_table_from_uri call on the BigQuery
779783
client.
@@ -796,6 +800,19 @@ def copy(
796800
else:
797801
csv_delimiter = ","
798802

803+
if convert_dict_columns_to_json:
804+
# Convert dict columns to JSON strings
805+
for field in tbl.get_columns_type_stats():
806+
if "dict" in field["type"]:
807+
new_petl = tbl.table.addfield(
808+
field["name"] + "_replace", lambda row: json.dumps(row[field["name"]])
809+
)
810+
new_tbl = Table(new_petl)
811+
new_tbl.remove_column(field["name"])
812+
new_tbl.rename_column(field["name"] + "_replace", field["name"])
813+
new_tbl.materialize()
814+
tbl = new_tbl
815+
799816
job_config = self._process_job_config(
800817
job_config=job_config,
801818
destination_table_name=table_name,

0 commit comments

Comments
 (0)