Skip to content

Commit

Permalink
Hotfix/file destination write (#108)
Browse files Browse the repository at this point in the history
* Simplify code in FileDestination.render_row() to improve readability.

* Change FileDestination write logic to compute and write each partition, instead of mapping writes over rows.

* Update CHANGELOG and VERSION in preparation for patch.
  • Loading branch information
jayckaiser authored Jun 26, 2024
1 parent f709b10 commit 9bb733a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
### v0.3.4
<details>
<summary>Released 2024-06-26</summary>

* hotfix: Fix bug when writing out JSON in `FileDestination`

</details>


### v0.3.3
<details>
<summary>Released 2024-06-18</summary>
Expand Down
2 changes: 1 addition & 1 deletion earthmover/VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.3
0.3.4
21 changes: 12 additions & 9 deletions earthmover/nodes/destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ def execute(self, **kwargs):
if self.header:
fp.write(self.header)

self.data.apply(lambda row: fp.write(row + '\n'), meta=pd.Series('string')).compute()
for partition in self.data.partitions:
fp.writelines(partition.compute())
partition = None # Remove partition from memory immediately after write.

if self.footer:
fp.write(self.footer)
Expand All @@ -113,16 +115,17 @@ def execute(self, **kwargs):
self.size = os.path.getsize(self.file)

def render_row(self, row: pd.Series):
row = row.to_dict()
types_to_cast = [bool, int, float]
keys_to_cast = list(filter(lambda x: type(row[x]) in types_to_cast, row.keys()))
# this line (a) converts the keys_to_cast to string, and also converts all Nones to empty string:
row = dict(map(lambda x: (x[0], str(x[1]) if x[0] in keys_to_cast else (x[1] if x[1] else "")), row.items()))
_data_tuple = row
_data_tuple["__row_data__"] = row
types_to_cast = (bool, int, float)

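# Intended to cast bool/int/float values to strings and to replace falsy values (e.g. None) with empty strings.
# NOTE(review): the isinstance check below tests `field` (the key), not `value` -- confirm this is intended.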
row_data = {
field: str(value) if isinstance(field, types_to_cast) else (value or "")
for field, value in row.to_dict().items()
}
row_data["__row_data__"] = row_data

try:
json_string = self.jinja_template.render(_data_tuple)
json_string = self.jinja_template.render(row_data) + "\n"

except Exception as err:
self.error_handler.throw(
Expand Down

0 comments on commit 9bb733a

Please sign in to comment.