From 71b9db54372d66c34010afe2ff8e8f448aa1003e Mon Sep 17 00:00:00 2001 From: rileyh Date: Tue, 13 Aug 2024 16:10:14 +0000 Subject: [PATCH] [#142] Sort the columns in exploded_df_a and exploded_df_b when selecting them out Previously we were selecting with a set, so the columns got all mixed up. Let's sort them so that they are easier to work with. The order of the columns should not affect the results. --- hlink/linking/matching/link_step_explode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hlink/linking/matching/link_step_explode.py b/hlink/linking/matching/link_step_explode.py index 83bc6a2..a0d5e45 100644 --- a/hlink/linking/matching/link_step_explode.py +++ b/hlink/linking/matching/link_step_explode.py @@ -153,7 +153,7 @@ def _explode( # be a breaking change to remove this. We'd have to look into the # ramifications. if len(all_exploding_columns) > 0: - exploded_df = exploded_df.select(*all_column_names) + exploded_df = exploded_df.select(sorted(all_column_names)) return exploded_df