Skip to content

Commit

Permalink
[#137] Support the new blocking_columns format in potential_matches.sql
Browse files Browse the repository at this point in the history
  • Loading branch information
riley-harper committed Jun 5, 2024
1 parent 83f92c4 commit f637500
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion hlink/linking/matching/link_step_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _run(self):
config["id_column"],
)

- t_ctx["blocking_columns"] = [bc["column_name"] for bc in blocking]
+ t_ctx["blocking_columns"] = [[bc["column_name"]] for bc in blocking]

blocking_exploded_columns = [
bc["column_name"] for bc in blocking if "explode" in bc and bc["explode"]
Expand Down
4 changes: 2 additions & 2 deletions hlink/linking/matching/templates/potential_matches.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ SELECT DISTINCT
{% endif %}
FROM exploded_df_a a
JOIN exploded_df_b b ON
- {% for col in blocking_columns %}
- a.{{ col }} = b.{{ col }} {{ "AND" if not loop.last }}
+ {% for or_group in blocking_columns %}
+ ({% for col in or_group %}a.{{ col }} = b.{{ col }}{{ " OR " if not loop.last }}{% endfor %}) {{ "AND" if not loop.last }}
{% endfor %}
{% if distance_table %}
{% for d in distance_table %}
Expand Down

0 comments on commit f637500

Please sign in to comment.