From d2430e9cbde08786d37da10429dca989e2dcfa37 Mon Sep 17 00:00:00 2001
From: rileyh
Date: Wed, 5 Jun 2024 21:28:08 +0000
Subject: [PATCH] [#137] Add a test for the potential_matches.sql template itself

---
Reviewer notes (text placed here, between the "---" separator and the
diffstat, is not part of the commit message):

* The new test loads potential_matches.sql through a Jinja2 PackageLoader
  for the "hlink.linking.matching" package, renders it with two dataset
  columns, no feature columns and two blocking columns, strips the result,
  and compares it line by line against the expected SQL.
* The expected list keeps the empty strings that remain after each rendered
  line is stripped, so the assertion depends on the template's line layout
  as well as on the SQL text it produces.
* NOTE(review): the line breaks and indentation of this patch were restored
  from a copy in which all whitespace had been collapsed. Added lines follow
  conventional 4-space Python indentation; the leading whitespace of the
  unchanged context lines is assumed -- confirm against the target file
  before applying.

 .../tests/matching_potential_matches_test.py  | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/hlink/tests/matching_potential_matches_test.py b/hlink/tests/matching_potential_matches_test.py
index 9983f0a..4780620 100755
--- a/hlink/tests/matching_potential_matches_test.py
+++ b/hlink/tests/matching_potential_matches_test.py
@@ -3,6 +3,7 @@
 # in this project's top-level directory, and also on-line at:
 # https://github.com/ipums/hlink
 
+from jinja2 import Environment, PackageLoader
 import pytest
 
 
@@ -87,3 +88,35 @@ def test_step_4_aggregate_features(
         )["exact_all_mult2"].iloc[0]
         == 9
     )
+
+
+def test_potential_matches_sql_template() -> None:
+    loader = PackageLoader("hlink.linking.matching")
+    jinja_env = Environment(loader=loader)
+    template = jinja_env.get_template("potential_matches.sql")
+    context = {
+        "dataset_columns": ["AGE", "SEX"],
+        "feature_columns": [],
+        "blocking_columns": ["AGE_3", "SEX"],
+    }
+    query = template.render(context).strip()
+    query_lines = query.splitlines()
+    query_lines_clean = [line.strip() for line in query_lines]
+
+    assert query_lines_clean == [
+        "SELECT DISTINCT",
+        "",
+        "a.AGE as AGE_a",
+        ",b.AGE as AGE_b",
+        "",
+        ",a.SEX as SEX_a",
+        ",b.SEX as SEX_b",
+        "",
+        "",
+        "FROM exploded_df_a a",
+        "JOIN exploded_df_b b ON",
+        "",
+        "a.AGE_3 = b.AGE_3 AND",
+        "",
+        "a.SEX = b.SEX",
+    ]