From d2430e9cbde08786d37da10429dca989e2dcfa37 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Wed, 5 Jun 2024 21:28:08 +0000
Subject: [PATCH] [#137] Add a test for the potential_matches.sql template
 itself

---
 .../tests/matching_potential_matches_test.py  | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/hlink/tests/matching_potential_matches_test.py b/hlink/tests/matching_potential_matches_test.py
index 9983f0a..4780620 100755
--- a/hlink/tests/matching_potential_matches_test.py
+++ b/hlink/tests/matching_potential_matches_test.py
@@ -3,6 +3,7 @@
 # in this project's top-level directory, and also on-line at:
 #   https://github.com/ipums/hlink
 
+from jinja2 import Environment, PackageLoader
 import pytest
 
 
@@ -87,3 +88,35 @@ def test_step_4_aggregate_features(
         )["exact_all_mult2"].iloc[0]
         == 9
     )
+
+
+def test_potential_matches_sql_template() -> None:  # pins the rendered SQL
+    loader = PackageLoader("hlink.linking.matching")  # templates live in this package
+    jinja_env = Environment(loader=loader)
+    template = jinja_env.get_template("potential_matches.sql")
+    context = {
+        "dataset_columns": ["AGE", "SEX"],
+        "feature_columns": [],  # no feature columns in this scenario
+        "blocking_columns": ["AGE_3", "SEX"],
+    }
+    query = template.render(context).strip()  # trim outer whitespace
+    query_lines = query.splitlines()
+    query_lines_clean = [line.strip() for line in query_lines]  # drop indentation
+    # Exact line-by-line match; blank lines inside the rendered query count too.
+    assert query_lines_clean == [
+        "SELECT DISTINCT",
+        "",
+        "a.AGE as AGE_a",
+        ",b.AGE as AGE_b",
+        "",
+        ",a.SEX as SEX_a",
+        ",b.SEX as SEX_b",
+        "",
+        "",
+        "FROM exploded_df_a a",
+        "JOIN exploded_df_b b ON",
+        "",
+        "a.AGE_3 = b.AGE_3 AND",
+        "",
+        "a.SEX = b.SEX",
+    ]