[#21] Remove a skipped matching secondary_threshold test

After reading the skip reason at the top of this test, I went looking for something that directly replaced the "secondary_threshold" matching attribute, but I couldn't find anything that looked closely related. Since the test is already skipped and there is no clear replacement to refactor it against, let's just remove it.
ipums · Jun 18, 2024 · 1b69d54 · 1b69d54
1 parent b87ed91
commit 1b69d54
Showing 1 changed file with 0 additions and 68 deletions.
diff --git a/hlink/tests/matching_scoring_test.py b/hlink/tests/matching_scoring_test.py
@@ -10,74 +10,6 @@
 from hlink.linking.matching.link_step_score import LinkStepScore
 
 
-@pytest.mark.skip(
-    reason="We still want to test that whatever 'secondary_threshold' became is being applied correctly, but we need to refactor this test to account for the fact that this was totally renamed and is now being carried out in a different step (step 3 doesn't exist anymore)."
-)
-def test_step_3_uniq_and_secondary_threshold(spark, matching_conf, matching):
-    """Test a secondary threshold with uniqueness"""
-    matching_conf["comparison_features"] = [
-        {
-            "alias": "namefrst_jw",
-            "column_name": "namefrst",
-            "comparison_type": "jaro_winkler",
-        },
-        {
-            "alias": "namelast_jw",
-            "column_name": "namelast",
-            "comparison_type": "jaro_winkler",
-        },
-    ]
-
-    matching_conf["comparisons"] = {
-        "comp_a": {
-            "feature_name": "namefrst_jw",
-            "threshold": 0.8,
-            "comparison_type": "threshold",
-        },
-        "comp_b": {
-            "feature_name": "namelast_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.8,
-        },
-        "operator": "AND",
-    }
-
-    matching_conf["secondary_threshold"] = {
-        "threshold_a": {
-            "feature_name": "namefrst_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.9,
-        },
-        "threshold_b": {
-            "feature_name": "namelast_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.9,
-        },
-        "unique_true": {"id_a": "id_a", "id_b": "id_b"},
-        "operator": "AND",
-        "secondary": True,
-    }
-
-    matching.step_0_explode()
-    matching.step_1_match()
-    hlink.linking.matching._step_2_score.__create_features(matching, matching_conf)
-
-    # Create pandas DFs of the step_2 potential matches table
-    potential_matches_df = spark.table("potential_matches_prepped").toPandas()
-
-    #    matching.step_3_secondary_threshold()
-    # unique_matches_df = spark.table("potential_matches").toPandas()
-    unique_high_matches_df = spark.table("potential_matches_prepped").toPandas()
-
-    assert len(potential_matches_df.id_a) == 5
-    # assert (len(unique_matches_df.id_a) == 1)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namelast_jw"].iloc[0] > 0.8)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namelast_jw"].iloc[0] < 0.9)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namefrst_jw"].iloc[0] > 0.8)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namefrst_jw"].iloc[0] > 0.9)
-    assert unique_high_matches_df.empty
-
-
 def test_step_2_skip_on_no_conf(spark, matching_conf, matching, capsys):
     """Test matching step 2 doesn't run if no training config"""