Commit

[#179] Return math.nan from core.model_metrics
I think this is nice because it disentangles the core library from
numpy. But it does mean that we have to explicitly convert NaNs to
numpy.nan in model exploration. So it's a bit messy.
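
For context (illustrative only, not part of the commit): math.nan is a plain Python float, and numpy's NaN checks accept it, so the two flavors interoperate; the explicit conversion in model exploration mainly keeps the downstream numpy/pandas code uniformly on np.nan.

import math
import numpy as np

x = math.nan           # plain Python float, no numpy dependency
print(type(x))         # <class 'float'>
print(math.isnan(x))   # True
print(np.isnan(x))     # True -- numpy's check also accepts a Python float NaN
print(x is np.nan)     # False -- np.nan is a distinct float object, hence the explicit conversion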
riley-harper committed Dec 11, 2024
1 parent 75b4414 commit ae59da3
Showing 2 changed files with 10 additions and 7 deletions.
6 changes: 2 additions & 4 deletions hlink/linking/core/model_metrics.py
@@ -4,8 +4,6 @@
 # https://github.com/ipums/hlink
 import math
 
-import numpy as np
-
 
 def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float:
     return 2 * true_pos / (2 * true_pos + false_pos + false_neg)
@@ -40,7 +38,7 @@ def mcc(true_pos: int, true_neg: int, false_pos: int, false_neg: int) -> float:
 
 def precision(true_pos: int, false_pos: int) -> float:
     if (true_pos + false_pos) == 0:
-        precision = np.nan
+        precision = math.nan
     else:
         precision = true_pos / (true_pos + false_pos)
 
@@ -49,7 +47,7 @@ def precision(true_pos: int, false_pos: int) -> float:
 
 def recall(true_pos: int, false_neg: int) -> float:
     if (true_pos + false_neg) == 0:
-        recall = np.nan
+        recall = math.nan
     else:
         recall = true_pos / (true_pos + false_neg)
 
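
A quick usage sketch of the changed functions (illustrative only, assuming the module imports as hlink.linking.core.model_metrics):

import math
from hlink.linking.core import model_metrics

print(model_metrics.precision(true_pos=0, false_pos=0))            # nan (was np.nan before this commit)
print(math.isnan(model_metrics.recall(true_pos=0, false_neg=0)))   # True
print(model_metrics.precision(true_pos=8, false_pos=2))            # 0.8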
11 changes: 8 additions & 3 deletions hlink/linking/model_exploration/link_step_train_test_models.py
@@ -658,9 +658,14 @@ def _capture_prediction_results(
         fn_count,
         tn_count,
     ) = _get_confusion_matrix(predictions, dep_var)
-    precision = metrics_core.precision(tp_count, fp_count)
-    recall = metrics_core.recall(tp_count, fn_count)
-    mcc = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count)
+    precision_raw = metrics_core.precision(tp_count, fp_count)
+    recall_raw = metrics_core.recall(tp_count, fn_count)
+    mcc_raw = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count)
+
+    # Convert Python's math.nan to np.nan for numpy/pandas processing
+    precision = precision_raw if not math.isnan(precision_raw) else np.nan
+    recall = recall_raw if not math.isnan(recall_raw) else np.nan
+    mcc = mcc_raw if not math.isnan(mcc_raw) else np.nan
 
     result = ThresholdTestResult(
         precision=precision,
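
The conversion added above can be read as a small helper (hypothetical name, not in the commit; the diff inlines the same expression per metric):

import math
import numpy as np

def _to_numpy_nan(value: float) -> float:
    # Map Python's math.nan to np.nan so downstream numpy/pandas code sees its own NaN object.
    return np.nan if math.isnan(value) else value

print(_to_numpy_nan(math.nan) is np.nan)  # True
print(_to_numpy_nan(0.75))                # 0.75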
