diff --git a/hlink/linking/core/model_metrics.py b/hlink/linking/core/model_metrics.py index 46533bb..769feee 100644 --- a/hlink/linking/core/model_metrics.py +++ b/hlink/linking/core/model_metrics.py @@ -4,8 +4,6 @@ # https://github.com/ipums/hlink import math -import numpy as np - def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float: return 2 * true_pos / (2 * true_pos + false_pos + false_neg) @@ -40,7 +38,7 @@ def mcc(true_pos: int, true_neg: int, false_pos: int, false_neg: int) -> float: def precision(true_pos: int, false_pos: int) -> float: if (true_pos + false_pos) == 0: - precision = np.nan + precision = math.nan else: precision = true_pos / (true_pos + false_pos) @@ -49,7 +47,7 @@ def precision(true_pos: int, false_pos: int) -> float: def recall(true_pos: int, false_neg: int) -> float: if (true_pos + false_neg) == 0: - recall = np.nan + recall = math.nan else: recall = true_pos / (true_pos + false_neg) diff --git a/hlink/linking/model_exploration/link_step_train_test_models.py b/hlink/linking/model_exploration/link_step_train_test_models.py index d00b7c4..5418622 100644 --- a/hlink/linking/model_exploration/link_step_train_test_models.py +++ b/hlink/linking/model_exploration/link_step_train_test_models.py @@ -658,9 +658,14 @@ def _capture_prediction_results( fn_count, tn_count, ) = _get_confusion_matrix(predictions, dep_var) - precision = metrics_core.precision(tp_count, fp_count) - recall = metrics_core.recall(tp_count, fn_count) - mcc = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count) + precision_raw = metrics_core.precision(tp_count, fp_count) + recall_raw = metrics_core.recall(tp_count, fn_count) + mcc_raw = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count) + + # Convert Python's math.nan to np.nan for numpy/pandas processing + precision = precision_raw if not math.isnan(precision_raw) else np.nan + recall = recall_raw if not math.isnan(recall_raw) else np.nan + mcc = mcc_raw if not math.isnan(mcc_raw) else np.nan result = ThresholdTestResult( precision=precision,