Commit

[#179] Return math.nan from core.model_metrics
I think this is nice because it disentangles the core library from
numpy. But it does mean that we have to explicitly convert NaNs to
numpy.nan in model exploration. So it's a bit messy.
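
For context (illustrative only, not part of the commit): math.nan is a plain Python float, and numpy's NaN checks accept it, so the two flavors interoperate; the explicit conversion in model exploration mainly keeps the downstream numpy/pandas code uniformly on np.nan.

import math
import numpy as np

x = math.nan           # plain Python float, no numpy dependency
print(type(x))         # <class 'float'>
print(math.isnan(x))   # True
print(np.isnan(x))     # True -- numpy's check also accepts a Python float NaN
print(x is np.nan)     # False -- np.nan is a distinct float object, hence the explicit conversion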
riley-harper committed Dec 11, 2024
1 parent 75b4414 commit ae59da3
Showing 2 changed files with 10 additions and 7 deletions.
6 changes: 2 additions & 4 deletions hlink/linking/core/model_metrics.py
@@ -4,8 +4,6 @@
 # https://github.com/ipums/hlink
 import math
 
-import numpy as np
-
 
 def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float:
     return 2 * true_pos / (2 * true_pos + false_pos + false_neg)
@@ -40,7 +38,7 @@ def mcc(true_pos: int, true_neg: int, false_pos: int, false_neg: int) -> float:
 
 def precision(true_pos: int, false_pos: int) -> float:
     if (true_pos + false_pos) == 0:
-        precision = np.nan
+        precision = math.nan
     else:
         precision = true_pos / (true_pos + false_pos)
 
@@ -49,7 +47,7 @@ def precision(true_pos: int, false_pos: int) -> float:
 
 def recall(true_pos: int, false_neg: int) -> float:
     if (true_pos + false_neg) == 0:
-        recall = np.nan
+        recall = math.nan
     else:
         recall = true_pos / (true_pos + false_neg)
 
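
A quick usage sketch of the changed functions (illustrative only, assuming the module imports as hlink.linking.core.model_metrics):

import math
from hlink.linking.core import model_metrics

print(model_metrics.precision(true_pos=0, false_pos=0))            # nan (was np.nan before this commit)
print(math.isnan(model_metrics.recall(true_pos=0, false_neg=0)))   # True
print(model_metrics.precision(true_pos=8, false_pos=2))            # 0.8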
11 changes: 8 additions & 3 deletions hlink/linking/model_exploration/link_step_train_test_models.py
@@ -658,9 +658,14 @@ def _capture_prediction_results(
         fn_count,
         tn_count,
     ) = _get_confusion_matrix(predictions, dep_var)
-    precision = metrics_core.precision(tp_count, fp_count)
-    recall = metrics_core.recall(tp_count, fn_count)
-    mcc = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count)
+    precision_raw = metrics_core.precision(tp_count, fp_count)
+    recall_raw = metrics_core.recall(tp_count, fn_count)
+    mcc_raw = metrics_core.mcc(tp_count, tn_count, fp_count, fn_count)
+
+    # Convert Python's math.nan to np.nan for numpy/pandas processing
+    precision = precision_raw if not math.isnan(precision_raw) else np.nan
+    recall = recall_raw if not math.isnan(recall_raw) else np.nan
+    mcc = mcc_raw if not math.isnan(mcc_raw) else np.nan
 
     result = ThresholdTestResult(
         precision=precision,
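
The conversion added above can be read as a small helper (hypothetical name, not in the commit; the diff inlines the same expression per metric):

import math
import numpy as np

def _to_numpy_nan(value: float) -> float:
    # Map Python's math.nan to np.nan so downstream numpy/pandas code sees its own NaN object.
    return np.nan if math.isnan(value) else value

print(_to_numpy_nan(math.nan) is np.nan)  # True
print(_to_numpy_nan(0.75))                # 0.75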
