diff --git a/hlink/linking/core/model_metrics.py b/hlink/linking/core/model_metrics.py
index d75a9b3..d23fa00 100644
--- a/hlink/linking/core/model_metrics.py
+++ b/hlink/linking/core/model_metrics.py
@@ -2,10 +2,32 @@
 # For copyright and licensing information, see the NOTICE and LICENSE files
 # in this project's top-level directory, and also on-line at:
 # https://github.com/ipums/hlink
+"""
+Metrics for evaluating the performance of a machine learning model. These
+metrics operate on the "confusion matrix", which contains the four counts of
+true positives, true negatives, false positives, and false negatives.
+Throughout this module, we use the abbreviations true_pos, true_neg, false_pos,
+and false_neg for these confusion matrix values.
+
+All of these functions return math.nan in cases where they are not well-defined,
+such as cases with division by zero.
+"""
+
 import math
 
 
 def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float:
+    """
+    Compute the F-measure, which is defined as the harmonic mean of precision
+    and recall:
+
+        2 * precision * recall / (precision + recall)
+
+    Using the definitions of precision and recall, we can write this in terms of
+    the confusion matrix entries as
+
+        2 * true_pos / (2 * true_pos + false_pos + false_neg)
+    """
     denominator = 2 * true_pos + false_pos + false_neg
     if denominator == 0:
         return math.nan
@@ -14,44 +36,56 @@ def f_measure(true_pos: int, false_pos: int, false_neg: int) -> float:
 
 def mcc(true_pos: int, true_neg: int, false_pos: int, false_neg: int) -> float:
     """
-    Given the counts of true positives (true_pos), true negatives (true_neg), false
-    positives (false_pos), and false negatives (false_neg) for a model run, compute the
-    Matthews Correlation Coefficient (MCC).
-    """
-    if (
-        math.sqrt(
-            (true_pos + false_pos)
-            * (true_pos + false_neg)
-            * (true_neg + false_pos)
-            * (true_neg + false_neg)
-        )
-    ) != 0:
-        mcc = ((true_pos * true_neg) - (false_pos * false_neg)) / (
-            math.sqrt(
-                (true_pos + false_pos)
-                * (true_pos + false_neg)
-                * (true_neg + false_pos)
-                * (true_neg + false_neg)
-            )
-        )
-    else:
-        mcc = math.nan
-    return mcc
+    Compute the Matthews Correlation Coefficient (MCC). This can be written as
+    numerator / denominator, where
+
+        numerator = true_pos * true_neg - false_pos * false_neg
+
+    and
+
+        denominator = sqrt(
+            (true_pos + false_pos) *
+            (true_pos + false_neg) *
+            (true_neg + false_pos) *
+            (true_neg + false_neg)
+        )
+    """
+    denominator = math.sqrt(
+        (true_pos + false_pos)
+        * (true_pos + false_neg)
+        * (true_neg + false_pos)
+        * (true_neg + false_neg)
+    )
+    if denominator == 0:
+        return math.nan
+
+    numerator = true_pos * true_neg - false_pos * false_neg
+    return numerator / denominator
 
 
 def precision(true_pos: int, false_pos: int) -> float:
-    if (true_pos + false_pos) == 0:
-        precision = math.nan
-    else:
-        precision = true_pos / (true_pos + false_pos)
+    """
+    Compute the precision, also known as the positive predictive value (PPV).
+    This can be written in terms of the entries of the confusion matrix as
+
+        true_pos / (true_pos + false_pos)
+    """
+    denominator = true_pos + false_pos
+    if denominator == 0:
+        return math.nan
 
-    return precision
+    return true_pos / denominator
 
 
 def recall(true_pos: int, false_neg: int) -> float:
-    if (true_pos + false_neg) == 0:
-        recall = math.nan
-    else:
-        recall = true_pos / (true_pos + false_neg)
+    """
+    Compute the recall, which can be written in terms of the entries of the
+    confusion matrix as
+
+        true_pos / (true_pos + false_neg)
+    """
+    denominator = true_pos + false_neg
+    if denominator == 0:
+        return math.nan
 
-    return recall
+    return true_pos / denominator
diff --git a/hlink/tests/core/model_metrics_test.py b/hlink/tests/core/model_metrics_test.py
index 41b70b4..235ed75 100644
--- a/hlink/tests/core/model_metrics_test.py
+++ b/hlink/tests/core/model_metrics_test.py
@@ -72,6 +72,28 @@ def test_mcc_example() -> None:
     assert abs(mcc_score - 0.8111208) < 0.0001, "expected MCC to be near 0.8111208"
 
 
+@given(
+    true_pos=NonNegativeInt,
+    true_neg=NonNegativeInt,
+    false_pos=NonNegativeInt,
+    false_neg=NonNegativeInt,
+)
+def test_mcc_is_between_negative_1_and_positive_1(
+    true_pos: int, true_neg: int, false_pos: int, false_neg: int
+) -> None:
+    """
+    Under "normal circumstances", where the denominator of the Matthews Correlation
+    Coefficient isn't 0, its range is the interval [-1, 1].
+    """
+    assume(true_pos + false_pos > 0)
+    assume(true_pos + false_neg > 0)
+    assume(true_neg + false_pos > 0)
+    assume(true_neg + false_neg > 0)
+
+    mcc_score = mcc(true_pos, true_neg, false_pos, false_neg)
+    assert -1.0 <= mcc_score <= 1.0
+
+
 @pytest.mark.parametrize(
     "true_pos,true_neg,false_pos,false_neg",
     [(0, 0, 0, 0), (0, 1, 0, 1), (0, 1, 1, 0), (1, 0, 0, 1), (1, 0, 1, 0)],
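Reviewer note: for anyone who wants to sanity-check the rewritten metrics by hand, here is a small usage sketch. The import path and function names come from the diff above; the confusion-matrix counts are hypothetical, chosen only for illustration.

# Usage sketch for the rewritten metrics (hypothetical counts, for illustration).
from hlink.linking.core.model_metrics import f_measure, mcc, precision, recall

true_pos, true_neg, false_pos, false_neg = 80, 90, 10, 20

print(precision(true_pos, false_pos))                # 80 / 90 ~= 0.889
print(recall(true_pos, false_neg))                   # 80 / 100 = 0.8
print(f_measure(true_pos, false_pos, false_neg))     # 160 / 190 ~= 0.842
print(mcc(true_pos, true_neg, false_pos, false_neg)) # 7000 / sqrt(99000000) ~= 0.704

# Degenerate inputs now return math.nan rather than raising ZeroDivisionError.
print(precision(0, 0))  # nan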
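The new property test references given, assume, and NonNegativeInt without showing their definitions, which presumably sit earlier in model_metrics_test.py. A plausible reconstruction using the standard hypothesis API is sketched below; the exact NonNegativeInt definition is an assumption, since the diff only shows the name.

# Assumed supporting definitions for the property test (not shown in the diff).
from hypothesis import assume, given
from hypothesis import strategies as st

from hlink.linking.core.model_metrics import mcc

# A strategy generating non-negative confusion-matrix counts for @given.
# The real test file may bound or shrink these differently; this is a guess.
NonNegativeInt = st.integers(min_value=0)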