[#179] Add F-measure to the output thresholded metrics data frame
I also renamed the existing columns to remove the "_test" part, since we aren't
computing "_train" versions of these metrics anymore.
riley-harper committed Dec 12, 2024
1 parent d87c5de commit 74a7dd9
Showing 2 changed files with 28 additions and 44 deletions.
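
A note on the new metric before the diff: the f_measure values aggregated below are presumably the standard F1 score, the harmonic mean of precision and recall. A minimal sketch of that computation, assuming the usual definition and NaN handling (the helper name and example values are illustrative, not part of this commit):

import math


def f1_score(precision: float, recall: float) -> float:
    # Hypothetical helper: harmonic mean of precision and recall.
    # Returns NaN when both inputs are zero so downstream aggregation can
    # drop the value with math.isnan(), as the code in this diff does.
    if precision + recall == 0:
        return math.nan
    return 2 * precision * recall / (precision + recall)


# Example: precision 0.8 and recall 0.6 give an F1 of about 0.686.
print(round(f1_score(0.8, 0.6), 3))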
61 changes: 23 additions & 38 deletions hlink/linking/model_exploration/link_step_train_test_models.py
@@ -535,7 +535,7 @@ def _run(self) -> None:
# threshold matrix entries.
threshold_matrix_size = len(threshold_test_results[0])

- thresholded_metrics_df = _create_thresholded_metrics_df()
+ thresholded_metrics_df = pd.DataFrame()
for i in range(threshold_matrix_size):
print(f"Aggregate threshold matrix entry {i}")
thresholded_metrics_df = _aggregate_per_threshold_results(
@@ -549,7 +549,7 @@ def _run(self) -> None:
thresholded_metrics_df
)
_print_thresholded_metrics_df(
-     thresholded_metrics_df.sort_values(by="mcc_test_mean", ascending=False)
+     thresholded_metrics_df.sort_values(by="mcc_mean", ascending=False)
)

self._save_training_results(thresholded_metrics_df, self.task.spark)
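
A note on the first change above: the pre-declared column list from _create_thresholded_metrics_df() (removed at the bottom of this file) is replaced with a plain empty pd.DataFrame(), so the columns now come from the rows appended on each loop iteration. A minimal sketch of that accumulation pattern, assuming the aggregation helper appends its one-row new_desc frame with pd.concat (the append call itself is outside this diff, and the sample rows are made up):

import pandas as pd

# Hypothetical one-row summary frames, one per threshold matrix entry.
per_threshold_summaries = [
    pd.DataFrame({"alpha_threshold": [0.5], "mcc_mean": [0.81]}),
    pd.DataFrame({"alpha_threshold": [0.6], "mcc_mean": [0.84]}),
]

# Starting from an empty frame works because pd.concat takes its columns
# from the appended rows, so no up-front column list has to be maintained.
thresholded_metrics_df = pd.DataFrame()
for new_desc in per_threshold_summaries:
    thresholded_metrics_df = pd.concat(
        [thresholded_metrics_df, new_desc], ignore_index=True
    )

print(thresholded_metrics_df)

This also removes the need to keep a hand-written column list in sync: the removed function below had already drifted from the columns actually produced (it lists no pr_auc columns).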
@@ -835,32 +835,34 @@ def _aggregate_per_threshold_results(
threshold_ratio = prediction_results[0].threshold_ratio

# Pull out columns to be aggregated
- precision_test = [
-     r.precision for r in prediction_results if not math.isnan(r.precision)
- ]
- recall_test = [r.recall for r in prediction_results if not math.isnan(r.recall)]
- pr_auc_test = [r.pr_auc for r in prediction_results if not math.isnan(r.pr_auc)]
- mcc_test = [r.mcc for r in prediction_results if not math.isnan(r.mcc)]
-
- (precision_test_mean, precision_test_sd) = _compute_mean_and_stdev(precision_test)
- (recall_test_mean, recall_test_sd) = _compute_mean_and_stdev(recall_test)
- (pr_auc_test_mean, pr_auc_test_sd) = _compute_mean_and_stdev(pr_auc_test)
- (mcc_test_mean, mcc_test_sd) = _compute_mean_and_stdev(mcc_test)
+ precision = [r.precision for r in prediction_results if not math.isnan(r.precision)]
+ recall = [r.recall for r in prediction_results if not math.isnan(r.recall)]
+ pr_auc = [r.pr_auc for r in prediction_results if not math.isnan(r.pr_auc)]
+ mcc = [r.mcc for r in prediction_results if not math.isnan(r.mcc)]
+ f_measure = [r.f_measure for r in prediction_results if not math.isnan(r.f_measure)]
+
+ (precision_mean, precision_sd) = _compute_mean_and_stdev(precision)
+ (recall_mean, recall_sd) = _compute_mean_and_stdev(recall)
+ (pr_auc_mean, pr_auc_sd) = _compute_mean_and_stdev(pr_auc)
+ (mcc_mean, mcc_sd) = _compute_mean_and_stdev(mcc)
+ (f_measure_mean, f_measure_sd) = _compute_mean_and_stdev(f_measure)

new_desc = pd.DataFrame(
{
"model": [best_models[0].model_type],
"parameters": [best_models[0].hyperparams],
"alpha_threshold": [alpha_threshold],
"threshold_ratio": [threshold_ratio],
"precision_test_mean": [precision_test_mean],
"precision_test_sd": [precision_test_sd],
"recall_test_mean": [recall_test_mean],
"recall_test_sd": [recall_test_sd],
"pr_auc_test_mean": [pr_auc_test_mean],
"pr_auc_test_sd": [pr_auc_test_sd],
"mcc_test_mean": [mcc_test_mean],
"mcc_test_sd": [mcc_test_sd],
"precision_mean": [precision_mean],
"precision_sd": [precision_sd],
"recall_mean": [recall_mean],
"recall_sd": [recall_sd],
"pr_auc_mean": [pr_auc_mean],
"pr_auc_sd": [pr_auc_sd],
"mcc_mean": [mcc_mean],
"mcc_sd": [mcc_sd],
"f_measure_mean": [f_measure_mean],
"f_measure_sd": [f_measure_sd],
},
)
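
The aggregation above relies on _compute_mean_and_stdev, whose definition is outside this diff. A plausible sketch, assuming it returns the sample mean and standard deviation with NaN fallbacks for short inputs (the real helper elsewhere in this module may differ):

import math
import statistics


def _compute_mean_and_stdev(values: list[float]) -> tuple[float, float]:
    # Sketch only: mean and sample standard deviation of the collected
    # metric values, with NaN where the statistic is undefined.
    if not values:
        return (math.nan, math.nan)
    mean = statistics.mean(values)
    # statistics.stdev requires at least two data points.
    stdev = statistics.stdev(values) if len(values) > 1 else math.nan
    return (mean, stdev)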

@@ -905,23 +905,6 @@ def _load_thresholded_metrics_df_params(desc_df: pd.DataFrame) -> pd.DataFrame:
return desc_df


- def _create_thresholded_metrics_df() -> pd.DataFrame:
-     return pd.DataFrame(
-         columns=[
-             "model",
-             "parameters",
-             "alpha_threshold",
-             "threshold_ratio",
-             "precision_test_mean",
-             "precision_test_sd",
-             "recall_test_mean",
-             "recall_test_sd",
-             "mcc_test_mean",
-             "mcc_test_sd",
-         ]
-     )


def _custom_param_grid_builder(
model_parameters: list[dict[str, Any]]
) -> list[dict[str, Any]]:
11 changes: 5 additions & 6 deletions hlink/tests/model_exploration_test.py
@@ -695,7 +695,7 @@ def test_step_2_train_random_forest_spark(
tr = spark.table("model_eval_training_results").toPandas()
print(f"training results {tr}")
# assert tr.shape == (1, 18)
- assert tr.query("model == 'random_forest'")["pr_auc_test_mean"].iloc[0] > 2.0 / 3.0
+ assert tr.query("model == 'random_forest'")["pr_auc_mean"].iloc[0] > 2.0 / 3.0
assert tr.query("model == 'random_forest'")["maxDepth"].iloc[0] == 3

# TODO probably remove these since we're not planning to test suspicious data anymore.
@@ -731,10 +731,10 @@ def test_step_2_train_logistic_regression_spark(
tr = spark.table("model_eval_training_results").toPandas()
# assert tr.count == 3

- assert tr.shape == (1, 11)
+ assert tr.shape == (1, 13)
# This is now 0.83333333333.... I'm not sure it's worth testing against
# assert tr.query("model == 'logistic_regression'")["pr_auc_mean"].iloc[0] == 0.75
- assert tr.query("model == 'logistic_regression'")["pr_auc_test_mean"].iloc[0] > 0.74
+ assert tr.query("model == 'logistic_regression'")["pr_auc_mean"].iloc[0] > 0.74
assert (
round(tr.query("model == 'logistic_regression'")["alpha_threshold"].iloc[0], 1)
== 0.7
@@ -759,9 +759,8 @@ def test_step_2_train_decision_tree_spark(

print(f"Decision tree results: {tr}")

- # TODO This is 1,12 instead of 1,13, because the precision_test_mean column is dropped as it is NaN
- assert tr.shape == (1, 13)
- # assert tr.query("model == 'decision_tree'")["precision_test_mean"].iloc[0] > 0
+ assert tr.shape == (1, 15)
+ # assert tr.query("model == 'decision_tree'")["precision_mean"].iloc[0] > 0
assert tr.query("model == 'decision_tree'")["maxDepth"].iloc[0] == 3
assert tr.query("model == 'decision_tree'")["minInstancesPerNode"].iloc[0] == 1
assert tr.query("model == 'decision_tree'")["maxBins"].iloc[0] == 7
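
In both updated shape assertions the column count grows by two, which matches the new f_measure_mean and f_measure_sd columns in the aggregated data frame.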
