diff --git a/hlink/linking/model_exploration/link_step_train_test_models.py b/hlink/linking/model_exploration/link_step_train_test_models.py index e599dcd..3d98abe 100644 --- a/hlink/linking/model_exploration/link_step_train_test_models.py +++ b/hlink/linking/model_exploration/link_step_train_test_models.py @@ -181,6 +181,7 @@ def _run(self) -> None: } print(f"PR AUC for splits on current model and params: {pr_auc_dict}") this_model_results = pd.DataFrame(pr_auc_dict) + # I'm not sure what this dataframe is for probability_metrics_df = pd.concat( [probability_metrics_df, this_model_results] ) @@ -205,7 +206,7 @@ def _run(self) -> None: id_a, id_b, dep_var, - ).cache() + ) thresholding_predict_train = _get_probability_and_select_pred_columns( thresholding_training_data, thresholding_model, @@ -213,7 +214,7 @@ def _run(self) -> None: id_a, id_b, dep_var, - ).cache() + ) i = 0 for threshold_index, (alpha_threshold, threshold_ratio) in enumerate( @@ -486,7 +487,7 @@ def _save_otd_data( print("There were no true negatives recorded.") def _create_otd_data(self, id_a: str, id_b: str) -> dict[str, Any] | None: - """Output Suspicous Data (OTD): used to check config to see if you should find sketchy training data that the models routinely mis-classify""" + """Output Suspicious Data (OTD): used to check config to see if you should find sketchy training data that the models routinely mis-classify""" training_conf = str(self.task.training_conf) config = self.task.link_run.config