From b9c21238737a0d43c5bc8db81ad2fd3f591183ee Mon Sep 17 00:00:00 2001 From: Colin Davis Date: Tue, 3 Dec 2024 14:02:49 -0600 Subject: [PATCH] fixed quote indent --- .../link_step_train_test_models.py | 3 --- hlink/tests/model_exploration_test.py | 14 +++++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/hlink/linking/model_exploration/link_step_train_test_models.py b/hlink/linking/model_exploration/link_step_train_test_models.py index f9a1134..e02b7f7 100644 --- a/hlink/linking/model_exploration/link_step_train_test_models.py +++ b/hlink/linking/model_exploration/link_step_train_test_models.py @@ -177,9 +177,6 @@ def _train_model( predictions_tmp = _get_probability_and_select_pred_columns( test_data, model, post_transformer, id_a, id_b, dep_var ) - predict_train_tmp = _get_probability_and_select_pred_columns( - training_data, model, post_transformer, id_a, id_b, dep_var - ) test_pred = predictions_tmp.toPandas() precision, recall, thresholds_raw = precision_recall_curve( diff --git a/hlink/tests/model_exploration_test.py b/hlink/tests/model_exploration_test.py index a7b8513..fecb30d 100644 --- a/hlink/tests/model_exploration_test.py +++ b/hlink/tests/model_exploration_test.py @@ -78,7 +78,7 @@ def test_all( assert tr.__len__() == 2 # TODO this should be a valid test once we fix the results output - #assert tr.query("threshold_ratio == 1.01")["precision_test_mean"].iloc[0] >= 0.5 + # assert tr.query("threshold_ratio == 1.01")["precision_test_mean"].iloc[0] >= 0.5 assert tr.query("threshold_ratio == 1.3")["alpha_threshold"].iloc[0] == 0.8 # The old behavior was to process all the model types, but now we select the best @@ -91,8 +91,8 @@ def test_all( # == tr.query("threshold_ratio == 1.3")["pr_auc_mean"].iloc[0] # ) -# TODO these asserts will mostly succeed if you change the random number seed: Basically the -""" + # TODO these asserts will mostly succeed if you change the random number seed: Basically the + """ preds = spark.table("model_eval_predictions").toPandas() assert ( preds.query("id_a == 20 and id_b == 30")["probability"].round(2).iloc[0] > 0.5 @@ -110,7 +110,7 @@ def test_all( pred_train = spark.table("model_eval_predict_train").toPandas() assert pred_train.query("id_a == 20 and id_b == 50")["match"].iloc[0] == 0 -""" + """ # assert pd.isnull( # pred_train.query("id_a == 10 and id_b == 50")["second_best_prob"].iloc[1] # ) @@ -341,7 +341,7 @@ def test_step_2_train_decision_tree_spark( # TODO This is 1,12 instead of 1,13, because the precision_test_mean column is dropped as it is NaN assert tr.shape == (1, 12) - #assert tr.query("model == 'decision_tree'")["precision_test_mean"].iloc[0] > 0 + # assert tr.query("model == 'decision_tree'")["precision_test_mean"].iloc[0] > 0 assert tr.query("model == 'decision_tree'")["maxDepth"].iloc[0] == 3 assert tr.query("model == 'decision_tree'")["minInstancesPerNode"].iloc[0] == 1 assert tr.query("model == 'decision_tree'")["maxBins"].iloc[0] == 7 @@ -381,9 +381,9 @@ def test_step_2_train_gradient_boosted_trees_spark( # assert tr.shape == (1, 18) # TODO once the train_tgest results are properly combined this should pass - #assert ( + # assert ( # tr.query("model == 'gradient_boosted_trees'")["precision_test_mean"].iloc[0] > 0 - #) + # ) assert tr.query("model == 'gradient_boosted_trees'")["maxDepth"].iloc[0] == 5 assert ( tr.query("model == 'gradient_boosted_trees'")["minInstancesPerNode"].iloc[0]