@@ -684,7 +684,6 @@ def test_step_2_train_random_forest_spark(
684
684
"featureSubsetStrategy" : "sqrt" ,
685
685
}
686
686
]
687
- feature_conf ["training" ]["output_suspicious_TD" ] = True
688
687
feature_conf ["training" ]["n_training_iterations" ] = 3
689
688
690
689
model_exploration .run_step (0 )
@@ -694,9 +693,12 @@ def test_step_2_train_random_forest_spark(
694
693
tr = spark .table ("model_eval_training_results" ).toPandas ()
695
694
print (f"training results { tr } " )
696
695
# assert tr.shape == (1, 18)
697
- assert tr .query ("model == 'random_forest'" )["pr_auc_mean " ].iloc [0 ] > 2.0 / 3.0
696
+ assert tr .query ("model == 'random_forest'" )["pr_auc_test_mean " ].iloc [0 ] > 2.0 / 3.0
698
697
assert tr .query ("model == 'random_forest'" )["maxDepth" ].iloc [0 ] == 3
699
698
699
+ # TODO: Probably remove the assertions below, since we no longer plan to test suspicious data.
700
+ # Saving of suspicious data is disabled in this test config, so the assertions below are currently invalid.
701
+ """
700
702
FNs = spark.table("model_eval_repeat_fns").toPandas()
701
703
assert FNs.shape == (3, 4)
702
704
assert FNs.query("id_a == 30")["count"].iloc[0] == 3
@@ -706,6 +708,7 @@ def test_step_2_train_random_forest_spark(
706
708
707
709
TNs = spark.table("model_eval_repeat_tns").toPandas()
708
710
assert TNs.shape == (6, 4)
711
+ """
709
712
710
713
main .do_drop_all ("" )
711
714
@@ -717,18 +720,19 @@ def test_step_2_train_logistic_regression_spark(
717
720
feature_conf ["training" ]["model_parameters" ] = [
718
721
{"type" : "logistic_regression" , "threshold" : 0.7 }
719
722
]
720
- feature_conf ["training" ]["n_training_iterations" ] = 4
723
+ feature_conf ["training" ]["n_training_iterations" ] = 3
721
724
722
725
model_exploration .run_step (0 )
723
726
model_exploration .run_step (1 )
724
727
model_exploration .run_step (2 )
725
728
726
729
tr = spark .table ("model_eval_training_results" ).toPandas ()
730
+ # assert tr.count == 3
727
731
728
732
assert tr .shape == (1 , 11 )
729
733
# This is now 0.8333...; I'm not sure it's worth testing against an exact value.
730
734
# assert tr.query("model == 'logistic_regression'")["pr_auc_mean"].iloc[0] == 0.75
731
- assert tr .query ("model == 'logistic_regression'" )["pr_auc_mean " ].iloc [0 ] > 0.74
735
+ assert tr .query ("model == 'logistic_regression'" )["pr_auc_test_mean " ].iloc [0 ] > 0.74
732
736
assert (
733
737
round (tr .query ("model == 'logistic_regression'" )["alpha_threshold" ].iloc [0 ], 1 )
734
738
== 0.7
0 commit comments