@@ -168,7 +168,7 @@ def _collect_train_test_splits(
             cached_test_data = test_data.cache()

             split_start_info = f"Training and testing the model on train-test split {split_index} of {len(splits)}"
-            print(split_start_info)
+            # print(split_start_info)
             logger.debug(split_start_info)
             prauc = self._train_model(
                 cached_training_data,
@@ -199,7 +199,7 @@ def _evaluate_hyperparam_combinations(
         results = []
         for index, params_combo in enumerate(all_model_parameter_combos, 1):
             eval_start_info = f"Starting run {index} of {len(all_model_parameter_combos)} with these parameters: {params_combo}"
-            print(eval_start_info)
+            # print(eval_start_info)
             logger.info(eval_start_info)
             # Copy because the params combo will get stripped of extra key-values
             # so only the hyperparams remain.
@@ -266,15 +266,15 @@ def _choose_best_training_results(self, evals: list[ModelEval]) -> ModelEval:
             raise RuntimeError(
                 "No model evaluations provided, cannot choose the best one."
             )
-        print("\n**************************************************")
+        print("\n\n**************************************************")
         print(" All Model - hyper-parameter combinations")
         print("**************************************************\n")
         best_eval = evals[0]
         for e in evals:
             print(e)
             if best_eval.score < e.score:
                 best_eval = e
-            print("--------------------------------------------------\n")
+            print("--------------------------------------------------\n\n")
         return best_eval

     def _evaluate_threshold_combinations(
@@ -295,9 +295,9 @@ def _evaluate_threshold_combinations(
         # but for now it's a single ModelEval instance -- the one with the highest score.
         best_results = self._choose_best_training_results(hyperparam_evaluation_results)

-        print(f"======== Best Model and Parameters ========= ")
-        print(f"{best_results}")
-        print("============================================================== ")
+        print(f"\n======== Best Model and Parameters ========\n")
+        print(f"\t{best_results}\n")
+        print("=============================================\n]\n")

         # TODO check if we should make a different split, like starting from a different seed?
         # or just not re-using one we used in making the PR_AUC mean value?
@@ -306,6 +306,9 @@ def _evaluate_threshold_combinations(
         # thresholding_test_data = splits_for_thresholding_eval[1].cache()
         threshold_matrix = best_results.make_threshold_matrix()
         logger.debug(f"The threshold matrix has {len(threshold_matrix)} entries")
+        print(
+            f"Testing the best model + parameters against all {len(threshold_matrix)} threshold combinations."
+        )
         results_dfs: dict[int, pd.DataFrame] = {}
         for i in range(len(threshold_matrix)):
             results_dfs[i] = _create_results_df()
@@ -367,10 +370,6 @@ def _evaluate_threshold_combinations(
                     config["id_column"],
                 )

-                print(
-                    f"Capture results for threshold matrix entry {threshold_index} and split index {split_index}"
-                )
-
                 results_dfs[i] = self._capture_results(
                     predictions,
                     predict_train,
@@ -535,12 +534,12 @@ def _capture_results(
         # write to sql tables for testing
         predictions.createOrReplaceTempView(f"{table_prefix}predictions")
         predict_train.createOrReplaceTempView(f"{table_prefix}predict_train")
-        print("------------------------------------------------------------")
-        print(f"Capturing predictions:")
-        predictions.show()
-        print(f"Capturing predict_train:")
-        predict_train.show()
-        print("------------------------------------------------------------")
+        # print("------------------------------------------------------------")
+        # print(f"Capturing predictions:")
+        # predictions.show()
+        # print(f"Capturing predict_train:")
+        # predict_train.show()
+        # print("------------------------------------------------------------")

         (
             test_TP_count,
@@ -769,19 +768,19 @@ def _get_confusion_matrix(
     FP = predictions.filter((predictions[dep_var] == 0) & (predictions.prediction == 1))
     FP_count = FP.count()

-    print(
-        f"Confusion matrix -- true positives and false positivesTP {TP_count} FP {FP_count}"
-    )
+    # print(
+    #     f"Confusion matrix -- true positives and false positivesTP {TP_count} FP {FP_count}"
+    # )

     FN = predictions.filter((predictions[dep_var] == 1) & (predictions.prediction == 0))
     FN_count = FN.count()

     TN = predictions.filter((predictions[dep_var] == 0) & (predictions.prediction == 0))
     TN_count = TN.count()

-    print(
-        f"Confusion matrix -- true negatives and false negatives: FN {FN_count} TN {TN_count}"
-    )
+    # print(
+    #     f"Confusion matrix -- true negatives and false negatives: FN {FN_count} TN {TN_count}"
+    # )

     if otd_data:
         id_a = otd_data["id_a"]
@@ -829,7 +828,7 @@ def _get_aggregate_metrics(
     else:
         recall = TP_count / (TP_count + FN_count)
     mcc = _calc_mcc(TP_count, TN_count, FP_count, FN_count)
-    print(f"XX Aggregates precision {precision} recall {recall}")
+    # print(f"XX Aggregates precision {precision} recall {recall}")
     return precision, recall, mcc
