From 9b74707482506a25508f5f12a431a1093555cd43 Mon Sep 17 00:00:00 2001 From: Veronika Maurerova Date: Wed, 20 Sep 2023 15:18:17 +0200 Subject: [PATCH] Move custom threshold logic from make metrics to another PR --- h2o-core/src/main/java/hex/AUUC.java | 4 +- .../java/hex/ModelMetricsBinomialUplift.java | 16 +----- .../java/water/api/ModelMetricsHandler.java | 13 +---- h2o-py/h2o/h2o.py | 10 +--- h2o-py/h2o/model/model_base.py | 8 +-- .../tests/testdir_misc/pyunit_make_metrics.py | 51 +++++++------------ h2o-r/h2o-package/R/models.R | 10 ++-- .../runit_make_metrics_uplift_binomial.R | 30 +++++------ 8 files changed, 46 insertions(+), 96 deletions(-) diff --git a/h2o-core/src/main/java/hex/AUUC.java b/h2o-core/src/main/java/hex/AUUC.java index 2129215cb9a3..842ec0869ad9 100644 --- a/h2o-core/src/main/java/hex/AUUC.java +++ b/h2o-core/src/main/java/hex/AUUC.java @@ -234,7 +234,9 @@ public static double[] calculateQuantileThresholds(int groups, Vec preds) { if (qm != null) qm.remove(); if (fr != null) DKV.remove(fr._key); } - if(Double.isNaN(quantiles[0])){ + if(quantiles == null){ + quantiles = new double[]{0}; + } else if(Double.isNaN(quantiles[0])){ quantiles[0] = 0; } return quantiles; diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java index 317858f6c060..a58ca933aa4f 100644 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java @@ -82,7 +82,7 @@ protected StringBuilder appendToStringMetrics(StringBuilder sb) { * @param customAuucThresholds custom threshold to calculate AUUC, if is not specified, the thresholds will be calculated from prediction vector * @return ModelMetrics object */ - static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, String[] domain, AUUC.AUUCType auucType, int auucNbins, double[] customAuucThresholds) { + static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, String[] domain, AUUC.AUUCType auucType, int auucNbins) { Scope.enter(); try { Vec labels = actualLabels.toCategoricalVec(); @@ -99,14 +99,6 @@ static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabe if (!treatment.isCategorical() || treatment.cardinality() != 2) throw new IllegalArgumentException("Treatment values should be catecorical value and have 2 class " + Arrays.toString(treatment.domain()) + " for uplift binomial uplift metrics."); long dataSize = treatment.length(); - if (customAuucThresholds != null) { - if(customAuucThresholds.length == 0){ - throw new IllegalArgumentException("Custom AUUC thresholds array should have size greater than 0."); - } - if (auucNbins != customAuucThresholds.length) { - Log.info("Custom AUUC thresholds are specified, so number of AUUC bins will equal to thresholds size."); - } - } if (auucNbins < -1 || auucNbins == 0 || auucNbins > dataSize) throw new IllegalArgumentException("The number of bins to calculate AUUC need to be -1 (default value) or higher than zero, but less than data size."); if(auucNbins == -1) @@ -115,11 +107,7 @@ static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabe fr.add("labels", labels); fr.add("treatment", treatment); MetricBuilderBinomialUplift mb; - if (customAuucThresholds == null) { - mb = new UpliftBinomialMetrics(labels.domain(), AUUC.calculateQuantileThresholds(auucNbins, predictedProbs)).doAll(fr)._mb; - } else { - mb = new UpliftBinomialMetrics(labels.domain(), customAuucThresholds).doAll(fr)._mb; - } + mb = new UpliftBinomialMetrics(labels.domain(), AUUC.calculateQuantileThresholds(auucNbins, predictedProbs)).doAll(fr)._mb; labels.remove(); ModelMetricsBinomialUplift mm = (ModelMetricsBinomialUplift) mb.makeModelMetrics(null, fr, auucType); mm._description = "Computed on user-given predictions and labels."; diff --git a/h2o-core/src/main/java/water/api/ModelMetricsHandler.java b/h2o-core/src/main/java/water/api/ModelMetricsHandler.java index a2fe63a6e27c..d68cd56d77b0 100644 --- a/h2o-core/src/main/java/water/api/ModelMetricsHandler.java +++ b/h2o-core/src/main/java/water/api/ModelMetricsHandler.java @@ -37,7 +37,6 @@ public static final class ModelMetricsList extends Iced { public boolean _compare_abs; public String _auuc_type; public int _auuc_nbins; - public double[] _custom_auuc_thresholds; // Fetch all metrics that match model and/or frame ModelMetricsList fetch() { @@ -238,7 +237,6 @@ public static final class ModelMetricsListSchemaV3 extends RequestSchemaV3>> fr = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip") @@ -2043,7 +2041,6 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None assert_is_type(distribution, str, None) assert_satisfies(actual.ncol, actual.ncol == 1) assert_is_type(auc_type, str) - assert_is_type(custom_auuc_thresholds, [float], None) allowed_auc_types = ["MACRO_OVO", "MACRO_OVR", "WEIGHTED_OVO", "WEIGHTED_OVR", "AUTO", "NONE"] assert auc_type in allowed_auc_types, "auc_type should be "+(" ".join([str(type) for type in allowed_auc_types])) if domain is None and any(actual.isfactor()): @@ -2051,6 +2048,7 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None params = {"domain": domain, "distribution": distribution} if weights is not None: params["weights_frame"] = weights.frame_id + params["auc_type"] = auc_type if treatment is not None: assert treatment.ncol == 1, "`treatment` frame should have exactly 1 column" params["treatment_frame"] = treatment.frame_id @@ -2059,10 +2057,6 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None params["auuc_type"] = auuc_type assert auuc_nbins == -1 or auuc_nbins > 0, "auuc_nbis should be -1 or higner than 0." params["auuc_nbins"] = auuc_nbins - if custom_auuc_thresholds is not None: - assert len(custom_auuc_thresholds) > 0, "custom_auuc_thresholds size should be higher than 0." - params["custom_auuc_thresholds"] = custom_auuc_thresholds - params["auc_type"] = auc_type res = api("POST /3/ModelMetrics/predictions_frame/%s/actuals_frame/%s" % (predicted.frame_id, actual.frame_id), data=params) return res["model_metrics"] diff --git a/h2o-py/h2o/model/model_base.py b/h2o-py/h2o/model/model_base.py index 539ab8b8773a..7582a19faab7 100644 --- a/h2o-py/h2o/model/model_base.py +++ b/h2o-py/h2o/model/model_base.py @@ -460,7 +460,7 @@ def training_model_metrics(self): return self._model_json["output"]["training_metrics"]._metric_json def model_performance(self, test_data=None, train=False, valid=False, xval=False, auc_type=None, - auuc_type=None, auuc_nbins=-1, custom_auuc_thresholds=None): + auuc_type=None): """ Generate model metrics for this model on ``test_data``. @@ -517,16 +517,12 @@ def model_performance(self, test_data=None, train=False, valid=False, xval=False data={"auc_type": auc_type}) elif auuc_type is not None: assert_is_type(auuc_type, Enum("AUTO", "qini", "gain", "lift")) - assert_is_type(custom_auuc_thresholds, [float], None) - if custom_auuc_thresholds is not None and len(custom_auuc_thresholds) == 0: - print("WARNING: Model metrics cannot be calculated and metric_json is empty due to the custom_auuc_tresholds are empty.") - return if (self._model_json["treatment_column_name"] is not None) and not(self._model_json["treatment_column_name"] in test_data.names): print("WARNING: Model metrics cannot be calculated and metric_json is empty due to the absence of the treatment column in your dataset.") return res = h2o.api("POST /3/ModelMetrics/models/%s/frames/%s" % (self.model_id, test_data.frame_id), - data={"auuc_type": auuc_type, "auuc_nbins": auuc_nbins, "custom_auuc_thresholds": custom_auuc_thresholds}) + data={"auuc_type": auuc_type}) else: res = h2o.api("POST /3/ModelMetrics/models/%s/frames/%s" % (self.model_id, test_data.frame_id)) # FIXME need to do the client-side filtering... (https://github.com/h2oai/h2o-3/issues/13862) diff --git a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py index 94cf10a95fdf..c839d4c11baf 100644 --- a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py +++ b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py @@ -198,9 +198,9 @@ def pyunit_make_metrics_uplift(): train[treatment_column] = train[treatment_column].asfactor() train[response_column] = train[response_column].asfactor() - test = h2o.import_file(pyunit_utils.locate("smalldata/uplift/upliftml_test.csv")) - test[treatment_column] = test[treatment_column].asfactor() - test[response_column] = test[response_column].asfactor() + valid = h2o.import_file(pyunit_utils.locate("smalldata/uplift/upliftml_test.csv")) + valid[treatment_column] = valid[treatment_column].asfactor() + valid[response_column] = valid[response_column].asfactor() nbins = 20 model = H2OUpliftRandomForestEstimator( @@ -211,57 +211,42 @@ def pyunit_make_metrics_uplift(): ntrees=3 ) - model.train(y=response_column, x=feature_cols, training_frame=train, validation_frame=test) + model.train(y=response_column, x=feature_cols, training_frame=train, validation_frame=valid) # test on validation data, train metrics are affected by sample rate m0 = model.model_performance(valid=True) - predicted = h2o.assign(model.predict(test)[0], "pred") - actual = test[response_column] - treatment = test[treatment_column] - m1 = model.model_performance(test_data=test, auuc_type="AUTO", auuc_nbins=nbins) + predicted = h2o.assign(model.predict(valid)[0], "pred") + actual = valid[response_column] + treatment = valid[treatment_column] + m1 = model.model_performance(test_data=valid, auuc_type="AUTO") m2 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins) - m3 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins, - custom_auuc_thresholds=m1.thresholds()) - m4 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins, - custom_auuc_thresholds=model.default_auuc_thresholds()) + new_nbins = nbins - 10 - m5 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=new_nbins) - m6 = model.model_performance(test_data=test, auuc_type="AUTO", auuc_nbins=new_nbins) + m3 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=new_nbins) print("Model AUUC: {}".format(model.auuc())) print("thresholds: {}".format(model.default_auuc_thresholds())) print("Model performance AUUC: {}".format(m0.auuc())) print("thresholds: {}".format(m0.thresholds())) - print("Model performance AUUC: {}".format(m1.auuc())) + print("Model performance AUUC recalculate with data: {}".format(m1.auuc())) print("thresholds: {}".format(m1.thresholds())) - print("Make AUUC with no custom thresholds: {}".format(m2.auuc())) + print("Make AUUC: {}".format(m2.auuc())) print("thresholds: {}".format(m2.thresholds())) - print("Make AUUC with custom thresholds from m1: {}".format(m3.auuc())) + print("Make AUUC with new number of bins: {}".format(m3.auuc())) print("thresholds: {}".format(m3.thresholds())) - print("Make AUUC with custom thresholds from model defaults: {}".format(m4.auuc())) - print("thresholds: {}".format(m4.thresholds())) - print("Make AUUC with no custom thresholds but change nbins parameter: {}".format(m5.auuc())) - print("thresholds: {}".format(m5.thresholds())) - print("Performance AUUC with no custom thresholds but change nbins parameter: {}".format(m6.auuc())) - print("thresholds: {}".format(m6.thresholds())) tol = 1e-5 # default model auuc is calculated from train data, default thresholds are from validation data assert abs(model.auuc() - m0.auuc()) > tol - # model performance calculates new thresholds but from the same data with the same number of bins, so AUUCs are same + # model performance uses default thresholds, so AUUCs are same assert abs(m0.auuc() - m1.auuc()) < tol - # make method calculates new thresholds but from the same data with the same number of bins, so AUUCs are same + # make method calculates new thresholds but from the same data with same nbins so AUUCs are same assert abs(m1.auuc() - m2.auuc()) < tol - # if we use thresholds from performance metric and use it as custom, it makes the same metrics - assert abs(m1.auuc() - m3.auuc()) < tol - # make methods with different nbins parameter changes thresholds and AUUC - assert abs(m3.auuc() - m5.auuc()) > tol - # performance methods with different nbins parameter changes thresholds and AUUC - assert abs(m3.auuc() - m6.auuc()) > tol - # make and performance method with the same nbins parameter and the same data calculates the same thresholds - assert abs(m5.auuc() - m6.auuc()) < tol + # make method with the new auuc_nbins parameter calculates the new thresholds + assert abs(m2.auuc() - m3.auuc()) > tol print("===========================") + def suite_model_metrics(): diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R index f250950aea5d..adb8bbfdb0ca 100755 --- a/h2o-r/h2o-package/R/models.R +++ b/h2o-r/h2o-package/R/models.R @@ -1039,7 +1039,7 @@ h2o.feature_frequencies <- feature_frequencies.H2OModel #' h2o.performance(model = prostate_gbm_balanced, train = TRUE) #' } #' @export -h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", auuc_type="NONE", auuc_nbins=-1) { +h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", auuc_type="NONE") { # data is now deprecated and the new arg name is newdata if (!is.null(data)) { @@ -1082,10 +1082,7 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval= parms[["auuc_type"]] <- auuc_type } else if(!is.null(model@parameters$auuc_type) && model@parameters$auuc_type != "NONE"){ parms[["auuc_type"]] <- model@parameters$auuc_type - } - if(auuc_nbins > 0){ - parms[["auuc_nbins"]] <- auuc_nbins - } + } res <- .h2o.__remoteSend(method = "POST", .h2o.__MODEL_METRICS(model@model_id, newdata.id), .params = parms) #### @@ -1144,7 +1141,7 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval= #' } #' @export h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL, weights=NULL, treatment=NULL, - auc_type="NONE", auuc_type="AUTO", auuc_nbins=-1, custom_auuc_thresholds=NULL) { + auc_type="NONE", auuc_type="AUTO", auuc_nbins=-1) { predicted <- .validate.H2OFrame(predicted, required=TRUE) actuals <- .validate.H2OFrame(actuals, required=TRUE) weights <- .validate.H2OFrame(weights, required=FALSE) @@ -1169,7 +1166,6 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL, } params$auuc_type <- auuc_type params$auuc_nbins <- auuc_nbins - params$custom_auuc_thresholds <- paste("[", paste(custom_auuc_thresholds, collapse = ", "),"]") } params$domain <- domain params$distribution <- distribution diff --git a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R index 04d42667a222..07846a9a24fd 100644 --- a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R +++ b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R @@ -13,6 +13,7 @@ test.make_metrics_uplift_binomial <- function() { predictors <- sprintf("feature_%s",seq(0:11)) + nbins <- 20 model <- h2o.upliftRandomForest(training_frame=train, validation_frame=valid, @@ -20,13 +21,11 @@ test.make_metrics_uplift_binomial <- function() { y=response, treatment_column=treatment, seed=42, - auuc_nbins=20, + auuc_nbins=nbins, score_each_iteration=TRUE, ntrees=3) print(model) - h2oPred <- as.data.frame(h2o.predict(model,valid)[,1]) - pred <- h2o.assign(h2o.predict(model,valid)[,1], "pred") actual <- h2o.assign(valid[,response], "act") treat <- h2o.assign(valid[,treatment], "treatment") @@ -36,25 +35,26 @@ test.make_metrics_uplift_binomial <- function() { m0 <- h2o.performance(model, valid=TRUE) thresholds0 <- m0@metrics$thresholds$thresholds - m1 <- h2o.make_metrics(pred, actual, treatment=treat, custom_auuc_thresholds=thresholds) + m1 <- h2o.make_metrics(pred, actual, treatment=treat, auuc_nbins=nbins) thresholds1 <- m1@metrics$thresholds$thresholds + print(m1) - m2 <- h2o.performance(model, valid, auuc_nbins=20) + m2 <- h2o.performance(model, valid) thresholds2 <- m2@metrics$thresholds$thresholds + print(m2) tol <- 1e-10 - ltol <- 1e-1 # There are few differences in prediction R vs. Java scoring, so the results are not the same but similar # thresholds should be the same expect_equal(thresholds, thresholds0, tolerance=tol) - expect_equal(thresholds0, thresholds1, tolerance=ltol) + expect_equal(thresholds0, thresholds1, tolerance=tol) expect_equal(thresholds0, thresholds2, tolerance=tol) auuc0 <- h2o.auuc(m0) auuc1 <- h2o.auuc(m1) auuc2 <- h2o.auuc(m2) - expect_equal(auuc0, auuc1, tolerance=ltol) + expect_equal(auuc0, auuc1, tolerance=tol) expect_equal(auuc0, auuc2, tolerance=tol) auuc_table0 <- h2o.auuc_table(m0) @@ -65,21 +65,21 @@ test.make_metrics_uplift_binomial <- function() { expect_true(is.data.frame(auuc_table1)) expect_true(is.data.frame(auuc_table2)) - expect_equal(auuc_table0, auuc_table1, tolerance=ltol) + expect_equal(auuc_table0, auuc_table1, tolerance=tol) expect_equal(auuc_table0, auuc_table2, tolerance=tol) thr_table0 <- h2o.thresholds_and_metric_scores(m0) thr_table1 <- h2o.thresholds_and_metric_scores(m1) thr_table2 <- h2o.thresholds_and_metric_scores(m2) - expect_equal(thr_table0, thr_table1, tolerance=ltol) + expect_equal(thr_table0, thr_table1, tolerance=tol) expect_equal(thr_table0, thr_table2, tolerance=tol) qini0 <- h2o.qini(m0) qini1 <- h2o.qini(m1) qini2 <- h2o.qini(m2) - expect_equal(qini0, qini1, tolerance=ltol) + expect_equal(qini0, qini1, tolerance=tol) expect_equal(qini0, qini2, tolerance=tol) aecu_table0 <- h2o.aecu_table(m0) @@ -90,28 +90,28 @@ test.make_metrics_uplift_binomial <- function() { expect_true(is.data.frame(aecu_table1)) expect_true(is.data.frame(aecu_table2)) - expect_equal(aecu_table0, aecu_table1, tolerance=ltol) + expect_equal(aecu_table0, aecu_table1, tolerance=tol) expect_equal(aecu_table0, aecu_table2, tolerance=tol) ate0 <- h2o.ate(m0) ate1 <- h2o.ate(m1) ate2 <- h2o.ate(m2) - expect_equal(ate0, ate1, tolerance=ltol) + expect_equal(ate0, ate1, tolerance=tol) expect_equal(ate0, ate2, tolerance=tol) att0 <- h2o.att(m0) att1 <- h2o.att(m1) att2 <- h2o.att(m2) - expect_equal(att0, att1, tolerance=ltol) + expect_equal(att0, att1, tolerance=tol) expect_equal(att0, att2, tolerance=tol) atc0 <- h2o.atc(m0) atc1 <- h2o.atc(m1) atc2 <- h2o.atc(m2) - expect_equal(atc0, atc1, tolerance=ltol) + expect_equal(atc0, atc1, tolerance=tol) expect_equal(atc0, atc2, tolerance=tol) }