Remove custom AUUC threshold logic from make metrics (deferred to a separate PR)

h2oai · Sep 20, 2023 · commit 9b74707 · 1 parent 48afd94
Show file tree

Hide file tree

Showing 8 changed files with 46 additions and 96 deletions.
diff --git a/h2o-core/src/main/java/hex/AUUC.java b/h2o-core/src/main/java/hex/AUUC.java
@@ -234,7 +234,9 @@ public static double[] calculateQuantileThresholds(int groups, Vec preds) {
             if (qm != null) qm.remove();
             if (fr != null) DKV.remove(fr._key);
         }
-        if(Double.isNaN(quantiles[0])){
+        if(quantiles == null){
+            quantiles = new double[]{0};
+        } else if(Double.isNaN(quantiles[0])){
             quantiles[0] = 0;
         }
         return quantiles;

diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
@@ -82,7 +82,7 @@ protected StringBuilder appendToStringMetrics(StringBuilder sb) {
      * @param customAuucThresholds custom threshold to calculate AUUC, if is not specified, the thresholds will be calculated from prediction vector       
      * @return ModelMetrics object
      */
-    static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, String[] domain, AUUC.AUUCType auucType, int auucNbins, double[] customAuucThresholds) {
+    static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, String[] domain, AUUC.AUUCType auucType, int auucNbins) {
         Scope.enter();
         try {
             Vec labels = actualLabels.toCategoricalVec();
@@ -99,14 +99,6 @@ static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabe
             if (!treatment.isCategorical() || treatment.cardinality() != 2)
                 throw new IllegalArgumentException("Treatment values should be catecorical value and have 2 class " + Arrays.toString(treatment.domain()) + " for uplift binomial uplift metrics.");
             long dataSize = treatment.length();
-            if (customAuucThresholds != null) {
-                if(customAuucThresholds.length == 0){
-                    throw new IllegalArgumentException("Custom AUUC thresholds array should have size greater than 0.");
-                }
-                if (auucNbins != customAuucThresholds.length) {
-                    Log.info("Custom AUUC thresholds are specified, so number of AUUC bins will equal to thresholds size.");
-                }
-            }
             if (auucNbins < -1 || auucNbins == 0 || auucNbins > dataSize)
                 throw new IllegalArgumentException("The number of bins to calculate AUUC need to be -1 (default value) or higher than zero, but less than data size.");
             if(auucNbins == -1)
@@ -115,11 +107,7 @@ static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabe
             fr.add("labels", labels);
             fr.add("treatment", treatment);
             MetricBuilderBinomialUplift mb;
-            if (customAuucThresholds == null) {
-                mb = new UpliftBinomialMetrics(labels.domain(), AUUC.calculateQuantileThresholds(auucNbins, predictedProbs)).doAll(fr)._mb;
-            } else {
-                mb = new UpliftBinomialMetrics(labels.domain(), customAuucThresholds).doAll(fr)._mb;
-            }
+            mb = new UpliftBinomialMetrics(labels.domain(), AUUC.calculateQuantileThresholds(auucNbins, predictedProbs)).doAll(fr)._mb;
             labels.remove();
             ModelMetricsBinomialUplift mm = (ModelMetricsBinomialUplift) mb.makeModelMetrics(null, fr, auucType);
             mm._description = "Computed on user-given predictions and labels.";

diff --git a/h2o-core/src/main/java/water/api/ModelMetricsHandler.java b/h2o-core/src/main/java/water/api/ModelMetricsHandler.java
@@ -37,7 +37,6 @@ public static final class ModelMetricsList extends Iced {
     public boolean _compare_abs;
     public String _auuc_type;
     public int _auuc_nbins;
-    public double[] _custom_auuc_thresholds;
 
     // Fetch all metrics that match model and/or frame
     ModelMetricsList fetch() {
@@ -238,7 +237,6 @@ public static final class ModelMetricsListSchemaV3 extends RequestSchemaV3<Model
       this.compare_abs = mml._compare_abs;
       this.auuc_type = mml._auuc_type;
       this.auuc_nbins = mml._auuc_nbins;
-      this.custom_auuc_thresholds = mml._custom_auuc_thresholds;
 
       if (null != mml._model_metrics) {
         this.model_metrics = new ModelMetricsBaseV3[mml._model_metrics.length];
@@ -314,7 +312,6 @@ public ModelMetricsListSchemaV3 score(int version, ModelMetricsListSchemaV3 s) {
     int auucNbins = parms._model._parms._auuc_nbins;
     if(s.auuc_type != null){
       parms._model._parms._auuc_type = AUUC.AUUCType.valueOf(s.auuc_type);
-      parms._model._parms._auuc_nbins = s.auuc_nbins;
     }
     parms._model.score(parms._frame, parms._predictions_name, null, true, CFuncRef.from(customMetricFunc)).remove(); // throw away predictions, keep metrics as a side-effect
     ModelMetricsListSchemaV3 mm = this.fetch(version, s);
@@ -378,10 +375,6 @@ public static final class ModelMetricsMakerSchemaV3 extends SchemaV3<ModelMetric
             level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true)
     public int auuc_nbins;
 
-    @API(help = "Custom AUUC thresholds (for uplift binomial classification).",
-            level = API.Level.secondary, direction = API.Direction.INOUT, gridable = true)
-    public double[] custom_auuc_thresholds;
-
     @API(help="Model Metrics.", direction=API.Direction.OUTPUT)
     public ModelMetricsBaseV3 model_metrics;
   }
@@ -413,10 +406,6 @@ public ModelMetricsMakerSchemaV3 make(int version, ModelMetricsMakerSchemaV3 s)
       if (null == treatmentFrame) throw new H2OKeyNotFoundArgumentException("treatment_frame", "make", s.treatment_frame);
      treatment = treatmentFrame.anyVec();
       if(s.auuc_type == null) s.auuc_type = AUUC.AUUCType.AUTO;
-      if(s.custom_auuc_thresholds != null) {
-        if (s.custom_auuc_thresholds.length == 0)
-          throw new H2OIllegalArgumentException("custom_auuc_thresholds", "make", "The length of the array has to be higher than 0.");
-      }
       if(s.auuc_nbins < -1 || s.auuc_nbins == 0) throw new H2OIllegalArgumentException("auuc_bins", "make", "The value has to be -1 or higher than 0.");
     }
 
@@ -428,7 +417,7 @@ public ModelMetricsMakerSchemaV3 make(int version, ModelMetricsMakerSchemaV3 s)
       s.model_metrics = new ModelMetricsRegressionV3().fillFromImpl(mm);
     } else if (s.domain.length==2) {
       if (treatment != null) {
-        ModelMetricsBinomialUplift mm = ModelMetricsBinomialUplift.make(pred.anyVec(), act.anyVec(), treatment, s.domain, s.auuc_type, s.auuc_nbins, s.custom_auuc_thresholds);
+        ModelMetricsBinomialUplift mm = ModelMetricsBinomialUplift.make(pred.anyVec(), act.anyVec(), treatment, s.domain, s.auuc_type, s.auuc_nbins);
         s.model_metrics = new ModelMetricsBinomialUpliftV3().fillFromImpl(mm);
       } else {
         if (pred.numCols()!=1) {

diff --git a/h2o-py/h2o/h2o.py b/h2o-py/h2o/h2o.py
@@ -1975,7 +1975,7 @@ def load_dataset(relative_path):
 
 
 def make_metrics(predicted, actual, domain=None, distribution=None, weights=None, treatment=None, auc_type="NONE",
-                 auuc_type="AUTO", auuc_nbins=-1, custom_auuc_thresholds=None):
+                 auuc_type="AUTO", auuc_nbins=-1):
     """
     Create Model Metrics from predicted and actual values in H2O.
 
@@ -2006,8 +2006,6 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None
                
     :param auuc_nbins: For uplift binomial classification you have to specify number of bins to be used 
            for calculation the AUUC. Default is -1, which means 1000.
-    :param custom_auuc_thresholds Array of custom thresholds to calculate AUUC, if the thresholds are specified, 
-            the number of AUUC bins is equal to thresholds size    
     :examples:
 
     >>> fr = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip")
@@ -2043,14 +2041,14 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None
     assert_is_type(distribution, str, None)
     assert_satisfies(actual.ncol, actual.ncol == 1)
     assert_is_type(auc_type, str)
-    assert_is_type(custom_auuc_thresholds, [float], None)
     allowed_auc_types = ["MACRO_OVO", "MACRO_OVR", "WEIGHTED_OVO", "WEIGHTED_OVR", "AUTO", "NONE"]
     assert auc_type in allowed_auc_types, "auc_type should be "+(" ".join([str(type) for type in allowed_auc_types]))
     if domain is None and any(actual.isfactor()):
         domain = actual.levels()[0]
     params = {"domain": domain, "distribution": distribution}
     if weights is not None:
         params["weights_frame"] = weights.frame_id
+    params["auc_type"] = auc_type
     if treatment is not None:
         assert treatment.ncol == 1, "`treatment` frame should have exactly 1 column"
         params["treatment_frame"] = treatment.frame_id
@@ -2059,10 +2057,6 @@ def make_metrics(predicted, actual, domain=None, distribution=None, weights=None
         params["auuc_type"] = auuc_type
         assert auuc_nbins == -1 or auuc_nbins > 0, "auuc_nbis should be -1 or higner than 0."  
         params["auuc_nbins"] = auuc_nbins
-        if custom_auuc_thresholds is not None:
-            assert len(custom_auuc_thresholds) > 0, "custom_auuc_thresholds size should be higher than 0."
-            params["custom_auuc_thresholds"] = custom_auuc_thresholds
-    params["auc_type"] = auc_type    
     res = api("POST /3/ModelMetrics/predictions_frame/%s/actuals_frame/%s" % (predicted.frame_id, actual.frame_id),
               data=params)
     return res["model_metrics"]

diff --git a/h2o-py/h2o/model/model_base.py b/h2o-py/h2o/model/model_base.py
@@ -460,7 +460,7 @@ def training_model_metrics(self):
         return self._model_json["output"]["training_metrics"]._metric_json
 
     def model_performance(self, test_data=None, train=False, valid=False, xval=False, auc_type=None, 
-                          auuc_type=None, auuc_nbins=-1, custom_auuc_thresholds=None):
+                          auuc_type=None):
         """
         Generate model metrics for this model on ``test_data``.
 
@@ -517,16 +517,12 @@ def model_performance(self, test_data=None, train=False, valid=False, xval=False
                               data={"auc_type": auc_type})
             elif auuc_type is not None:
                 assert_is_type(auuc_type, Enum("AUTO", "qini", "gain", "lift"))
-                assert_is_type(custom_auuc_thresholds, [float], None)
-                if custom_auuc_thresholds is not None and len(custom_auuc_thresholds) == 0:
-                    print("WARNING: Model metrics cannot be calculated and metric_json is empty due to the custom_auuc_tresholds are empty.")
-                    return
                 if (self._model_json["treatment_column_name"] is not None) and not(self._model_json["treatment_column_name"] in test_data.names):
                     print("WARNING: Model metrics cannot be calculated and metric_json is empty due to the absence of the treatment column in your dataset.")
                     return
 
                 res = h2o.api("POST /3/ModelMetrics/models/%s/frames/%s" % (self.model_id, test_data.frame_id),
-                              data={"auuc_type": auuc_type, "auuc_nbins": auuc_nbins, "custom_auuc_thresholds": custom_auuc_thresholds})
+                              data={"auuc_type": auuc_type})
             else:
                 res = h2o.api("POST /3/ModelMetrics/models/%s/frames/%s" % (self.model_id, test_data.frame_id))
             # FIXME need to do the client-side filtering...  (https://github.com/h2oai/h2o-3/issues/13862)

diff --git a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
@@ -198,9 +198,9 @@ def pyunit_make_metrics_uplift():
     train[treatment_column] = train[treatment_column].asfactor()
     train[response_column] = train[response_column].asfactor()
 
-    test = h2o.import_file(pyunit_utils.locate("smalldata/uplift/upliftml_test.csv"))
-    test[treatment_column] = test[treatment_column].asfactor()
-    test[response_column] = test[response_column].asfactor()
+    valid = h2o.import_file(pyunit_utils.locate("smalldata/uplift/upliftml_test.csv"))
+    valid[treatment_column] = valid[treatment_column].asfactor()
+    valid[response_column] = valid[response_column].asfactor()
 
     nbins = 20
     model = H2OUpliftRandomForestEstimator(
@@ -211,57 +211,42 @@ def pyunit_make_metrics_uplift():
         ntrees=3
     )
 
-    model.train(y=response_column, x=feature_cols, training_frame=train, validation_frame=test)
+    model.train(y=response_column, x=feature_cols, training_frame=train, validation_frame=valid)
     # test on validation data, train metrics are affected by sample rate
     m0 = model.model_performance(valid=True)
-    predicted = h2o.assign(model.predict(test)[0], "pred")
-    actual = test[response_column]
-    treatment = test[treatment_column]
-    m1 = model.model_performance(test_data=test, auuc_type="AUTO", auuc_nbins=nbins)
+    predicted = h2o.assign(model.predict(valid)[0], "pred")
+    actual = valid[response_column]
+    treatment = valid[treatment_column]
+    m1 = model.model_performance(test_data=valid, auuc_type="AUTO") 
     m2 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins)
-    m3 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins, 
-                          custom_auuc_thresholds=m1.thresholds())
-    m4 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=nbins, 
-                          custom_auuc_thresholds=model.default_auuc_thresholds())
+
     new_nbins = nbins - 10
-    m5 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=new_nbins)
-    m6 = model.model_performance(test_data=test, auuc_type="AUTO", auuc_nbins=new_nbins)
+    m3 = h2o.make_metrics(predicted, actual, treatment=treatment, auuc_type="AUTO", auuc_nbins=new_nbins)
 
     print("Model AUUC: {}".format(model.auuc()))
     print("thresholds: {}".format(model.default_auuc_thresholds()))
     print("Model performance AUUC: {}".format(m0.auuc()))
     print("thresholds: {}".format(m0.thresholds()))
-    print("Model performance AUUC: {}".format(m1.auuc()))
+    print("Model performance AUUC recalculate with data: {}".format(m1.auuc()))
     print("thresholds: {}".format(m1.thresholds()))
-    print("Make AUUC with no custom thresholds: {}".format(m2.auuc()))
+    print("Make AUUC: {}".format(m2.auuc()))
     print("thresholds: {}".format(m2.thresholds()))
-    print("Make AUUC with custom thresholds from m1: {}".format(m3.auuc()))
+    print("Make AUUC with new number of bins: {}".format(m3.auuc()))
     print("thresholds: {}".format(m3.thresholds()))
-    print("Make AUUC with custom thresholds from model defaults: {}".format(m4.auuc()))
-    print("thresholds: {}".format(m4.thresholds()))
-    print("Make AUUC with no custom thresholds but change nbins parameter: {}".format(m5.auuc()))
-    print("thresholds: {}".format(m5.thresholds()))
-    print("Performance AUUC with no custom thresholds but change nbins parameter: {}".format(m6.auuc()))
-    print("thresholds: {}".format(m6.thresholds()))
 
     tol = 1e-5
 
     # default model auuc is calculated from train data, default thresholds are from validation data
     assert abs(model.auuc() - m0.auuc()) > tol 
-    # model performance calculates new thresholds but from the same data with the same number of bins, so AUUCs are same
+    # model performance uses default thresholds, so AUUCs are same
     assert abs(m0.auuc() - m1.auuc()) < tol
-    # make method calculates new thresholds but from the same data with the same number of bins, so AUUCs are same
+    # make method calculates new thresholds but from the same data with same nbins so AUUCs are same
     assert abs(m1.auuc() - m2.auuc()) < tol
-    # if we use thresholds from performance metric and use it as custom, it makes the same metrics
-    assert abs(m1.auuc() - m3.auuc()) < tol
-    # make methods with different nbins parameter changes thresholds and AUUC
-    assert abs(m3.auuc() - m5.auuc()) > tol
-    # performance methods with different nbins parameter changes thresholds and AUUC
-    assert abs(m3.auuc() - m6.auuc()) > tol
-    # make and performance method with the same nbins parameter and the same data calculates the same thresholds
-    assert abs(m5.auuc() - m6.auuc()) < tol
+    # make method with the new auuc_nbins parameter calculates the new thresholds
+    assert abs(m2.auuc() - m3.auuc()) > tol
 
     print("===========================")
+
 
 def suite_model_metrics():
 

diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
@@ -1039,7 +1039,7 @@ h2o.feature_frequencies <- feature_frequencies.H2OModel
 #' h2o.performance(model = prostate_gbm_balanced, train = TRUE)
 #' }
 #' @export
-h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", auuc_type="NONE", auuc_nbins=-1) {
+h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", auuc_type="NONE") {
 
   # data is now deprecated and the new arg name is newdata
   if (!is.null(data)) {
@@ -1082,10 +1082,7 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=
         parms[["auuc_type"]] <- auuc_type
     } else if(!is.null(model@parameters$auuc_type) && model@parameters$auuc_type != "NONE"){
         parms[["auuc_type"]] <- model@parameters$auuc_type
-    }  
-    if(auuc_nbins > 0){
-        parms[["auuc_nbins"]] <- auuc_nbins
-    }  
+    }
     res <- .h2o.__remoteSend(method = "POST", .h2o.__MODEL_METRICS(model@model_id, newdata.id), .params = parms)
 
     ####
@@ -1144,7 +1141,7 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=
 #' }
 #' @export
 h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL, weights=NULL, treatment=NULL, 
-                                auc_type="NONE", auuc_type="AUTO", auuc_nbins=-1, custom_auuc_thresholds=NULL) {
+                                auc_type="NONE", auuc_type="AUTO", auuc_nbins=-1) {
   predicted <- .validate.H2OFrame(predicted, required=TRUE)
   actuals <- .validate.H2OFrame(actuals, required=TRUE)
   weights <- .validate.H2OFrame(weights, required=FALSE)
@@ -1169,7 +1166,6 @@ h2o.make_metrics <- function(predicted, actuals, domain=NULL, distribution=NULL,
       }
       params$auuc_type <- auuc_type
       params$auuc_nbins <- auuc_nbins
-      params$custom_auuc_thresholds <- paste("[", paste(custom_auuc_thresholds, collapse = ", "),"]")
   }
   params$domain <- domain
   params$distribution <- distribution