Refactor n_estimators to nlearners
valenad1 committed Sep 19, 2023
1 parent 41c2e2a commit 762445b
Showing 7 changed files with 42 additions and 44 deletions.
12 changes: 6 additions & 6 deletions h2o-algos/src/main/java/hex/adaboost/AdaBoost.java
@@ -59,9 +59,9 @@ public boolean haveMojo() {
     @Override
     public void init(boolean expensive) {
         super.init(expensive);
-        if(_parms._n_estimators < 1 || _parms._n_estimators > MAX_ESTIMATORS)
+        if(_parms._nlearners < 1 || _parms._nlearners > MAX_ESTIMATORS)
             error("n_estimators", "Parameter n_estimators must be in interval [1, "
-                    + MAX_ESTIMATORS + "] but it is " + _parms._n_estimators);
+                    + MAX_ESTIMATORS + "] but it is " + _parms._nlearners);
         if (_parms._weak_learner == AdaBoostModel.Algorithm.AUTO) {
             _parms._weak_learner = AdaBoostModel.Algorithm.DRF;
         }
@@ -96,8 +96,8 @@ public void computeImpl() {
     }
 
     private void buildAdaboost() {
-        _model._output.alphas = new double[(int)_parms._n_estimators];
-        _model._output.models = new Key[(int)_parms._n_estimators];
+        _model._output.alphas = new double[(int)_parms._nlearners];
+        _model._output.models = new Key[(int)_parms._nlearners];
 
         Frame _trainWithWeights;
         if (_parms._weights_column == null) {
@@ -112,7 +112,7 @@ private void buildAdaboost() {
             _trainWithWeights = _parms.train();
         }
 
-        for (int n = 0; n < _parms._n_estimators; n++) {
+        for (int n = 0; n < _parms._nlearners; n++) {
             Timer timer = new Timer();
             ModelBuilder job = chooseWeakLearner(_trainWithWeights);
             job._parms._seed += n;
@@ -233,7 +233,7 @@ public TwoDimTable createModelSummaryTable() {
                 "");
         int row = 0;
         int col = 0;
-        table.set(row, col++, _parms._n_estimators);
+        table.set(row, col++, _parms._nlearners);
         table.set(row, col++, _parms._learn_rate);
         table.set(row, col++, _parms._weak_learner.toString());
         table.set(row, col, _parms._seed);
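
The init() check above is the only bound on the learner count, and it fires when model building starts rather than at construction time. A minimal hedged sketch of what an out-of-range value looks like from the Python client after this rename (not part of the commit; it assumes a running H2O cluster, and the public prostate test file stands in for any binary-response frame):

    import h2o
    from h2o.estimators import H2OAdaBoostEstimator

    h2o.init()
    # Path assumed from H2O's public smalldata layout, mirroring the tests below.
    train = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
    train["CAPSULE"] = train["CAPSULE"].asfactor()

    # nlearners must fall in [1, MAX_ESTIMATORS]; 0 trips the init() check above.
    model = H2OAdaBoostEstimator(nlearners=0)
    try:
        model.train(training_frame=train, y="CAPSULE")
    except Exception as err:
        print(err)  # surfaces the "must be in interval [1, ...]" validation error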
6 changes: 3 additions & 3 deletions h2o-algos/src/main/java/hex/adaboost/AdaBoostModel.java
@@ -92,7 +92,7 @@ public static class AdaBoostParameters extends Model.Parameters {
         /**
          * Number of weak learners to train. Defaults to 50.
          */
-        public int _n_estimators;
+        public int _nlearners;
 
         /**
          * Choose a weak learner type. Defaults to DRF.
@@ -121,12 +121,12 @@ public String javaName() {
 
         @Override
         public long progressUnits() {
-            return _n_estimators;
+            return _nlearners;
         }
 
         public AdaBoostParameters() {
             super();
-            _n_estimators = 50;
+            _nlearners = 50;
             _weak_learner = Algorithm.AUTO;
             _learn_rate = 0.5;
         }
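
The constructor above fixes the defaults: nlearners = 50, weak_learner = AUTO (which init() in AdaBoost.java resolves to DRF), and learn_rate = 0.5. A hedged sketch of training with everything left at those defaults, reusing the cluster and train frame from the previous sketch:

    from h2o.estimators import H2OAdaBoostEstimator

    # All defaults, mirroring AdaBoostParameters(): 50 weak learners,
    # weak_learner AUTO -> DRF, learn_rate 0.5, seed -1.
    model = H2OAdaBoostEstimator()
    model.train(training_frame=train, y="CAPSULE")
    print(model.model_performance(train).auc())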
4 changes: 2 additions & 2 deletions h2o-algos/src/main/java/hex/schemas/AdaBoostV3.java
@@ -20,14 +20,14 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
             "weights_column",
 
             // AdaBoost specific
-            "n_estimators",
+            "nlearners",
             "weak_learner",
             "learn_rate",
             "seed",
         };
 
         @API(help = "Number of AdaBoost weak learners.", gridable = true)
-        public int n_estimators;
+        public int nlearners;
 
         @API(help = "Choose a weak learner type. Defaults to DRF.", gridable = true, values = {"AUTO", "DRF", "GLM", "GBM"})
         public AdaBoostModel.Algorithm weak_learner;
28 changes: 13 additions & 15 deletions h2o-algos/src/test/java/hex/adaboost/AdaBoostTest.java
@@ -3,8 +3,6 @@
 import hex.Model;
 import hex.genmodel.algos.tree.SharedTreeSubgraph;
 import hex.tree.drf.DRFModel;
-import hex.tree.gbm.GBM;
-import hex.tree.gbm.GBMModel;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -49,7 +47,7 @@ public void testBasicTrain() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -89,7 +87,7 @@ public void testBasicTrainGLM() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._weak_learner = AdaBoostModel.Algorithm.GLM;
         p._response_column = response;
 
@@ -113,7 +111,7 @@ public void testBasicTrainLarge() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -136,7 +134,7 @@ public void testBasicTrainAndScore() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -168,7 +166,7 @@ public void testBasicTrainAndScoreCategorical() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
         p._categorical_encoding = Model.Parameters.CategoricalEncodingScheme.OneHotExplicit;
 
@@ -197,7 +195,7 @@ public void testBasicTrainAndScoreLarge() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -226,7 +224,7 @@ public void testBasicTrainAirlines() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -255,7 +253,7 @@ public void testBasicTrainHiggs() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._response_column = response;
 
         AdaBoost adaBoost = new AdaBoost(p);
@@ -330,7 +328,7 @@ public void testBasicTrainAndScoreWithExternalWeightsColumn() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 10;
+        p._nlearners = 10;
         p._response_column = response;
 
         AdaBoost adaBoostReference = new AdaBoost(p);
@@ -374,7 +372,7 @@ public void testBasicTrainAndScoreWithCustomWeightsColumn() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 10;
+        p._nlearners = 10;
         p._response_column = response;
 
         AdaBoost adaBoostReference = new AdaBoost(p);
@@ -419,7 +417,7 @@ public void testBasicTrainAndScoreWithDuplicatedWeightsColumn() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 10;
+        p._nlearners = 10;
         p._response_column = response;
         p._ignore_const_cols = false;
 
@@ -447,7 +445,7 @@ public void testBasicTrainAndScoreGLM() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._weak_learner = AdaBoostModel.Algorithm.GLM;
         p._response_column = response;
 
@@ -473,7 +471,7 @@ public void testBasicTrainAndScoreGBM() {
         AdaBoostModel.AdaBoostParameters p = new AdaBoostModel.AdaBoostParameters();
         p._train = train._key;
         p._seed = 0xDECAF;
-        p._n_estimators = 50;
+        p._nlearners = 50;
         p._weak_learner = AdaBoostModel.Algorithm.GBM;
         p._response_column = response;
 
20 changes: 10 additions & 10 deletions h2o-py/h2o/estimators/adaboost.py
@@ -28,7 +28,7 @@ def __init__(self,
                  ignore_const_cols=True,  # type: bool
                  categorical_encoding="auto",  # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]
                  weights_column=None,  # type: Optional[str]
-                 n_estimators=50,  # type: int
+                 nlearners=50,  # type: int
                  weak_learner="auto",  # type: Literal["auto", "drf", "glm", "gbm"]
                  learn_rate=0.5,  # type: float
                  seed=-1,  # type: int
@@ -59,9 +59,9 @@ def __init__(self,
             that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.
             Defaults to ``None``.
         :type weights_column: str, optional
-        :param n_estimators: Number of AdaBoost weak learners.
+        :param nlearners: Number of AdaBoost weak learners.
             Defaults to ``50``.
-        :type n_estimators: int
+        :type nlearners: int
         :param weak_learner: Choose a weak learner type. Defaults to DRF.
             Defaults to ``"auto"``.
         :type weak_learner: Literal["auto", "drf", "glm", "gbm"]
@@ -80,7 +80,7 @@ def __init__(self,
         self.ignore_const_cols = ignore_const_cols
         self.categorical_encoding = categorical_encoding
         self.weights_column = weights_column
-        self.n_estimators = n_estimators
+        self.nlearners = nlearners
         self.weak_learner = weak_learner
         self.learn_rate = learn_rate
         self.seed = seed
@@ -162,18 +162,18 @@ def weights_column(self, weights_column):
         self._parms["weights_column"] = weights_column
 
     @property
-    def n_estimators(self):
+    def nlearners(self):
         """
         Number of AdaBoost weak learners.
 
         Type: ``int``, defaults to ``50``.
         """
-        return self._parms.get("n_estimators")
+        return self._parms.get("nlearners")
 
-    @n_estimators.setter
-    def n_estimators(self, n_estimators):
-        assert_is_type(n_estimators, None, int)
-        self._parms["n_estimators"] = n_estimators
+    @nlearners.setter
+    def nlearners(self, nlearners):
+        assert_is_type(nlearners, None, int)
+        self._parms["nlearners"] = nlearners
 
     @property
     def weak_learner(self):
2 changes: 1 addition & 1 deletion (Python test; file name not shown in this view)
@@ -11,7 +11,7 @@ def adaboost():
     train = h2o.import_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
     train["CAPSULE"] = train["CAPSULE"].asfactor()
 
-    adaboost_model = H2OAdaBoostEstimator(n_estimators=50, seed=0xBEEF, weak_learner="DRF", learn_rate=0.5)
+    adaboost_model = H2OAdaBoostEstimator(nlearners=50, seed=0xBEEF, weak_learner="DRF", learn_rate=0.5)
     adaboost_model.train(training_frame=train, y="CAPSULE")
     predict = adaboost_model.predict(train)
 
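
Since the Python property is renamed outright and no deprecated alias appears anywhere in this diff, caller code written against the old keyword needs a one-line update. A before/after sketch, with the old call reconstructed from the removed line above:

    from h2o.estimators import H2OAdaBoostEstimator

    # Before this commit:
    #   adaboost_model = H2OAdaBoostEstimator(n_estimators=50, seed=0xBEEF, weak_learner="DRF", learn_rate=0.5)
    # After this commit, the same model is configured as:
    adaboost_model = H2OAdaBoostEstimator(nlearners=50, seed=0xBEEF, weak_learner="DRF", learn_rate=0.5)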
14 changes: 7 additions & 7 deletions h2o-r/h2o-package/R/adaboost.R
@@ -24,7 +24,7 @@
 #' well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If
 #' you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get
 #' an accurate prediction, remove all rows with weight == 0.
-#' @param n_estimators Number of AdaBoost weak learners. Defaults to 50.
+#' @param nlearners Number of AdaBoost weak learners. Defaults to 50.
 #' @param weak_learner Choose a weak learner type. Defaults to DRF. Must be one of: "AUTO", "DRF", "GLM", "GBM". Defaults to AUTO.
 #' @param learn_rate Learning rate (from 0.0 to 1.0) Defaults to 0.5.
 #' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
@@ -56,7 +56,7 @@ h2o.adaBoost <- function(x,
                          ignore_const_cols = TRUE,
                          categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                          weights_column = NULL,
-                         n_estimators = 50,
+                         nlearners = 50,
                          weak_learner = c("AUTO", "DRF", "GLM", "GBM"),
                          learn_rate = 0.5,
                          seed = -1)
@@ -89,8 +89,8 @@
   parms$categorical_encoding <- categorical_encoding
   if (!missing(weights_column))
     parms$weights_column <- weights_column
-  if (!missing(n_estimators))
-    parms$n_estimators <- n_estimators
+  if (!missing(nlearners))
+    parms$nlearners <- nlearners
   if (!missing(weak_learner))
     parms$weak_learner <- weak_learner
   if (!missing(learn_rate))
@@ -108,7 +108,7 @@
                               ignore_const_cols = TRUE,
                               categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                               weights_column = NULL,
-                              n_estimators = 50,
+                              nlearners = 50,
                               weak_learner = c("AUTO", "DRF", "GLM", "GBM"),
                               learn_rate = 0.5,
                               seed = -1,
@@ -146,8 +146,8 @@
   parms$categorical_encoding <- categorical_encoding
   if (!missing(weights_column))
     parms$weights_column <- weights_column
-  if (!missing(n_estimators))
-    parms$n_estimators <- n_estimators
+  if (!missing(nlearners))
+    parms$nlearners <- nlearners
   if (!missing(weak_learner))
     parms$weak_learner <- weak_learner
   if (!missing(learn_rate))
