Expose weights column
valenad1 committed Sep 14, 2023
1 parent beeeaf6 commit 7e1e286
Showing 3 changed files with 46 additions and 1 deletion.
3 changes: 2 additions & 1 deletion h2o-algos/src/main/java/hex/schemas/AdaBoostV3.java
@@ -17,8 +17,9 @@ public static final class AdaBoostParametersV3 extends ModelParametersSchemaV3<A
"ignored_columns",
"ignore_const_cols",
"categorical_encoding",
"weights_column",

// Extended Isolation Forest specific
// AdaBoost specific
"n_estimators",
"weak_learner",
"learning_rate",
31 changes: 31 additions & 0 deletions h2o-py/h2o/estimators/adaboost.py
@@ -27,6 +27,7 @@ def __init__(self,
ignored_columns=None, # type: Optional[List[str]]
ignore_const_cols=True, # type: bool
categorical_encoding="auto", # type: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"]
weights_column=None, # type: Optional[str]
n_estimators=50, # type: int
weak_learner="auto", # type: Literal["auto", "drf", "glm"]
learning_rate=0.5, # type: float
@@ -49,6 +50,15 @@ def __init__(self,
Defaults to ``"auto"``.
:type categorical_encoding: Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder",
"sort_by_response", "enum_limited"]
:param weights_column: Column with observation weights. Giving some observation a weight of zero is equivalent
to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating
that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do
not increase the size of the data frame. This is typically the number of times a row is repeated, but
non-integer values are supported as well. During training, rows with higher weights matter more, due to
the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at
that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.
Defaults to ``None``.
:type weights_column: str, optional
:param n_estimators: Number of AdaBoost weak learners.
Defaults to ``50``.
:type n_estimators: int
@@ -69,6 +79,7 @@ def __init__(self,
self.ignored_columns = ignored_columns
self.ignore_const_cols = ignore_const_cols
self.categorical_encoding = categorical_encoding
self.weights_column = weights_column
self.n_estimators = n_estimators
self.weak_learner = weak_learner
self.learning_rate = learning_rate
@@ -130,6 +141,26 @@ def categorical_encoding(self, categorical_encoding):
assert_is_type(categorical_encoding, None, Enum("auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder", "sort_by_response", "enum_limited"))
self._parms["categorical_encoding"] = categorical_encoding

@property
def weights_column(self):
"""
Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the
dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data
frame. This is typically the number of times a row is repeated, but non-integer values are supported as well.
During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set
weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an
accurate prediction, remove all rows with weight == 0.
Type: ``str``.
"""
return self._parms.get("weights_column")

@weights_column.setter
def weights_column(self, weights_column):
assert_is_type(weights_column, None, str)
self._parms["weights_column"] = weights_column

@property
def n_estimators(self):
"""
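The Python changes add weights_column both as a constructor argument and as a property backed by self._parms. A minimal usage sketch of the newly exposed parameter, assuming a running local H2O cluster; the dataset URL and the column names CAPSULE and AGE refer to H2O's public prostate demo data and are illustrative, not part of this commit:

import h2o
from h2o.estimators import H2OAdaBoostEstimator

h2o.init()

# Public H2O demo dataset; any binomial training frame works the same way.
prostate = h2o.import_file(
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

# A comparison yields a numeric 0/1 column, so rows with AGE <= 60 get
# weight 0 and are effectively excluded from training.
prostate["weight"] = prostate["AGE"] > 60

model = H2OAdaBoostEstimator(n_estimators=50,
                             weak_learner="drf",
                             learning_rate=0.5,
                             weights_column="weight")
model.train(y="CAPSULE", training_frame=prostate)

As with other H2O estimators, the column named in weights_column is read from the training frame and is not used as a predictor.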
13 changes: 13 additions & 0 deletions h2o-r/h2o-package/R/adaboost.R
@@ -17,6 +17,13 @@
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param categorical_encoding Encoding scheme for categorical features. Must be one of: "AUTO", "Enum", "OneHotInternal", "OneHotExplicit",
#'        "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited". Defaults to AUTO.
#' @param weights_column Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from
#' the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative
#' weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the
#' data frame. This is typically the number of times a row is repeated, but non-integer values are supported as
#' well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If
#' you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get
#' an accurate prediction, remove all rows with weight == 0.
#' @param n_estimators Number of AdaBoost weak learners. Defaults to 50.
#' @param weak_learner Weak learner. Must be one of: "AUTO", "DRF", "GLM". Defaults to AUTO.
#' @param learning_rate Learning rate. Defaults to 0.5.
@@ -48,6 +55,7 @@ h2o.adaBoost <- function(x,
model_id = NULL,
ignore_const_cols = TRUE,
categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
weights_column = NULL,
n_estimators = 50,
weak_learner = c("AUTO", "DRF", "GLM"),
learning_rate = 0.5,
@@ -79,6 +87,8 @@
parms$ignore_const_cols <- ignore_const_cols
if (!missing(categorical_encoding))
parms$categorical_encoding <- categorical_encoding
if (!missing(weights_column))
parms$weights_column <- weights_column
if (!missing(n_estimators))
parms$n_estimators <- n_estimators
if (!missing(weak_learner))
@@ -97,6 +107,7 @@ h2o.adaBoost <- function(x,
training_frame,
ignore_const_cols = TRUE,
categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
weights_column = NULL,
n_estimators = 50,
weak_learner = c("AUTO", "DRF", "GLM"),
learning_rate = 0.5,
@@ -133,6 +144,8 @@
parms$ignore_const_cols <- ignore_const_cols
if (!missing(categorical_encoding))
parms$categorical_encoding <- categorical_encoding
if (!missing(weights_column))
parms$weights_column <- weights_column
if (!missing(n_estimators))
parms$n_estimators <- n_estimators
if (!missing(weak_learner))
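All three bindings document the same caveat: rows given weight == 0 still appear in the returned prediction frame, but their predictions are not meaningful. A short sketch of the documented workaround, continuing from the Python example above; the filtering is plain h2o-py frame slicing, not part of this commit:

# Score only the rows that actually carried weight during training;
# per the docstring, predictions at weight == 0 rows are incorrect.
scored = prostate[prostate["weight"] > 0, :]
predictions = model.predict(scored)
print(predictions.head())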
