Skip to content

Commit d24260f

Browse files
authored
[R-package] require lgb.Dataset, remove support for passing 'colnames' and 'categorical_feature' for lgb.train() and lgb.cv() (#6714)
1 parent c6d90bc commit d24260f

13 files changed

+20
-163
lines changed

R-package/R/callback.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ CB_ENV <- R6::R6Class(
6767

6868
}
6969

70-
return(paste0(msg, collapse = " "))
70+
return(paste(msg, collapse = " "))
7171

7272
}
7373

R-package/R/lgb.Dataset.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ Dataset <- R6::R6Class(
457457
if (!.is_null_handle(x = private$handle)) {
458458

459459
# Merge names with tab separation
460-
merged_name <- paste0(as.list(private$colnames), collapse = "\t")
460+
merged_name <- paste(as.list(private$colnames), collapse = "\t")
461461
.Call(
462462
LGBM_DatasetSetFeatureNames_R
463463
, private$handle

R-package/R/lgb.convert_with_rules.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
vapply(
66
X = df
77
, FUN = function(x) {
8-
paste0(class(x), collapse = ",")
8+
paste(class(x), collapse = ",")
99
}
1010
, FUN.VALUE = character(1L)
1111
)

R-package/R/lgb.cv.R

+2-61
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ CVBooster <- R6::R6Class(
2525
#' @description Cross validation logic used by LightGBM
2626
#' @inheritParams lgb_shared_params
2727
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
28-
#' @param label Deprecated. See "Deprecated Arguments" section below.
29-
#' @param weight Deprecated. See "Deprecated Arguments" section below.
3028
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
3129
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
3230
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
@@ -36,8 +34,6 @@ CVBooster <- R6::R6Class(
3634
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
3735
#' (each element must be a vector of test fold's indices). When folds are supplied,
3836
#' the \code{nfold} and \code{stratified} parameters are ignored.
39-
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
40-
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
4137
#' @param callbacks List of callback functions that are applied at each iteration.
4238
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
4339
#' into a predictor model which frees up memory and the original datasets
@@ -69,20 +65,12 @@ CVBooster <- R6::R6Class(
6965
#' )
7066
#' }
7167
#'
72-
#' @section Deprecated Arguments:
73-
#'
74-
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
75-
#' to argument \code{'data'}. It will also remove support for passing arguments
76-
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
77-
#'
7868
#' @importFrom data.table data.table setorderv
7969
#' @export
8070
lgb.cv <- function(params = list()
8171
, data
8272
, nrounds = 100L
8373
, nfold = 3L
84-
, label = NULL
85-
, weight = NULL
8674
, obj = NULL
8775
, eval = NULL
8876
, verbose = 1L
@@ -92,8 +80,6 @@ lgb.cv <- function(params = list()
9280
, stratified = TRUE
9381
, folds = NULL
9482
, init_model = NULL
95-
, colnames = NULL
96-
, categorical_feature = NULL
9783
, early_stopping_rounds = NULL
9884
, callbacks = list()
9985
, reset_data = FALSE
@@ -104,33 +90,8 @@ lgb.cv <- function(params = list()
10490
if (nrounds <= 0L) {
10591
stop("nrounds should be greater than zero")
10692
}
107-
108-
# If 'data' is not an lgb.Dataset, try to construct one using 'label'
10993
if (!.is_Dataset(x = data)) {
110-
warning(paste0(
111-
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
112-
, "Either pass an lgb.Dataset object, or use lightgbm()."
113-
))
114-
if (is.null(label)) {
115-
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
116-
}
117-
data <- lgb.Dataset(data = data, label = label)
118-
}
119-
120-
# raise deprecation warnings if necessary
121-
# ref: https://github.com/microsoft/LightGBM/issues/6435
122-
args <- names(match.call())
123-
if ("categorical_feature" %in% args) {
124-
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
125-
}
126-
if ("colnames" %in% args) {
127-
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
128-
}
129-
if ("label" %in% args) {
130-
.emit_dataset_kwarg_warning("label", "lgb.cv")
131-
}
132-
if ("weight" %in% args) {
133-
.emit_dataset_kwarg_warning("weight", "lgb.cv")
94+
stop("lgb.cv: data must be an lgb.Dataset instance")
13495
}
13596

13697
# set some parameters, resolving the way they were passed in with other parameters
@@ -214,37 +175,17 @@ lgb.cv <- function(params = list()
214175
data$construct()
215176

216177
# Check interaction constraints
217-
cnames <- NULL
218-
if (!is.null(colnames)) {
219-
cnames <- colnames
220-
} else if (!is.null(data$get_colnames())) {
221-
cnames <- data$get_colnames()
222-
}
223178
params[["interaction_constraints"]] <- .check_interaction_constraints(
224179
interaction_constraints = interaction_constraints
225-
, column_names = cnames
180+
, column_names = data$get_colnames()
226181
)
227182

228-
if (!is.null(weight)) {
229-
data$set_field(field_name = "weight", data = weight)
230-
}
231-
232183
# Update parameters with parsed parameters
233184
data$update_params(params = params)
234185

235186
# Create the predictor set
236187
data$.__enclos_env__$private$set_predictor(predictor = predictor)
237188

238-
# Write column names
239-
if (!is.null(colnames)) {
240-
data$set_colnames(colnames = colnames)
241-
}
242-
243-
# Write categorical features
244-
if (!is.null(categorical_feature)) {
245-
data$set_categorical_feature(categorical_feature = categorical_feature)
246-
}
247-
248189
if (!is.null(folds)) {
249190

250191
# Check for list of folds or for single value

R-package/R/lgb.train.R

+1-35
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
#' @inheritParams lgb_shared_params
77
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
88
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
9-
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
10-
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
119
#' @param callbacks List of callback functions that are applied at each iteration.
1210
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
1311
#' booster model into a predictor model which frees up memory and the
@@ -42,12 +40,6 @@
4240
#' )
4341
#' }
4442
#'
45-
#' @section Deprecated Arguments:
46-
#'
47-
#' A future release of \code{lightgbm} will remove support for passing arguments
48-
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
49-
#' \code{lgb.Dataset} instead.
50-
#'
5143
#' @export
5244
lgb.train <- function(params = list(),
5345
data,
@@ -59,8 +51,6 @@ lgb.train <- function(params = list(),
5951
record = TRUE,
6052
eval_freq = 1L,
6153
init_model = NULL,
62-
colnames = NULL,
63-
categorical_feature = NULL,
6454
early_stopping_rounds = NULL,
6555
callbacks = list(),
6656
reset_data = FALSE,
@@ -83,16 +73,6 @@ lgb.train <- function(params = list(),
8373
}
8474
}
8575

86-
# raise deprecation warnings if necessary
87-
# ref: https://github.com/microsoft/LightGBM/issues/6435
88-
args <- names(match.call())
89-
if ("categorical_feature" %in% args) {
90-
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
91-
}
92-
if ("colnames" %in% args) {
93-
.emit_dataset_kwarg_warning("colnames", "lgb.train")
94-
}
95-
9676
# set some parameters, resolving the way they were passed in with other parameters
9777
# in `params`.
9878
# this ensures that the model stored with Booster$save() correctly represents
@@ -171,21 +151,12 @@ lgb.train <- function(params = list(),
171151

172152
# Construct datasets, if needed
173153
data$update_params(params = params)
174-
if (!is.null(categorical_feature)) {
175-
data$set_categorical_feature(categorical_feature)
176-
}
177154
data$construct()
178155

179156
# Check interaction constraints
180-
cnames <- NULL
181-
if (!is.null(colnames)) {
182-
cnames <- colnames
183-
} else if (!is.null(data$get_colnames())) {
184-
cnames <- data$get_colnames()
185-
}
186157
params[["interaction_constraints"]] <- .check_interaction_constraints(
187158
interaction_constraints = interaction_constraints
188-
, column_names = cnames
159+
, column_names = data$get_colnames()
189160
)
190161

191162
# Update parameters with parsed parameters
@@ -194,11 +165,6 @@ lgb.train <- function(params = list(),
194165
# Create the predictor set
195166
data$.__enclos_env__$private$set_predictor(predictor)
196167

197-
# Write column names
198-
if (!is.null(colnames)) {
199-
data$set_colnames(colnames)
200-
}
201-
202168
valid_contain_train <- FALSE
203169
train_data_name <- "train"
204170
reduced_valid_sets <- list()

R-package/R/utils.R

+4-20
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
# If a parameter has multiple values, join those values together with commas.
3636
# trimws() is necessary because format() will pad to make strings the same width
37-
val <- paste0(
37+
val <- paste(
3838
trimws(
3939
format(
4040
x = unname(params[[i]])
@@ -46,7 +46,7 @@
4646
if (nchar(val) <= 0L) next # Skip join
4747

4848
# Join key value
49-
pair <- paste0(c(param_names[[i]], val), collapse = "=")
49+
pair <- paste(c(param_names[[i]], val), collapse = "=")
5050
ret <- c(ret, pair)
5151

5252
}
@@ -55,7 +55,7 @@
5555
return("")
5656
}
5757

58-
return(paste0(ret, collapse = " "))
58+
return(paste(ret, collapse = " "))
5959

6060
}
6161

@@ -115,7 +115,7 @@
115115
# Turn indices 0-based and convert to string
116116
for (j in seq_along(interaction_constraints)) {
117117
interaction_constraints[[j]] <- paste0(
118-
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
118+
"[", paste(interaction_constraints[[j]] - 1L, collapse = ","), "]"
119119
)
120120
}
121121
return(interaction_constraints)
@@ -258,19 +258,3 @@
258258
return(a == b)
259259
}
260260
}
261-
262-
# ref: https://github.com/microsoft/LightGBM/issues/6435
263-
.emit_dataset_kwarg_warning <- function(calling_function, argname) {
264-
msg <- sprintf(
265-
paste0(
266-
"Argument '%s' to %s() is deprecated and will be removed in a future release. "
267-
, "Set '%s' with lgb.Dataset() instead. "
268-
, "See https://github.com/microsoft/LightGBM/issues/6435."
269-
)
270-
, argname
271-
, calling_function
272-
, argname
273-
)
274-
warning(msg)
275-
return(invisible(NULL))
276-
}

R-package/man/lgb.cv.Rd

-20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

R-package/man/lgb.train.Rd

-14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

R-package/src/install.libs.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ inst_dir <- file.path(R_PACKAGE_SOURCE, "inst", fsep = "/")
5151
, "make this faster."
5252
))
5353
}
54-
cmd <- paste0(cmd, " ", paste0(args, collapse = " "))
54+
cmd <- paste0(cmd, " ", paste(args, collapse = " "))
5555
exit_code <- system(cmd)
5656
}
5757

0 commit comments

Comments
 (0)