@@ -53,6 +53,32 @@ impl Default for TreeMethod {
53
53
fn default ( ) -> Self { TreeMethod :: Auto }
54
54
}
55
55
56
+ impl From < String > for TreeMethod
57
+ {
58
+ fn from ( s : String ) -> Self
59
+ {
60
+ use std:: borrow:: Borrow ;
61
+ Self :: from ( s. borrow ( ) )
62
+ }
63
+ }
64
+
65
+ impl < ' a > From < & ' a str > for TreeMethod
66
+ {
67
+ fn from ( s : & ' a str ) -> Self
68
+ {
69
+ match s
70
+ {
71
+ "auto" => TreeMethod :: Auto ,
72
+ "exact" => TreeMethod :: Exact ,
73
+ "approx" => TreeMethod :: Approx ,
74
+ "hist" => TreeMethod :: Hist ,
75
+ "gpu_exact" => TreeMethod :: GpuExact ,
76
+ "gpu_hist" => TreeMethod :: GpuHist ,
77
+ _ => panic ! ( "no known tree_method for {}" , s)
78
+ }
79
+ }
80
+ }
81
+
56
82
/// Provides a modular way to construct and to modify the trees. This is an advanced parameter that is usually set
57
83
/// automatically, depending on some other parameters. However, it could be also set explicitly by a user.
58
84
#[ derive( Clone ) ]
@@ -191,7 +217,7 @@ pub struct TreeBoosterParameters {
191
217
///
192
218
/// * range: [0,∞]
193
219
/// * default: 0
194
- gamma : u32 ,
220
+ gamma : f32 ,
195
221
196
222
/// Maximum depth of a tree; increasing this value makes the model more complex and more likely to overfit.
197
223
/// 0 indicates no limit, limit is required for depth-wise grow policy.
@@ -208,7 +234,7 @@ pub struct TreeBoosterParameters {
208
234
///
209
235
/// * range: [0,∞]
210
236
/// * default: 1
211
- min_child_weight : u32 ,
237
+ min_child_weight : f32 ,
212
238
213
239
/// Maximum delta step we allow each tree’s weight estimation to be.
214
240
/// If the value is set to 0, it means there is no constraint. If it is set to a positive value,
@@ -218,7 +244,7 @@ pub struct TreeBoosterParameters {
218
244
///
219
245
/// * range: [0,∞]
220
246
/// * default: 0
221
- max_delta_step : u32 ,
247
+ max_delta_step : f32 ,
222
248
223
249
/// Subsample ratio of the training instance. Setting it to 0.5 means that XGBoost randomly collected half
224
250
/// of the data instances to grow trees and this will prevent overfitting.
@@ -239,15 +265,21 @@ pub struct TreeBoosterParameters {
239
265
/// * default: 1.0
240
266
colsample_bylevel : f32 ,
241
267
268
+ /// Subsample ratio of columns for each node.
269
+ ///
270
+ /// * range: (0.0, 1.0]
271
+ /// * default: 1.0
272
+ colsample_bynode : f32 ,
273
+
242
274
/// L2 regularization term on weights; increasing this value makes the model more conservative.
243
275
///
244
276
/// * default: 1
245
- lambda : u32 ,
277
+ lambda : f32 ,
246
278
247
279
/// L1 regularization term on weights; increasing this value makes the model more conservative.
248
280
///
249
281
/// * default: 0
250
- alpha : u32 ,
282
+ alpha : f32 ,
251
283
252
284
/// The tree construction algorithm used in XGBoost.
253
285
#[ builder( default = "TreeMethod::default()" ) ]
@@ -270,7 +302,7 @@ pub struct TreeBoosterParameters {
270
302
271
303
/// Sequence of tree updaters to run, providing a modular way to construct and to modify the trees.
272
304
///
273
- /// * default: [TreeUpdater::GrowColMaker, TreeUpdater::Prune ]
305
+ /// * default: vec![ ]
274
306
updater : Vec < TreeUpdater > ,
275
307
276
308
/// This is a parameter of the ‘refresh’ updater plugin. When this flag is true, tree leafs as well as tree nodes'
@@ -300,6 +332,11 @@ pub struct TreeBoosterParameters {
300
332
/// * default: 256
301
333
max_bin : u32 ,
302
334
335
+ /// Number of trees to train in parallel for boosted random forest.
336
+ ///
337
+ /// * default: 1
338
+ num_parallel_tree : u32 ,
339
+
303
340
/// The type of predictor algorithm to use. Provides the same results but allows the use of GPU or CPU.
304
341
///
305
342
/// * default: [`Predictor::Cpu`](enum.Predictor.html#variant.Cpu)
@@ -310,24 +347,26 @@ impl Default for TreeBoosterParameters {
310
347
fn default ( ) -> Self {
311
348
TreeBoosterParameters {
312
349
eta : 0.3 ,
313
- gamma : 0 ,
350
+ gamma : 0.0 ,
314
351
max_depth : 6 ,
315
- min_child_weight : 1 ,
316
- max_delta_step : 0 ,
352
+ min_child_weight : 1.0 ,
353
+ max_delta_step : 0.0 ,
317
354
subsample : 1.0 ,
318
355
colsample_bytree : 1.0 ,
319
356
colsample_bylevel : 1.0 ,
320
- lambda : 1 ,
321
- alpha : 0 ,
357
+ colsample_bynode : 1.0 ,
358
+ lambda : 1.0 ,
359
+ alpha : 0.0 ,
322
360
tree_method : TreeMethod :: default ( ) ,
323
361
sketch_eps : 0.03 ,
324
362
scale_pos_weight : 1.0 ,
325
- updater : vec ! [ TreeUpdater :: GrowColMaker , TreeUpdater :: Prune ] ,
363
+ updater : Vec :: new ( ) ,
326
364
refresh_leaf : true ,
327
365
process_type : ProcessType :: default ( ) ,
328
366
grow_policy : GrowPolicy :: default ( ) ,
329
367
max_leaves : 0 ,
330
368
max_bin : 256 ,
369
+ num_parallel_tree : 1 ,
331
370
predictor : Predictor :: default ( ) ,
332
371
}
333
372
}
@@ -347,19 +386,29 @@ impl TreeBoosterParameters {
347
386
v. push ( ( "subsample" . to_owned ( ) , self . subsample . to_string ( ) ) ) ;
348
387
v. push ( ( "colsample_bytree" . to_owned ( ) , self . colsample_bytree . to_string ( ) ) ) ;
349
388
v. push ( ( "colsample_bylevel" . to_owned ( ) , self . colsample_bylevel . to_string ( ) ) ) ;
389
+ v. push ( ( "colsample_bynode" . to_owned ( ) , self . colsample_bynode . to_string ( ) ) ) ;
350
390
v. push ( ( "lambda" . to_owned ( ) , self . lambda . to_string ( ) ) ) ;
351
391
v. push ( ( "alpha" . to_owned ( ) , self . alpha . to_string ( ) ) ) ;
352
392
v. push ( ( "tree_method" . to_owned ( ) , self . tree_method . to_string ( ) ) ) ;
353
393
v. push ( ( "sketch_eps" . to_owned ( ) , self . sketch_eps . to_string ( ) ) ) ;
354
394
v. push ( ( "scale_pos_weight" . to_owned ( ) , self . scale_pos_weight . to_string ( ) ) ) ;
355
- v. push ( ( "updater" . to_owned ( ) , self . updater . iter ( ) . map ( |u| u. to_string ( ) ) . collect :: < Vec < String > > ( ) . join ( "," ) ) ) ;
356
395
v. push ( ( "refresh_leaf" . to_owned ( ) , ( self . refresh_leaf as u8 ) . to_string ( ) ) ) ;
357
396
v. push ( ( "process_type" . to_owned ( ) , self . process_type . to_string ( ) ) ) ;
358
397
v. push ( ( "grow_policy" . to_owned ( ) , self . grow_policy . to_string ( ) ) ) ;
359
398
v. push ( ( "max_leaves" . to_owned ( ) , self . max_leaves . to_string ( ) ) ) ;
360
399
v. push ( ( "max_bin" . to_owned ( ) , self . max_bin . to_string ( ) ) ) ;
400
+ v. push ( ( "num_parallel_tree" . to_owned ( ) , self . num_parallel_tree . to_string ( ) ) ) ;
361
401
v. push ( ( "predictor" . to_owned ( ) , self . predictor . to_string ( ) ) ) ;
362
402
403
+ // Don't pass anything to XGBoost if the user didn't specify anything.
404
+ // This allows XGBoost to figure it out on its own, and suppresses the
405
+ // warning message during training.
406
+ // See: https://github.com/davechallis/rust-xgboost/issues/7
407
+ if self . updater . len ( ) != 0
408
+ {
409
+ v. push ( ( "updater" . to_owned ( ) , self . updater . iter ( ) . map ( |u| u. to_string ( ) ) . collect :: < Vec < String > > ( ) . join ( "," ) ) ) ;
410
+ }
411
+
363
412
v
364
413
}
365
414
}
@@ -370,6 +419,7 @@ impl TreeBoosterParametersBuilder {
370
419
Interval :: new_open_closed ( 0.0 , 1.0 ) . validate ( & self . subsample , "subsample" ) ?;
371
420
Interval :: new_open_closed ( 0.0 , 1.0 ) . validate ( & self . colsample_bytree , "colsample_bytree" ) ?;
372
421
Interval :: new_open_closed ( 0.0 , 1.0 ) . validate ( & self . colsample_bylevel , "colsample_bylevel" ) ?;
422
+ Interval :: new_open_closed ( 0.0 , 1.0 ) . validate ( & self . colsample_bynode , "colsample_bynode" ) ?;
373
423
Interval :: new_open_open ( 0.0 , 1.0 ) . validate ( & self . sketch_eps , "sketch_eps" ) ?;
374
424
Ok ( ( ) )
375
425
}
0 commit comments