Skip to contents

This wrapper function automatically initializes the model by adding all numerical features as linear base-learners. Categorical features are dummy encoded and inserted using another linear base-learner without intercept. The function boostLinear also trains the model.

The returned object is an object of the Compboost class. This object can be used for further analyses (see ?Compboost for details).

Usage

boostLinear(
  data,
  target,
  optimizer = NULL,
  loss = NULL,
  learning_rate = 0.05,
  iterations = 100,
  trace = -1,
  intercept = TRUE,
  data_source = InMemoryData,
  df_cat = 2,
  oob_fraction = NULL,
  stop_args = NULL
)

Arguments

data

(data.frame)
A data frame containing the data.

target

(character(1) | ResponseRegr | ResponseBinaryClassif)
Character value containing the target variable or response object. Note that the loss must match the data type of the target.

optimizer

(OptimizerCoordinateDescent | OptimizerCoordinateDescentLineSearch | OptimizerAGBM | OptimizerCosineAnnealing)
An initialized S4 optimizer object (requires calling Optimizer*$new(...)). See the respective help page for further information.

loss

(LossQuadratic | LossBinomial | LossHuber | LossAbsolute | LossQuantile)
An initialized S4 loss object (requires calling Loss*$new(...)). See the respective help page for further information.

learning_rate

(numeric(1))
Learning rate to shrink the parameter in each step.

iterations

(integer(1))
Number of iterations that are trained. If iterations == 0, the untrained object is returned. This can be useful if other base learners (e.g. an interaction via a tensor base learner) are added.

trace

(integer(1))
Integer indicating how often a trace should be printed. If trace = 10, every 10th iteration is printed. If no trace should be printed, set trace = 0. The default is -1, which means that in total 40 iterations are printed.

intercept

(logical(1))
Internally used by BaselearnerPolynomial. This logical value indicates if each feature should get an intercept or not (default is TRUE).

data_source

(Data*)
Uninitialized Data* object which is used to store the data. At the moment, only in-memory training is supported.

df_cat

(numeric(1))
Degrees of freedom of the categorical base-learner.

oob_fraction

(numeric(1))
Fraction of the data that is used to track the out-of-bag risk.

stop_args

(list(2))
List containing two elements, patience and eps_for_break, which can be set to use early stopping on the data left out by setting oob_fraction. If !is.null(stop_args), early stopping is triggered.

Value

A model of the Compboost class. This model is an R6 object which can be used for retraining, predicting, plotting, and anything described in ?Compboost.

Examples

mod = boostLinear(data = iris, target = "Sepal.Length", loss = LossQuadratic$new(),
  oob_fraction = 0.3)
#>   1/100   risk = 0.32  oob_risk = 0.29   
#>   2/100   risk = 0.3  oob_risk = 0.27   
#>   4/100   risk = 0.26  oob_risk = 0.23   
#>   6/100   risk = 0.23  oob_risk = 0.2   
#>   8/100   risk = 0.2  oob_risk = 0.18   
#>  10/100   risk = 0.18  oob_risk = 0.16   
#>  12/100   risk = 0.16  oob_risk = 0.14   
#>  14/100   risk = 0.15  oob_risk = 0.13   
#>  16/100   risk = 0.14  oob_risk = 0.12   
#>  18/100   risk = 0.13  oob_risk = 0.11   
#>  20/100   risk = 0.12  oob_risk = 0.11   
#>  22/100   risk = 0.11  oob_risk = 0.1   
#>  24/100   risk = 0.11  oob_risk = 0.096   
#>  26/100   risk = 0.1  oob_risk = 0.093   
#>  28/100   risk = 0.099  oob_risk = 0.09   
#>  30/100   risk = 0.097  oob_risk = 0.088   
#>  32/100   risk = 0.094  oob_risk = 0.086   
#>  34/100   risk = 0.092  oob_risk = 0.084   
#>  36/100   risk = 0.091  oob_risk = 0.082   
#>  38/100   risk = 0.089  oob_risk = 0.08   
#>  40/100   risk = 0.087  oob_risk = 0.078   
#>  42/100   risk = 0.086  oob_risk = 0.076   
#>  44/100   risk = 0.084  oob_risk = 0.075   
#>  46/100   risk = 0.083  oob_risk = 0.073   
#>  48/100   risk = 0.081  oob_risk = 0.071   
#>  50/100   risk = 0.08  oob_risk = 0.07   
#>  52/100   risk = 0.079  oob_risk = 0.069   
#>  54/100   risk = 0.078  oob_risk = 0.067   
#>  56/100   risk = 0.077  oob_risk = 0.066   
#>  58/100   risk = 0.076  oob_risk = 0.065   
#>  60/100   risk = 0.075  oob_risk = 0.064   
#>  62/100   risk = 0.074  oob_risk = 0.063   
#>  64/100   risk = 0.073  oob_risk = 0.062   
#>  66/100   risk = 0.073  oob_risk = 0.061   
#>  68/100   risk = 0.072  oob_risk = 0.06   
#>  70/100   risk = 0.071  oob_risk = 0.06   
#>  72/100   risk = 0.07  oob_risk = 0.059   
#>  74/100   risk = 0.07  oob_risk = 0.058   
#>  76/100   risk = 0.069  oob_risk = 0.057   
#>  78/100   risk = 0.069  oob_risk = 0.057   
#>  80/100   risk = 0.068  oob_risk = 0.056   
#>  82/100   risk = 0.068  oob_risk = 0.056   
#>  84/100   risk = 0.067  oob_risk = 0.055   
#>  86/100   risk = 0.067  oob_risk = 0.055   
#>  88/100   risk = 0.066  oob_risk = 0.054   
#>  90/100   risk = 0.066  oob_risk = 0.054   
#>  92/100   risk = 0.066  oob_risk = 0.053   
#>  94/100   risk = 0.065  oob_risk = 0.053   
#>  96/100   risk = 0.065  oob_risk = 0.053   
#>  98/100   risk = 0.065  oob_risk = 0.052   
#> 100/100   risk = 0.064  oob_risk = 0.052   
#> 
#> 
#> Train 100 iterations in 0 Seconds.
#> Final risk based on the train set: 0.064
#> 
mod$getBaselearnerNames()
#> [1] "Sepal.Width_linear"  "Petal.Length_linear" "Petal.Width_linear" 
#> [4] "Species_ridge"      
mod$getEstimatedCoef()
#> Depricated, use `$getCoef()` instead.
#> $Petal.Length_linear
#>            [,1]
#> [1,] -1.6141315
#> [2,]  0.4206597
#> attr(,"blclass")
#> [1] "Rcpp_BaselearnerPolynomial"
#> 
#> $Sepal.Width_linear
#>            [,1]
#> [1,] -1.1423498
#> [2,]  0.3755377
#> attr(,"blclass")
#> [1] "Rcpp_BaselearnerPolynomial"
#> 
#> $offset
#> [1] 5.871429
#> 
table(mod$getSelectedBaselearner())
#> 
#> Petal.Length_linear  Sepal.Width_linear 
#>                  66                  34 
mod$predict()
#>            [,1]
#>   [1,] 5.018253
#>   [2,] 4.830484
#>   [3,] 4.863525
#>   [4,] 5.055806
#>   [5,] 4.980699
#>   [6,] 5.022765
#>   [7,] 4.792930
#>   [8,] 4.910104
#>   [9,] 5.135426
#>  [10,] 5.064831
#>  [11,] 4.704286
#>  [12,] 5.121890
#>  [13,] 5.126402
#>  [14,] 5.018253
#>  [15,] 5.257112
#>  [16,] 5.172980
#>  [17,] 4.887543
#>  [18,] 5.069343
#>  [19,] 4.914616
#>  [20,] 5.064831
#>  [21,] 5.060319
#>  [22,] 4.980699
#>  [23,] 5.285641
#>  [24,] 4.821459
#>  [25,] 4.976187
#>  [26,] 4.525542
#>  [27,] 4.863525
#>  [28,] 5.102385
#>  [29,] 5.341244
#>  [30,] 4.830484
#>  [31,] 5.215046
#>  [32,] 4.905591
#>  [33,] 5.135426
#>  [34,] 6.293769
#>  [35,] 5.661323
#>  [36,] 6.101488
#>  [37,] 6.059422
#>  [38,] 6.331322
#>  [39,] 5.404415
#>  [40,] 6.139041
#>  [41,] 5.338332
#>  [42,] 6.008331
#>  [43,] 6.181107
#>  [44,] 5.718382
#>  [45,] 6.130017
#>  [46,] 6.134529
#>  [47,] 5.853604
#>  [48,] 5.694364
#>  [49,] 6.335835
#>  [50,] 6.115024
#>  [51,] 6.143553
#>  [52,] 6.012843
#>  [53,] 6.185619
#>  [54,] 6.344859
#>  [55,] 6.096975
#>  [56,] 5.563654
#>  [57,] 5.572679
#>  [58,] 5.769472
#>  [59,] 6.274264
#>  [60,] 6.134529
#>  [61,] 5.829587
#>  [62,] 6.176595
#>  [63,] 5.773984
#>  [64,] 5.895670
#>  [65,] 6.008331
#>  [66,] 6.012843
#>  [67,] 5.315771
#>  [68,] 5.891158
#>  [69,] 6.878180
#>  [70,] 6.274264
#>  [71,] 6.723453
#>  [72,] 6.559701
#>  [73,] 6.681387
#>  [74,] 7.017915
#>  [75,] 5.946760
#>  [76,] 6.854163
#>  [77,] 6.493618
#>  [78,] 6.462032
#>  [79,] 6.157090
#>  [80,] 6.555189
#>  [81,] 6.993897
#>  [82,] 6.044429
#>  [83,] 6.714428
#>  [84,] 6.227685
#>  [85,] 6.984873
#>  [86,] 6.190132
#>  [87,] 6.751982
#>  [88,] 6.840626
#>  [89,] 6.185619
#>  [90,] 6.302793
#>  [91,] 6.522147
#>  [92,] 6.732477
#>  [93,] 7.234213
#>  [94,] 6.311817
#>  [95,] 6.807585
#>  [96,] 6.747470
#>  [97,] 6.592743
#>  [98,] 6.260727
#>  [99,] 6.550677
#> [100,] 6.424479
#> [101,] 6.274264
#> [102,] 6.798560
#> [103,] 6.751982
#> [104,] 6.428991
#> [105,] 6.663338