Polynomial base learner — BaselearnerPolynomial • compboost

[BaselearnerPolynomial] creates a polynomial base learner object. The base learner takes one feature $x$ and computes the polynomial terms $1, x, x^2, \dots, x^d$ for a given degree $d$; the constant term $1$ (the intercept) can be dropped via the intercept argument.

Format

S4 object.

Arguments

data_source: (InMemoryData)
Data object which contains the raw data (see ?InMemoryData).
blearner_type: (character(1))
Type of the base learner (if not specified, blearner_type = paste0("poly", d) is used). The unique id of the base learner is defined by appending blearner_type to the feature name: paste0(data_source$getIdentifier(), "_", blearner_type).
degree: (integer(1))
Polynomial degree.
intercept: (logical(1))
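Indicates whether the design matrix contains an intercept column (the constant term $1$ of the polynomial). With intercept = FALSE only the columns $x, \dots, x^d$ are created.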
bin_root: (integer(1))
The binning root $b$ used to reduce the $n$ observations to $n^{1/b}$ data points (default bin_root = 1, which means no binning is applied). A value of bin_root = 2 is suggested for the best approximation error (cf. Wood et al. (2017) Generalized additive models for gigadata: modeling the UK black smoke network daily data).

Usage


BaselearnerPolynomial$new(data_source, list(degree, intercept, bin_root))
BaselearnerPolynomial$new(data_source, blearner_type, list(degree, intercept, bin_root))

Fields

This class doesn't contain public fields.

Methods

$summarizeFactory(): () -> ()
$transformData(newdata): list(InMemoryData) -> matrix()
$getMeta(): () -> list()

Inherited methods from Baselearner

$getData(): () -> matrix()
$getDF(): () -> integer()
$getPenalty(): () -> numeric()
$getPenaltyMat(): () -> matrix()
$getFeatureName(): () -> character()
$getModelName(): () -> character()
$getBaselearnerId(): () -> character()

Examples

# Sample data:
x = runif(100)
y = 1 + 2*x + rnorm(100, 0, 0.2)
dat = data.frame(x, y)

# S4 wrapper

# Create new data object, a matrix is required as input:
data_mat = cbind(x)
data_source = InMemoryData$new(data_mat, "my_data_name")

# Create new linear and cubic base learner factories:
bl_lin = BaselearnerPolynomial$new(data_source,
  list(degree = 1))
bl_cub = BaselearnerPolynomial$new(data_source,
  list(intercept = FALSE, degree = 3, bin_root = 2))

# Get the transformed data:
head(bl_lin$getData())
#>      [,1]       [,2]
#> [1,]    1 0.70828422
#> [2,]    1 0.70091819
#> [3,]    1 0.22057668
#> [4,]    1 0.07709625
#> [5,]    1 0.58647345
#> [6,]    1 0.35549948
head(bl_cub$getData())
#>             [,1]         [,2]         [,3]
#> [1,] 0.003279946 1.075805e-05 3.528582e-08
#> [2,] 0.110713932 1.225757e-02 1.357084e-03
#> [3,] 0.218147917 4.758851e-02 1.038134e-02
#> [4,] 0.325581902 1.060036e-01 3.451285e-02
#> [5,] 0.433015887 1.875028e-01 8.119167e-02
#> [6,] 0.540449873 2.920861e-01 1.578579e-01

# Summarize factory:
bl_lin$summarizeFactory()
#> Linear base learner factory:
#> 	- Name of the used data: my_data_name
#> 	- Factory creates the following base learner: poly1

# Transform "new data":
newdata = list(InMemoryData$new(cbind(rnorm(5)), "my_data_name"))
bl_lin$transformData(newdata)
#> $design
#>      [,1]       [,2]
#> [1,]    1 -1.1363869
#> [2,]    1 -0.6597606
#> [3,]    1  0.1988824
#> [4,]    1 -0.5206944
#> [5,]    1  0.8838683
#> 
bl_cub$transformData(newdata)
#> $design
#>            [,1]      [,2]         [,3]
#> [1,] -1.1363869 1.2913752 -1.467501849
#> [2,] -0.6597606 0.4352841 -0.287183303
#> [3,]  0.1988824 0.0395542  0.007866632
#> [4,] -0.5206944 0.2711226 -0.141172025
#> [5,]  0.8838683 0.7812231  0.690498330
#> 

# R6 wrapper

cboost_lin = Compboost$new(dat, "y")
cboost_lin$addBaselearner("x", "lin", BaselearnerPolynomial, degree = 1)
cboost_lin$train(100, 0)
#> Train 100 iterations in 0 Seconds.
#> Final risk based on the train set: 0.021
#> 

cboost_cub = Compboost$new(dat, "y")
cboost_cub$addBaselearner("x", "cubic", BaselearnerPolynomial,
  intercept = FALSE, degree = 3, bin_root = 2)
cboost_cub$train(100, 0)
#> Train 100 iterations in 0 Seconds.
#> Final risk based on the train set: 0.065
#> 

# Access the base learners directly from the API (with bin_root = 2, the cubic base learner stores only sqrt(100) = 10 rows):
head(cboost_lin$baselearner_list$x_lin$factory$getData())
#>      [,1]       [,2]
#> [1,]    1 0.70828422
#> [2,]    1 0.70091819
#> [3,]    1 0.22057668
#> [4,]    1 0.07709625
#> [5,]    1 0.58647345
#> [6,]    1 0.35549948
cboost_cub$baselearner_list$x_cubic$factory$getData()
#>              [,1]         [,2]         [,3]
#>  [1,] 0.003279946 1.075805e-05 3.528582e-08
#>  [2,] 0.110713932 1.225757e-02 1.357084e-03
#>  [3,] 0.218147917 4.758851e-02 1.038134e-02
#>  [4,] 0.325581902 1.060036e-01 3.451285e-02
#>  [5,] 0.433015887 1.875028e-01 8.119167e-02
#>  [6,] 0.540449873 2.920861e-01 1.578579e-01
#>  [7,] 0.647883858 4.197535e-01 2.719515e-01
#>  [8,] 0.755317843 5.705050e-01 4.309126e-01
#>  [9,] 0.862751829 7.443407e-01 6.421813e-01
#> [10,] 0.970185814 9.412605e-01 9.131976e-01

gg_lin = plotPEUni(cboost_lin, "x")
gg_cub = plotPEUni(cboost_cub, "x")

library(ggplot2)
library(patchwork)

(gg_lin | gg_cub) &
  geom_point(data = dat, aes(x = x, y = y - c(cboost_lin$offset)), alpha = 0.2)