Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(etree)
## -----------------------------------------------------------------------------
# Set seed
set.seed(123)
# Number of observation
n_obs <- 100
# Response variable for regression
resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1),
rnorm(n_obs / 4, mean = 2, sd = 1),
rnorm(n_obs / 4, mean = 4, sd = 1),
rnorm(n_obs / 4, mean = 6, sd = 1))
## -----------------------------------------------------------------------------
# Response variable for classification
resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.85, 0.05, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.85, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.85, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.05, 0.85))))
## -----------------------------------------------------------------------------
# Numeric
x1 <- c(runif(n_obs / 4, min = 0, max = 0.55),
runif(n_obs / 4 * 3, min = 0.45, max = 1))
# Nominal
x2 <- factor(c(rbinom(n_obs / 4, 1, 0.1),
rbinom(n_obs / 4, 1, 0.9),
rbinom(n_obs / 2, 1, 0.1)))
# Functions
x3 <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1,
mu = rep(1, 100)),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1))
# Graphs
x4 <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)),
lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9)))
# Covariates list
cov_list <- list(X1 = x1, X2 = x2, X3 = x3, X4 = x4)
## -----------------------------------------------------------------------------
# Quick fit
ef_fit <- eforest(response = resp_reg,
covariates = cov_list,
ntrees = 12,
perf_metric = 'MSE')
## ---- fig.dim = c(25, 11)-----------------------------------------------------
# Plot of a tree from the ensemble
plot(ef_fit$ensemble[[4]])
## -----------------------------------------------------------------------------
# Retrieve performance metric
ef_fit$perf_metric
# OOB MSE for this ensemble
ef_fit$oob_score
## -----------------------------------------------------------------------------
# Predictions from the fitted object
pred <- predict(ef_fit)
print(pred)
## -----------------------------------------------------------------------------
# New set of covariates
n_obs <- 40
x1n <- c(runif(n_obs / 4, min = 0, max = 0.55),
runif(n_obs / 4 * 3, min = 0.45, max = 1))
x2n <- factor(c(rbinom(n_obs / 4, 1, 0.1),
rbinom(n_obs / 4, 1, 0.9),
rbinom(n_obs / 2, 1, 0.1)))
x3n <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1,
mu = rep(1, 100)),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1))
x4n <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)),
lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9)))
new_cov_list <- list(X1 = x1n, X2 = x2n, X3 = x3n, X4 = x4n)
# New response
new_resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1),
rnorm(n_obs / 4, mean = 2, sd = 1),
rnorm(n_obs / 4, mean = 4, sd = 1),
rnorm(n_obs / 4, mean = 6, sd = 1))
# Predictions
new_pred <- predict(ef_fit,
newdata = new_cov_list)
print(new_pred)
## -----------------------------------------------------------------------------
# MSE between the new response and its average
mean((new_resp_reg - mean(new_resp_reg)) ^ 2)
# MSE between the new response and predictions with the new set of covariates
mean((new_resp_reg - new_pred) ^ 2)
## ---- fig.dim = c(7, 6)-------------------------------------------------------
# Quick fit
ef_fit <- eforest(response = resp_cls,
covariates = cov_list,
ntrees = 12,
split_type = 'coeff')
## -----------------------------------------------------------------------------
# Predictions from the fitted object
pred <- predict(ef_fit)
print(pred)
## -----------------------------------------------------------------------------
# New response
new_resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.85, 0.05, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.85, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.85, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.05, 0.85))))
# Predictions
new_pred <- predict(ef_fit,
newdata = new_cov_list)
print(new_pred)
# Confusion matrix between the new response and predictions from the fitted tree
table(new_pred, new_resp_cls, dnn = c('Predicted', 'True'))
# Misclassification error for predictions on the new set of covariates
sum(new_pred != new_resp_cls) / length(new_resp_cls)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.