Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(etree)
## -----------------------------------------------------------------------------
# Set seed
set.seed(123)
# Number of observation
n_obs <- 100
# Response variable for regression
resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1),
rnorm(n_obs / 4, mean = 2, sd = 1),
rnorm(n_obs / 4, mean = 4, sd = 1),
rnorm(n_obs / 4, mean = 6, sd = 1))
## -----------------------------------------------------------------------------
# Response variable for classification
resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.85, 0.05, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.85, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.85, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.05, 0.85))))
## -----------------------------------------------------------------------------
# Numeric
x1 <- c(runif(n_obs / 4, min = 0, max = 0.55),
runif(n_obs / 4 * 3, min = 0.45, max = 1))
# Nominal
x2 <- factor(c(rbinom(n_obs / 4, 1, 0.1),
rbinom(n_obs / 4, 1, 0.9),
rbinom(n_obs / 2, 1, 0.1)))
# Functions
x3 <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1,
mu = rep(1, 100)),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1))
# Graphs
x4 <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)),
lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9)))
# Covariates list
cov_list <- list(X1 = x1, X2 = x2, X3 = x3, X4 = x4)
## -----------------------------------------------------------------------------
# Quick fit
etree_fit <- etree(response = resp_reg,
covariates = cov_list)
## ---- fig.dim = c(7.5, 6)-----------------------------------------------------
# Plot
plot(etree_fit)
## ---- fig.dim = c(9, 6)-------------------------------------------------------
# Fit with smaller minbucket
etree_fit1 <- etree(response = resp_reg,
covariates = cov_list,
minbucket = 1)
# Plot
plot(etree_fit1)
## ---- fig.dim = c(12, 7)------------------------------------------------------
# Fit with larger alpha
etree_fit2 <- etree(response = resp_reg,
covariates = cov_list,
alpha = 1)
# Plot
plot(etree_fit2)
## ---- fig.dim = c(7.5, 5)-----------------------------------------------------
# Fit with 'cluster'
etree_fit3 <- etree(response = resp_reg,
covariates = cov_list,
split_type = 'cluster')
# Plot
plot(etree_fit3)
## -----------------------------------------------------------------------------
# Print the fitted tree
print(etree_fit)
## -----------------------------------------------------------------------------
# Print a subtree
print(etree_fit[5])
## -----------------------------------------------------------------------------
# Equivalent method to print a subtree
print(etree_fit[[5]])
## ---- fig.dim = c(7, 4)-------------------------------------------------------
# Plot a subtree
plot(etree_fit[5])
## -----------------------------------------------------------------------------
# Get all nodes' ids
nodeids(etree_fit)
## -----------------------------------------------------------------------------
# Get all terminal nodes' ids
nodeids(etree_fit, terminal = TRUE)
## -----------------------------------------------------------------------------
# Get nodes' ids of a subtree
nodeids(etree_fit, from = 5)
## -----------------------------------------------------------------------------
# Select inner nodes' ids
tnodes <- nodeids(etree_fit, terminal = TRUE)
nodes <- 1:max(tnodes)
inodes <- nodes[-tnodes]
# Retrieve pvalues
nodeapply(etree_fit, ids = inodes, FUN = function (n) n$info$pvalue)
## -----------------------------------------------------------------------------
# Total number of nodes
length(etree_fit)
# Depth
depth(etree_fit)
# Width
width(etree_fit)
## -----------------------------------------------------------------------------
# Predictions from the fitted object
pred <- predict(etree_fit)
print(pred)
## -----------------------------------------------------------------------------
# MSE between the response and its average
mean((resp_reg - mean(resp_reg)) ^ 2)
# MSE between the response and predictions from the fitted tree
mean((resp_reg - pred) ^ 2)
## -----------------------------------------------------------------------------
# New set of covariates
n_obs <- 40
x1n <- c(runif(n_obs / 4, min = 0, max = 0.55),
runif(n_obs / 4 * 3, min = 0.45, max = 1))
x2n <- factor(c(rbinom(n_obs / 4, 1, 0.1),
rbinom(n_obs / 4, 1, 0.9),
rbinom(n_obs / 2, 1, 0.1)))
x3n <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1,
mu = rep(1, 100)),
fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1))
x4n <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)),
lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9)))
new_cov_list <- list(X1 = x1n, X2 = x2n, X3 = x3n, X4 = x4n)
# New response
new_resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1),
rnorm(n_obs / 4, mean = 2, sd = 1),
rnorm(n_obs / 4, mean = 4, sd = 1),
rnorm(n_obs / 4, mean = 6, sd = 1))
# Predictions
new_pred <- predict(etree_fit,
newdata = new_cov_list)
print(new_pred)
## -----------------------------------------------------------------------------
# MSE between the new response and its average
mean((new_resp_reg - mean(new_resp_reg)) ^ 2)
# MSE between the new response and predictions with the new set of covariates
mean((new_resp_reg - new_pred) ^ 2)
## ---- fig.dim = c(7, 6)-------------------------------------------------------
# Quick fit
etree_fit <- etree(response = resp_cls,
covariates = cov_list)
# Plot
plot(etree_fit)
## -----------------------------------------------------------------------------
# Predictions from the fitted object
pred <- predict(etree_fit)
print(pred)
## -----------------------------------------------------------------------------
# Confusion matrix between the response and predictions from the fitted tree
table(pred, resp_cls, dnn = c('Predicted', 'True'))
# Misclassification error for predictions from the fitted tree
sum(pred != resp_cls) / length(resp_cls)
## -----------------------------------------------------------------------------
# New response
new_resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.85, 0.05, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.85, 0.05, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.85, 0.05)),
sample(x = 1:4, size = n_obs / 4, replace = TRUE,
prob = c(0.05, 0.05, 0.05, 0.85))))
# Predictions
new_pred <- predict(etree_fit,
newdata = new_cov_list)
print(new_pred)
# Confusion matrix between the new response and predictions from the fitted tree
table(new_pred, new_resp_cls, dnn = c('Predicted', 'True'))
# Misclassification error for predictions on the new set of covariates
sum(new_pred != new_resp_cls) / length(new_resp_cls)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.