Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----echo = TRUE, eval = FALSE------------------------------------------------
# install.packages("sgboost")
## ----eval = FALSE-------------------------------------------------------------
# remotes::install_github(
# "FabianObster/sgboost",
# build_vignettes = TRUE, dependencies = TRUE
# )
## ----warning=FALSE, message=FALSE---------------------------------------------
library(mboost)
library(sgboost)
library(dplyr)
library(ggplot2)
## ----warning=FALSE,message=FALSE, eval=TRUE-----------------------------------
set.seed(10)
n <- 100
p <- 200
X <- matrix(data = rnorm(n * p, mean = 0, sd = 1), nrow = n, ncol = p)
beta_star <- c(
rep(5, 5), c(5, -5, 2, 0, 0), rep(-5, 5),
c(2, -3, 8, 0, 0), rep(0, (p - 20))
)
groups <- rep(1:(p / 5), each = 5)
# Linear regression model
eps <- rnorm(n, mean = 0, sd = 1)
y <- X %*% beta_star + eps
# Logistic regression model
pr <- 1 / (1 + exp(-X %*% beta_star))
y_binary <- as.factor(rbinom(n, 1, pr))
# Input data.frames
df <- X %>%
as.data.frame() %>%
mutate_all(function(x) {
as.numeric(scale(x))
}) %>%
mutate(y = as.numeric(y), y_binary = y_binary)
group_df <- data.frame(
group_name = groups,
variable_name = head(colnames(df), -2)
)
## ----eval=TRUE----------------------------------------------------------------
sgb_formula_linear <- create_formula(
alpha = 0.4, group_df = group_df, outcome_name = "y", intercept = FALSE,
group_name = "group_name", var_name = "variable_name",
)
sgb_formula_binary <- create_formula(
alpha = 0.4, group_df = group_df, outcome_name = "y_binary", intercept = FALSE,
group_name = "group_name", var_name = "variable_name",
)
## ----eval=TRUE----------------------------------------------------------------
sgb_model_linear <- mboost(
formula = sgb_formula_linear, data = df,
control = boost_control(nu = 1, mstop = 600)
)
# cv_sgb_model_linear <- cvrisk(sgb_model_linear,
# folds = cv(model.weights(sgb_model_linear),
# type = 'kfold', B = 10))
sgb_model_binary <- mboost(
formula = sgb_formula_binary, data = df, family = Binomial(),
control = boost_control(nu = 1, mstop = 600)
)
# cv_sgb_model_binary <- cvrisk(sgb_model_binary,
# folds = cv(model.weights(sgb_model_linear),
# type = 'kfold', B = 10))
# mstop(cv_sgb_model_linear)
# mstop(cv_sgb_model_binary)
# plot(cv_sgb_model_linear)
# plot(cv_sgb_model_binary)
sgb_model_linear <- sgb_model_linear[320]
sgb_model_binary <- sgb_model_binary[540]
## ----eval=TRUE, fig.align='center', fig.height=5, fig.width=7-----------------
get_varimp(sgb_model = sgb_model_linear)$varimp %>% slice(1:10)
get_varimp(sgb_model = sgb_model_linear)$group_importance
plot_varimp(sgb_model = sgb_model_linear, n_predictors = 15)
## ----eval=TRUE, fig.align='center', fig.height=5, fig.width=7-----------------
get_coef(sgb_model = sgb_model_linear)$aggregate %>% slice(1:10)
get_coef(sgb_model = sgb_model_linear)$raw %>% slice(1:10)
## ----eval=TRUE, fig.align='center', fig.height=5, fig.width=7-----------------
plot_effects(sgb_model = sgb_model_binary, n_predictors = 10, base_size = 10)
plot_effects(sgb_model = sgb_model_binary, n_predictors = 10, plot_type = "clock", base_size = 10)
plot_effects(sgb_model = sgb_model_binary, n_predictors = 10, plot_type = "scatter", base_size = 10)
## ----eval=TRUE, fig.align='center', fig.height=5, fig.width=7-----------------
plot_path(sgb_model = sgb_model_linear[100])
plot_path(sgb_model = sgb_model_binary[100])
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.