Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library("flevr")
## ----load-biomarker-data------------------------------------------------------
# load the dataset
data("biomarkers")
library("dplyr")
# set up vector "y" of outcomes and matrix "x" of features
cc <- complete.cases(biomarkers)
y_cc <- biomarkers$mucinous[cc]
x_cc <- biomarkers %>%
na.omit() %>%
select(starts_with("lab"), starts_with("cea"))
x_df <- as.data.frame(x_cc)
x_names <- names(x_df)
## ----fit-sl-------------------------------------------------------------------
set.seed(1234)
# fit a Super Learner ensemble; note its simplicity, for speed
library("SuperLearner")
learners <- c("SL.glm", "SL.ranger.imp", "SL.glmnet")
V <- 2
fit <- SuperLearner::SuperLearner(Y = y_cc, X = x_df,
SL.library = learners,
cvControl = list(V = V),
family = "binomial")
# check the SL weights
fit$coef
# extract importance based on the whole Super Learner
sl_importance_all <- extract_importance_SL(
fit = fit, feature_names = x_names, import_type = "all"
)
sl_importance_all
## ----sl-best-alg--------------------------------------------------------------
sl_importance_best <- extract_importance_SL(
fit = fit, feature_names = x_names, import_type = "best"
)
sl_importance_best
## ----extrinsic-selection------------------------------------------------------
extrinsic_selected <- extrinsic_selection(
fit = fit, feature_names = x_names, threshold = 5, import_type = "all"
)
extrinsic_selected
## ----impute-setup-------------------------------------------------------------
n_imp <- 2
## ----impute, eval = FALSE-----------------------------------------------------
# library("mice")
# set.seed(20231121)
# mi_biomarkers <- mice::mice(data = biomarkers, m = n_imp, printFlag = FALSE)
# imputed_biomarkers <- mice::complete(mi_biomarkers, action = "long") %>%
# rename(imp = .imp, id = .id)
## ----extrinsic-selection-with-missing-data, eval = FALSE----------------------
# set.seed(20231121)
# # set up a list to collect selected sets
# all_selected_vars <- vector("list", length = 5)
# # for each imputed dataset, do extrinsic selection
# for (i in 1:n_imp) {
# # fit a Super Learner
# these_data <- imputed_biomarkers %>%
# filter(imp == i)
# this_y <- these_data$mucinous
# this_x <- these_data %>%
# select(starts_with("lab"), starts_with("cea"))
# this_x_df <- as.data.frame(this_x)
# fit <- SuperLearner::SuperLearner(Y = this_y, X = this_x_df,
# SL.library = learners,
# cvControl = list(V = V),
# family = "binomial")
# # do extrinsic selection
# all_selected_vars[[i]] <- extrinsic_selection(
# fit = fit, feature_names = x_names, threshold = 5, import_type = "all"
# )$selected
# }
# # perform extrinsic variable selection
# selected_vars <- pool_selected_sets(sets = all_selected_vars, threshold = 1 / n_imp)
# x_names[selected_vars]
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.