# inst/doc/extrinsic_selection.R

## ---- include = FALSE---------------------------------------------------------
# set the knitr chunk options `collapse` and `comment`
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library("flevr")

## ----load-biomarker-data------------------------------------------------------
# load the dataset
data("biomarkers")
library("dplyr")
# Restrict to complete cases. The single logical mask `cc` subsets both the
# outcome and the features, so "y" and "x" stay row-aligned by construction
# (rather than filtering once with complete.cases() and again with na.omit()).
cc <- complete.cases(biomarkers)
# vector "y" of outcomes: the `mucinous` column
y_cc <- biomarkers$mucinous[cc]
# features "x": the columns whose names start with "lab" or "cea"
x_cc <- biomarkers[cc, , drop = FALSE] %>%
  select(starts_with("lab"), starts_with("cea"))
# plain data.frame copy of the features, plus the feature names
x_df <- as.data.frame(x_cc)
x_names <- names(x_df)

## ----fit-sl-------------------------------------------------------------------
set.seed(1234)
# Super Learner ensemble, kept deliberately simple (three learners, V = 2
# cross-validation folds) for speed
library("SuperLearner")
learners <- c("SL.glm", "SL.ranger.imp", "SL.glmnet")
V <- 2
fit <- SuperLearner::SuperLearner(
  Y = y_cc,
  X = x_df,
  SL.library = learners,
  cvControl = list(V = V),
  family = "binomial"
)
# inspect the SL weights stored on the fitted object
fit[["coef"]]
# importance extracted with import_type = "all", i.e. based on the whole
# Super Learner
sl_importance_all <- extract_importance_SL(
  fit = fit,
  feature_names = x_names,
  import_type = "all"
)
sl_importance_all

## ----sl-best-alg--------------------------------------------------------------
# importance extracted from the same fit with import_type = "best"
# (presumably the single best learner, per the chunk name; confirm in flevr docs)
sl_importance_best <- extract_importance_SL(
  fit = fit,
  feature_names = x_names,
  import_type = "best"
)
sl_importance_best

## ----extrinsic-selection------------------------------------------------------
# extrinsic selection on the fitted Super Learner, using threshold = 5 and
# import_type = "all"
extrinsic_selected <- extrinsic_selection(
  fit = fit,
  feature_names = x_names,
  threshold = 5,
  import_type = "all"
)
extrinsic_selected

## ----impute-setup-------------------------------------------------------------
# number of imputed datasets: passed as `m` to mice::mice() and used as the
# loop bound in the (eval = FALSE) imputation chunks below
n_imp <- 2

## ----impute, eval = FALSE-----------------------------------------------------
#  library("mice")
#  set.seed(20231121)
#  mi_biomarkers <- mice::mice(data = biomarkers, m = n_imp, printFlag = FALSE)
#  imputed_biomarkers <- mice::complete(mi_biomarkers, action = "long") %>%
#    rename(imp = .imp, id = .id)

## ----extrinsic-selection-with-missing-data, eval = FALSE----------------------
#  set.seed(20231121)
#  # set up a list to collect selected sets
#  all_selected_vars <- vector("list", length = n_imp)
#  # for each imputed dataset, do extrinsic selection
#  for (i in 1:n_imp) {
#    # fit a Super Learner
#    these_data <- imputed_biomarkers %>%
#      filter(imp == i)
#    this_y <- these_data$mucinous
#    this_x <- these_data %>%
#      select(starts_with("lab"), starts_with("cea"))
#    this_x_df <- as.data.frame(this_x)
#    fit <- SuperLearner::SuperLearner(Y = this_y, X = this_x_df,
#                                    SL.library = learners,
#                                    cvControl = list(V = V),
#                                    family = "binomial")
#    # do extrinsic selection
#    all_selected_vars[[i]] <- extrinsic_selection(
#      fit = fit, feature_names = x_names, threshold = 5, import_type = "all"
#    )$selected
#  }
#  # pool the selected sets across the imputed datasets
#  selected_vars <- pool_selected_sets(sets = all_selected_vars, threshold = 1 / n_imp)
#  x_names[selected_vars]

# Try the flevr package in your browser

# Any scripts or data that you put into this service are public.

# flevr documentation built on June 22, 2024, 7:33 p.m.