inst/doc/TRexSelector_usage_and_simulations.R

## ----include = FALSE----------------------------------------------------------
# Store user's options()
old_options <- options()

library(knitr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  fig.retina = 2,
  out.width = "85%",
  dpi = 96
  # pngquant = "--speed=1"
)
options(width = 80)

## ----eval=FALSE---------------------------------------------------------------
#  # Option 1: Install stable version from CRAN
#  install.packages("tlars")
#  
#  # Option 2: install developer version from GitHub
#  install.packages("devtools")
#  devtools::install_github("jasinmachkour/tlars")

## ----eval=FALSE---------------------------------------------------------------
#  # Option 1: Install stable version from CRAN
#  install.packages("TRexSelector")
#  
#  # Option 2: install developer version from GitHub
#  install.packages("devtools")
#  devtools::install_github("jasinmachkour/TRexSelector")

## ----eval=FALSE---------------------------------------------------------------
#  library(TRexSelector)
#  help(package = "TRexSelector")
#  ?trex
#  ?random_experiments
#  ?lm_dummy
#  ?add_dummies
#  ?add_dummies_GVS
#  ?FDP
#  ?TPP
#  # etc.

## ----eval=FALSE---------------------------------------------------------------
#  citation("TRexSelector")

## -----------------------------------------------------------------------------
library(TRexSelector)

# Setup
n <- 75 # number of observations
p <- 150 # number of variables
num_act <- 3 # number of true active variables
beta <- c(rep(1, times = num_act), rep(0, times = p - num_act)) # coefficient vector
true_actives <- which(beta > 0) # indices of true active variables
num_dummies <- p # number of dummy predictors (also referred to as dummies)

# Generate Gaussian data
set.seed(123)
X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
y <- X %*% beta + stats::rnorm(n)

## -----------------------------------------------------------------------------
# Seed
set.seed(1234)

# Numerical zero
eps <- .Machine$double.eps

# Variable selection via T-Rex
res <- trex(X = X, y = y, tFDR = 0.05, verbose = FALSE)
selected_var <- which(res$selected_var > eps)
paste0("True active variables: ", paste(as.character(true_actives), collapse = ", "))
paste0("Selected variables: ", paste(as.character(selected_var), collapse = ", "))

## -----------------------------------------------------------------------------
# Computations might take up to 10 minutes... Please wait... 

# Numerical zero
eps <- .Machine$double.eps

# Seed
set.seed(1234)

# Setup
n <- 100 # number of observations
p <- 150 # number of variables

# Parameters
num_act <- 10 # number of true active variables
beta <- rep(0, times = p) # coefficient vector (all zeros first)
beta[sample(seq(p), size = num_act, replace = FALSE)] <- 1 # coefficient vector (active variables with non-zero coefficients)
true_actives <- which(beta > 0) # indices of true active variables
tFDR_vec <- c(0.1, 0.15, 0.2, 0.25) # target FDR levels
MC <- 100 # number of Monte Carlo runs per stopping point

# Initialize results vectors
FDP <- matrix(NA, nrow = MC, ncol = length(tFDR_vec))
TPP <- matrix(NA, nrow = MC, ncol = length(tFDR_vec))

# Run simulations
for (t in seq_along(tFDR_vec)) {
  for (mc in seq(MC)) {
    
    # Generate Gaussian data
    X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
    y <- X %*% beta + stats::rnorm(n)
    
    # Run T-Rex selector
    res <- trex(X = X, y = y, tFDR = tFDR_vec[t], verbose = FALSE)
    selected_var <- which(res$selected_var > eps)
    
    # Results
    FDP[mc, t] <- length(setdiff(selected_var, true_actives)) / max(1, length(selected_var))
    TPP[mc, t] <- length(intersect(selected_var, true_actives)) / max(1, length(true_actives))
  }
}

# Compute estimates of FDR and TPR by averaging FDP and TPP over MC Monte Carlo runs
FDR <- colMeans(FDP)
TPR <- colMeans(TPP)

## ----FDR_and_TPR, echo=FALSE, fig.align='center', message=FALSE, fig.width=12, fig.height=5, out.width = "95%"----
# Plot results
library(ggplot2)
library(patchwork)
tFDR_vec_percent <- 100 * tFDR_vec
plot_data <- data.frame(tFDR_vec = tFDR_vec_percent,
                        FDR = 100 * FDR,
                        TPR = 100 * TPR) # data frame containing data to be plotted (FDR and TPR in %)

# FDR vs. tFDR
FDR_vs_tFDR <-
  ggplot(plot_data, aes(x = tFDR_vec_percent, y = FDR)) +
    labs(x = "Target FDR",
         y = "FDR") +
    scale_x_continuous(breaks = tFDR_vec_percent, minor_breaks = c(), limits = c(tFDR_vec_percent[1], tFDR_vec_percent[length(tFDR_vec_percent)])) +
    scale_y_continuous(breaks = seq(0, 100, by = 10), minor_breaks = c(), limits = c(0, 100)) +
    geom_line(size = 1.5, colour = "#336C68") +
    geom_abline(slope = 1, colour = "red", linetype = 2, size = 1) +
    geom_point(size = 2.5, colour = "#336C68") +
    theme_bw(base_size = 16) +
    theme(panel.background = element_rect(fill = "white", color = "black", size = 1)) +
    coord_fixed(ratio =  0.75 * (tFDR_vec_percent[length(tFDR_vec_percent)] - tFDR_vec_percent[1]) / (100 - 0))

# TPR vs. tFDR
TPR_vs_tFDR <- 
  ggplot(plot_data, aes(x = tFDR_vec_percent, y = TPR)) +
    labs(x = "Target FDR",
         y = "TPR") +
    scale_x_continuous(breaks = tFDR_vec_percent, minor_breaks = c(), limits = c(tFDR_vec_percent[1], tFDR_vec_percent[length(tFDR_vec_percent)])) +
    scale_y_continuous(breaks = seq(0, 100, by = 10), minor_breaks = c(), limits = c(0, 100)) +
    geom_line(size = 1.5, colour = "#336C68") +
    geom_point(size = 2.5, colour = "#336C68") +
    theme_bw(base_size = 16) +
    theme(panel.background = element_rect(fill = "white", color = "black", size = 1)) +
    coord_fixed(ratio =  0.75 * (tFDR_vec_percent[length(tFDR_vec_percent)] - tFDR_vec_percent[1]) / (100 - 0))
FDR_vs_tFDR + TPR_vs_tFDR

## ----T-Rex_framework, echo=FALSE, fig.cap="Figure 1: Simplified overview of the T-Rex framework.", out.width = '85%'----
knitr::include_graphics("./figures/T-Rex_framework.png")

## ----EnlargedPredictorMatrix, echo=FALSE, fig.cap="Figure 2: The enlarged predictor matrix (predictor matrix with dummies).", out.width = '65%'----
knitr::include_graphics("./figures/predictor_matrix_with_dummies.png")

## ----include = FALSE----------------------------------------------------------
# Reset user's options()
options(old_options)

Try the TRexSelector package in your browser

Any scripts or data that you put into this service are public.

TRexSelector documentation built on May 29, 2024, 2:57 a.m.