# The 'bootstrap.en_lr' function
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-10-04
# Table of contents
# 1) bootstrap.en_lr
###
### 1) bootstrap.en_lr
###
bootstrap.en_lr = function( fit, y, X,
param, nRep = 1000, ci_width = .95 ) {
# Purpose:
# Computes p-values (with confidence intervals) for glmnet
# coefficients using Monte Carlo sampling.
# Arguments:
# fit - The output from the 'glmnet' function
# y - The dependent variable
# X - The matrix of predictors
# param - The best parameters for the mixing probability
# and the penalty term
# nRep - Number of iterations for Monte Carlo sampling
# ci_width - Width of confidence interval around p-values
# Returns:
# A list containing
# 1) A data frame with the original coefficient values,
# the frequency for which they were exceeded by the
# coefficients based on the shuffled data, the estimated
# p-value, and the lower and upper boundaries of the
# confidence interval for the p-values.
# 2) The duration it took the algorithm to run.
# 3) The width of the confidence intervals around the p-values.
# Estimate run_time
start = ert()
# Create index to shuffle
i = 1:length(y)
# Create matrix to store estimates
all_cf = matrix( 0, ncol(X), nRep )
rownames( all_cf ) = colnames( X )
# Extract parameters for glmnet estimation
best_alpha = param[1]
best_lambda = param[2]
# Extract original estimates
orig_cf = rep( 0, ncol(X) )
names( orig_cf ) = colnames(X)
cf = coef( fit, s = best_lambda )
orig_cf[ rownames( cf )[-1] ] = cf[-1,1]
# Loop over iterations
for ( nr in 1:nRep ) {
si = sample( i, replace = T )
btstrp = glmnet::glmnet(
x = X,
y = y[si],
family = 'binomial',
standardize = T,
alpha = best_alpha,
lambda = fit$lambda
)
cf = coef( btstrp, s = best_lambda )
sel = rownames( cf )[-1]
all_cf[sel,nr] = cf[-1,1]
}
# Report original coefficients
out = data.frame(
Coefficients = orig_cf
)
# Frequency that estimates based on reshuffled
# data exceed original values
out$Frequency_exceed = rowSums( all_cf >= orig_cf )
# Compute approximate p-value
out$p_value = out$Frequency_exceed/nRep
# Confidence interval around p-values
interval = (1 - ci_width)/2
interval = c( interval, interval + ci_width )
out$p_value_LB = qbinom( interval[1], nRep,
out$Frequency_exceed/nRep )/nRep
out$p_value_UB = qbinom( interval[2], nRep,
out$Frequency_exceed/nRep )/nRep
sel = out$p_value > .5
if ( any(sel) ) {
out$p_value[sel] = 1 - out$p_value[sel]
out$p_value_LB[sel] = 1 - out$p_value_LB[sel]
out$p_value_UB[sel] = 1 - out$p_value_UB[sel]
}
run_time = ert(start)
out = list(
CT = out,
run_time = run_time,
CI_width = ci_width
)
return( out )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.