coinflibs: Conditional Inference after Likelihood-based Selection

This function takes an limitObject, which is produced by the calculate_limits (or a list of limitObjects) and calculates the selective p-value / confidence intervals based on the given limitation and the true residual standard deviation sd. Since the true standard deviation is usually unknown in practice one can plug in an estimate for the true standard deviation. This approach, however, strongly depends on the goodness of the estimate and in particular produces unreliable results if the number of covariates is relatively large in comparison to the number of observations.

1	calculate_selinf(limitObject, y, sd, alpha = 0.05, ...)

`limitObject`	either an object of `class` `limitObject` (the result of the `calculate_limits` function) or a `list` of `limitObject`s
`y`	response vector
`sd`	standard deviation of error used for the p-value calculation (see details)
`alpha`	value for `(1-alpha)`-confidence interval construction. Defaults to `0.05`.
`...`	options passed to `ci_tnorm`.

library(MASS)
# Fit initial model
cpus$perf <- log10(cpus$perf)
mod <- lm(perf ~ .-name, data = cpus)

# use the stepAIC function to find the best model in a backward 
# stepwise search
cpus.lm <- stepAIC(mod, trace = FALSE, direction = "backward")

# recalculate all visited models
lom <- c(lapply(colnames(model.matrix(mod)), function(x) 
  update(mod, as.formula(paste0("perf ~ .-", x)))), list(mod))
  
# extract the components of all visited models
compsAIC <- extract_components(listOfModels = lom,
                               response = cpus$perf,
                               what = c("aic"))
                                 
# perform likelihood ratio test at level 
alpha = 0.001

# check for non-significant variables
coefTable <- summary(cpus.lm)$coefficients
drop <- rownames(coefTable)[alpha < coefTable[,4]]

# drop non-significant variable
cpus.lm2 <- update(cpus.lm, as.formula(paste0(".~.-",drop)))

# extract components associated with the LRT comparison
compsLRT <- extract_components(list(cpus.lm, cpus.lm2),
                               response = cpus$perf, 
                               what = "lrt",
                               alpha = alpha)
                               
# naive inference
unadj_pvs <- summary(cpus.lm2)$coefficients[,4]

# now extract testvector, calculate limits and perform selective tests
# test vectors

vTs <- extract_testvec(cpus.lm2)

# calculate limits
limitsAIC <- calculate_limits(compsAIC, vTs)
limitsLRT <- calculate_limits(compsLRT, vTs)

# check restriction on p-values separately
cbind(
calculate_selinf(limitObject = limitsAIC, y = cpus$perf, 
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)

cbind(
calculate_selinf(limitObject = limitsLRT, y = cpus$perf, 
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)

# calculate p-values (does automatically combine limitObjects)
res <- calculate_selinf(limitObject = list(limitsAIC, limitsLRT), 
                       y = cpus$perf, 
                       # plugin estimate for true value
                       sd = summary(cpus.lm2)$sigma)
                       
cbind(res, unadjusted_pval = unadj_pvs)


#########################################################
### Another example for a forward stepwise AIC search ###
#########################################################

library(MASS)
# use the cpus data
data("cpus")

# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)

# make a stepwise AIC-based forward search
# for all variables in the pool of possible covariates
varsInPool <- colnames(cpus)[-7]

# since the stepAIC function does not provide the models
# fitted in each step, we have to do the search 'manually'
improvement <- TRUE
listOfModelComps <- list() 

# do the forward stepwise AIC search...
while(improvement & length(varsInPool)>0){

 # compute all other models
 allOtherMods <- lapply(varsInPool, function(thisvar) 
                        update(currentmod, as.formula(paste0(". ~ . + ", thisvar))))
                        
 # store all models that were examined in this step  
 listOfModels <- append(allOtherMods, list(currentmod))
 # save this list for later
 listOfModelComps <- append(listOfModelComps, list(listOfModels))
 
 # check the AIC of all models
 aics <- sapply(listOfModels, AIC)
 # what is the best model?
 (wmaic <- which.min(aics))
 # is there any improvement?
 if(wmaic == length(listOfModels)) improvement <- FALSE
 # redefine the current (best) model
 currentmod <- listOfModels[[wmaic]]
 # and update the variables available
 varsInPool <- varsInPool[-wmaic]

}

# variables left, which did not improve the model
varsInPool
# the final model call
currentmod$call

# get the test vector from the current model
vTs <- extract_testvec(limo = currentmod)

# extract list of model components in each step when comparisons
# are done based on the AIC
listOfComps <- lapply(listOfModelComps, function(lom) 
 extract_components(listOfModels = lom, response = cpus$perf, what = "aic"))
 
# calculate the truncation limits for each of the comparisons in each iteration
listOfLimits <- lapply(listOfComps, function(lom) 
  calculate_limits(comps = lom, vTs = vTs))
  
# now compute selective inference, which adjust for the forward stepwise AIC search 
# by supplying the lists of limits
calculate_selinf(limitObject = listOfLimits,
                 y = cpus$perf, 
                 sd = sigma(currentmod))
                 
########################################################
# now do that with the function provided in the package
########################################################

#' library(MASS)
# use the cpus data
data("cpus")

# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)

res <- forwardAIC_adjustedInference(yname = "perf",
                                    data = cpus,
                                    mod = currentmod,
                                    var = NULL)
                 
res$inf