Description Usage Arguments Examples
This function takes an limitObject
, which is produced by the
calculate_limits
(or a list of limitObject
s) and calculates
the selective p-value / confidence intervals based on the given limitation and the true
residual standard deviation sd
. Since the true standard deviation is usually unknown in practice
one can plug in an estimate for the true standard deviation. This approach, however,
strongly depends on the goodness of the estimate and in particular produces unreliable results
if the number of covariates is relatively large in comparison to the number of observations.
1 | calculate_selinf(limitObject, y, sd, alpha = 0.05, ...)
|
limitObject |
either an object of |
y |
response vector |
sd |
standard deviation of error used for the p-value calculation (see details) |
alpha |
value for |
... |
options passed to |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | library(MASS)
# Fit initial model
cpus$perf <- log10(cpus$perf)
mod <- lm(perf ~ .-name, data = cpus)
# use the stepAIC function to find the best model in a backward
# stepwise search
cpus.lm <- stepAIC(mod, trace = FALSE, direction = "backward")
# recalculate all visited models
lom <- c(lapply(colnames(model.matrix(mod)), function(x)
update(mod, as.formula(paste0("perf ~ .-", x)))), list(mod))
# extract the components of all visited models
compsAIC <- extract_components(listOfModels = lom,
response = cpus$perf,
what = c("aic"))
# perform likelihood ratio test at level
alpha = 0.001
# check for non-significant variables
coefTable <- summary(cpus.lm)$coefficients
drop <- rownames(coefTable)[alpha < coefTable[,4]]
# drop non-significant variable
cpus.lm2 <- update(cpus.lm, as.formula(paste0(".~.-",drop)))
# extract components associated with the LRT comparison
compsLRT <- extract_components(list(cpus.lm, cpus.lm2),
response = cpus$perf,
what = "lrt",
alpha = alpha)
# naive inference
unadj_pvs <- summary(cpus.lm2)$coefficients[,4]
# now extract testvector, calculate limits and perform selective tests
# test vectors
vTs <- extract_testvec(cpus.lm2)
# calculate limits
limitsAIC <- calculate_limits(compsAIC, vTs)
limitsLRT <- calculate_limits(compsLRT, vTs)
# check restriction on p-values separately
cbind(
calculate_selinf(limitObject = limitsAIC, y = cpus$perf,
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)
cbind(
calculate_selinf(limitObject = limitsLRT, y = cpus$perf,
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)
# calculate p-values (does automatically combine limitObjects)
res <- calculate_selinf(limitObject = list(limitsAIC, limitsLRT),
y = cpus$perf,
# plugin estimate for true value
sd = summary(cpus.lm2)$sigma)
cbind(res, unadjusted_pval = unadj_pvs)
#########################################################
### Another example for a forward stepwise AIC search ###
#########################################################
library(MASS)
# use the cpus data
data("cpus")
# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)
# make a stepwise AIC-based forward search
# for all variables in the pool of possible covariates
varsInPool <- colnames(cpus)[-7]
# since the stepAIC function does not provide the models
# fitted in each step, we have to do the search 'manually'
improvement <- TRUE
listOfModelComps <- list()
# do the forward stepwise AIC search...
while(improvement & length(varsInPool)>0){
# compute all other models
allOtherMods <- lapply(varsInPool, function(thisvar)
update(currentmod, as.formula(paste0(". ~ . + ", thisvar))))
# store all models that were examined in this step
listOfModels <- append(allOtherMods, list(currentmod))
# save this list for later
listOfModelComps <- append(listOfModelComps, list(listOfModels))
# check the AIC of all models
aics <- sapply(listOfModels, AIC)
# what is the best model?
(wmaic <- which.min(aics))
# is there any improvement?
if(wmaic == length(listOfModels)) improvement <- FALSE
# redefine the current (best) model
currentmod <- listOfModels[[wmaic]]
# and update the variables available
varsInPool <- varsInPool[-wmaic]
}
# variables left, which did not improve the model
varsInPool
# the final model call
currentmod$call
# get the test vector from the current model
vTs <- extract_testvec(limo = currentmod)
# extract list of model components in each step when comparisons
# are done based on the AIC
listOfComps <- lapply(listOfModelComps, function(lom)
extract_components(listOfModels = lom, response = cpus$perf, what = "aic"))
# calculate the truncation limits for each of the comparisons in each iteration
listOfLimits <- lapply(listOfComps, function(lom)
calculate_limits(comps = lom, vTs = vTs))
# now compute selective inference, which adjust for the forward stepwise AIC search
# by supplying the lists of limits
calculate_selinf(limitObject = listOfLimits,
y = cpus$perf,
sd = sigma(currentmod))
########################################################
# now do that with the function provided in the package
########################################################
#' library(MASS)
# use the cpus data
data("cpus")
# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)
res <- forwardAIC_adjustedInference(yname = "perf",
data = cpus,
mod = currentmod,
var = NULL)
res$inf
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.