calculate_selinf: Calculate p-values / confidence intervals after...

Description Usage Arguments Examples

Description

This function takes an limitObject, which is produced by the calculate_limits (or a list of limitObjects) and calculates the selective p-value / confidence intervals based on the given limitation and the true residual standard deviation sd. Since the true standard deviation is usually unknown in practice one can plug in an estimate for the true standard deviation. This approach, however, strongly depends on the goodness of the estimate and in particular produces unreliable results if the number of covariates is relatively large in comparison to the number of observations.

Usage

1
calculate_selinf(limitObject, y, sd, alpha = 0.05, ...)

Arguments

limitObject

either an object of class limitObject (the result of the calculate_limits function) or a list of limitObjects

y

response vector

sd

standard deviation of error used for the p-value calculation (see details)

alpha

value for (1-alpha)-confidence interval construction. Defaults to 0.05.

...

options passed to ci_tnorm.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
library(MASS)
# Fit initial model
cpus$perf <- log10(cpus$perf)
mod <- lm(perf ~ .-name, data = cpus)

# use the stepAIC function to find the best model in a backward 
# stepwise search
cpus.lm <- stepAIC(mod, trace = FALSE, direction = "backward")

# recalculate all visited models
lom <- c(lapply(colnames(model.matrix(mod)), function(x) 
  update(mod, as.formula(paste0("perf ~ .-", x)))), list(mod))
  
# extract the components of all visited models
compsAIC <- extract_components(listOfModels = lom,
                               response = cpus$perf,
                               what = c("aic"))
                                 
# perform likelihood ratio test at level 
alpha = 0.001

# check for non-significant variables
coefTable <- summary(cpus.lm)$coefficients
drop <- rownames(coefTable)[alpha < coefTable[,4]]

# drop non-significant variable
cpus.lm2 <- update(cpus.lm, as.formula(paste0(".~.-",drop)))

# extract components associated with the LRT comparison
compsLRT <- extract_components(list(cpus.lm, cpus.lm2),
                               response = cpus$perf, 
                               what = "lrt",
                               alpha = alpha)
                               
# naive inference
unadj_pvs <- summary(cpus.lm2)$coefficients[,4]

# now extract testvector, calculate limits and perform selective tests
# test vectors

vTs <- extract_testvec(cpus.lm2)

# calculate limits
limitsAIC <- calculate_limits(compsAIC, vTs)
limitsLRT <- calculate_limits(compsLRT, vTs)

# check restriction on p-values separately
cbind(
calculate_selinf(limitObject = limitsAIC, y = cpus$perf, 
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)

cbind(
calculate_selinf(limitObject = limitsLRT, y = cpus$perf, 
sd = summary(cpus.lm2)$sigma),
unadjusted_pval = unadj_pvs
)

# calculate p-values (does automatically combine limitObjects)
res <- calculate_selinf(limitObject = list(limitsAIC, limitsLRT), 
                       y = cpus$perf, 
                       # plugin estimate for true value
                       sd = summary(cpus.lm2)$sigma)
                       
cbind(res, unadjusted_pval = unadj_pvs)


#########################################################
### Another example for a forward stepwise AIC search ###
#########################################################

library(MASS)
# use the cpus data
data("cpus")

# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)

# make a stepwise AIC-based forward search
# for all variables in the pool of possible covariates
varsInPool <- colnames(cpus)[-7]

# since the stepAIC function does not provide the models
# fitted in each step, we have to do the search 'manually'
improvement <- TRUE
listOfModelComps <- list() 

# do the forward stepwise AIC search...
while(improvement & length(varsInPool)>0){

 # compute all other models
 allOtherMods <- lapply(varsInPool, function(thisvar) 
                        update(currentmod, as.formula(paste0(". ~ . + ", thisvar))))
                        
 # store all models that were examined in this step  
 listOfModels <- append(allOtherMods, list(currentmod))
 # save this list for later
 listOfModelComps <- append(listOfModelComps, list(listOfModels))
 
 # check the AIC of all models
 aics <- sapply(listOfModels, AIC)
 # what is the best model?
 (wmaic <- which.min(aics))
 # is there any improvement?
 if(wmaic == length(listOfModels)) improvement <- FALSE
 # redefine the current (best) model
 currentmod <- listOfModels[[wmaic]]
 # and update the variables available
 varsInPool <- varsInPool[-wmaic]

}

# variables left, which did not improve the model
varsInPool
# the final model call
currentmod$call

# get the test vector from the current model
vTs <- extract_testvec(limo = currentmod)

# extract list of model components in each step when comparisons
# are done based on the AIC
listOfComps <- lapply(listOfModelComps, function(lom) 
 extract_components(listOfModels = lom, response = cpus$perf, what = "aic"))
 
# calculate the truncation limits for each of the comparisons in each iteration
listOfLimits <- lapply(listOfComps, function(lom) 
  calculate_limits(comps = lom, vTs = vTs))
  
# now compute selective inference, which adjust for the forward stepwise AIC search 
# by supplying the lists of limits
calculate_selinf(limitObject = listOfLimits,
                 y = cpus$perf, 
                 sd = sigma(currentmod))
                 
########################################################
# now do that with the function provided in the package
########################################################

#' library(MASS)
# use the cpus data
data("cpus")

# Fit initial model
cpus$perf <- log10(cpus$perf)
cpus$cach <- as.factor(cpus$cach)
cpus$name <- NULL
currentmod <- lm(perf ~ 1, data = cpus)

res <- forwardAIC_adjustedInference(yname = "perf",
                                    data = cpus,
                                    mod = currentmod,
                                    var = NULL)
                 
res$inf

davidruegamer/coinflibs documentation built on May 14, 2019, 10:33 a.m.