#' Conditional inference
#'
#' Conditional inference for lm and glm models
#' @param object An lm() or glm() object
#' @param cond.data Optional, a dataframe for the conditioning set; set as all covariates in the lm() or glm() object formula if not provided
#' @param param Optional, a vector of coefficients to conduct conditional inference; fit all coefficients if not provided; can be a mixture of string name and index
#' @param alg Optinal, a string for name of algorithm, current options are 'loess' and 'grf'
#' @param random.seed Optional, random seed for sample splitting
#' @param other.params Optional, other parameters for the regression algorithm; can include span and degree for loess
#' @param folds Optional, a list of two folds of indices for sample splitting; can be useful to control sample splitting
#' @param verbose Optional, whether or not to print summary of inference; TRUE by default
#' @return Standard error for conditional parameter, super-population parameter, fitted empirical parameter, confidence interval for conditional parameter
#' @examples
#' X = matrix(rnorm(1000*10), nrow=1000)
#' Y = X %*% matrix(c(1,2,3,rep(0,10-3)), ncol=1) + rnorm(1000) * 0.1
#' Z = data.frame(X[,1:2])
#' lm.mdl = lm(Y~., data = data.frame(X))
#' cond.inf(lm.mdl, Z, c("X2",2))
#'
#' X = matrix(rnorm(1000*10), nrow=1000)
#' logit.x = X %*% matrix(c(1,2,3,rep(0,10-3)), ncol=1) + X[,1]**2 + rnorm(1000) * 0.1
#' Y = rbinom(n, 1, exp(logit.x)/(1+exp(logit.x)))
#' Z = data.frame(X[,1:2])
#' glm.mdl = glm(Y~., data = data.frame(X), family='binomial')
#' cond.inf(glm.mdl, cond.data=Z)
#' cond.inf(glm.mdl, cond.data=Z, c(1, "X1", "X2"), alg='grf')
#'
#' @export
cond.inf <- function(object,cond.data=NULL,param=NULL,alg="loess",
random.seed=NULL,other.params=NULL,
folds=NULL,verbose=TRUE){
infl = influence(object)
coefs = coef(object)
if (!is.null(random.seed)){
set.seed(random.seed)
}
# if cond.data is not given, then use the covariates in the formula
if (is.null(cond.data)){
all.coef = names(object$coefficients)[2:length(object$coefficients)]
cond.data = object$model[all.coef]
}
# conditional inference for all coefficients by default
if (is.null(param)){param = 1:length(object$coefficients)}
names = param
for (i.par in 1:length(param)){
if (!is.na(suppressWarnings(as.integer(param[i.par])))){
names[i.par] = names(object$coefficients)[as.integer(param[i.par])]
}
}
n = length(object$fitted.values)
infl.vals = matrix(n * infl$coefficients[,names], ncol=length(param))
fitted.coef = coefs[names]
hat.sigmas = rep(0, length(param))
sup.sds = rep(0, length(param))
ci.lows = rep(0, length(param))
ci.his = rep(0, length(param))
# sample splitting for cross fitting
if (is.null(folds)){
fold1 = sample(1:n, floor(n/2))
}else{
fold1 = folds[[1]]
}
fold2 = setdiff(1:n, fold1)
cond.data1 = data.frame(cond.data[fold1,])
colnames(cond.data1) = colnames(cond.data)
cond.data2 = data.frame(cond.data[fold2,])
colnames(cond.data2) = colnames(cond.data)
for (i.par in 1:length(param)){
fit.Zr = infl.vals
# LOESS regression
if (alg == 'loess'){
loess.span = ifelse(is.null(other.params$span), 0.75, other.params$span)
loess.deg = ifelse(is.null(other.params$degree), 2, other.params$degree)
# cross-fit nonparametric regression models
Zr.1 = loess(infl.vals[fold2,i.par]~., data=cond.data2,
span=loess.span, degree=loess.deg)
Zr.2 = loess(infl.vals[fold1,i.par]~., data=cond.data1,
span=loess.span, degree=loess.deg)
fit.Zr[fold1,i.par] = predict(Zr.1, cond.data1)
fit.Zr[fold2,i.par] = predict(Zr.2, cond.data2)
}
# random forest regression
if (alg == 'grf'){
# cross-fit nonparametric regression models
Zr.1 = regression_forest(cond.data2, infl.vals[fold2,i.par], num.threads=1)
Zr.2 = regression_forest(cond.data1, infl.vals[fold1,i.par], num.threads=1)
fit.Zr[fold1,i.par] = predict(Zr.1, cond.data1)$predictions
fit.Zr[fold2,i.par] = predict(Zr.2, cond.data2)$predictions
}
hat.sigmas[i.par] = min(sd(infl.vals[,i.par]),
sd(infl.vals[,i.par] - fit.Zr[,i.par], na.rm=TRUE))
sup.sds[i.par] = sd(infl.vals[,i.par])
ci.lows[i.par] = fitted.coef[i.par] - qnorm(0.975) * hat.sigmas[i.par] / sqrt(n)
ci.his[i.par] = fitted.coef[i.par] + qnorm(0.975) * hat.sigmas[i.par] / sqrt(n)
}
# print the results
sup.p_vals = 2 * pnorm(abs(fitted.coef) / (sup.sds/sqrt(n)), lower.tail=FALSE)
cond.p_vals = 2 * pnorm(abs(fitted.coef) / (hat.sigmas/sqrt(n)), lower.tail=FALSE)
ret_table = cbind(fitted.coef, hat.sigmas, cond.p_vals, sup.sds, sup.p_vals)
colnames(ret_table) = c("Estimate", "Cond. Std. Error", "Cond. Pr(>|z|)",
"Sup. Std. Error", "Sup. Pr(>|z|)")
rownames(ret_table) = names
if (verbose){
cat("\n")
cat("Summary of conditional inference")
cat("\n\n")
print(ret_table)
cat("\n")
}
invisible(list("cond.std.err" = hat.sigmas, "std.err" = sup.sds,
"fitted.coef" = fitted.coef,
"cond.ci.low" = ci.lows, "cond.ci.upp" = ci.his,
"summary" = ret_table))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.