#' All possible subsets of regressions
#'
#' \code{meta_ols} returns the coefficients and t-values of all possible
#' regression combinations.
#'
#' This function provides the regression output of all combinations of k
#' variables. Note that this is not intended to improve predictions, but
#' instead aims to investigate the impact of the inclusion of a specific
#' variable in a regression and as such can even be seen as a meta-analysis
#' looking at the response surface of all individual variables in the dataset.
#'
#' Every regression contains a constant, all \code{fixed} variables and one
#' combination of \code{k} of the remaining model-matrix columns. Cells that
#' belong to variables not included in a given regression are \code{NA}, as
#' are the cells of coefficients that \code{lm} could not estimate because
#' of collinearity.
#'
#' @param formula An R formula
#' @param fixed A vector of strings for variables that do not need to vary
#'   (default = NA). The columns are taken from \code{data} and are expected
#'   to be numeric.
#' @param data The dataframe to be used
#' @param k Number of variables in each regression that vary (default = 4)
#'
#' @return An unnamed list of two matrices with one row per regression: the
#'   first holds the coefficients, the second the t-values. Columns are
#'   \code{"constant"}, the fixed variables and the varying variables.
#' @seealso \code{\link{lm}} \code{\link{coef}}
#' @importFrom stats lm coef model.frame model.matrix
#' @importFrom utils combn
#' @export
#'
#' @examples
#' result <- meta_ols(mpg ~., data = mtcars, k = 4)
meta_ols <- function(formula, fixed = NA, data, k = 4) {
  mf <- model.frame(formula, data = data)
  # A plain vector response guarantees lm() fits a single-response model.
  y <- mf[[1L]]
  X <- model.matrix(formula, data = data)
  # Remove the constant; drop = FALSE keeps a matrix with a single regressor.
  X <- X[, -1L, drop = FALSE]

  # NULL / zero-length 'fixed' is treated like the NA default.
  has_fixed <- length(fixed) > 0L && !is.na(fixed[1L])
  if (has_fixed) {
    d_fixed <- data[fixed]
    # model.frame() may have removed incomplete rows; remove the same rows
    # here so the fixed columns line up with y and X.
    dropped <- attr(mf, "na.action")
    if (!is.null(dropped)) {
      d_fixed <- d_fixed[-as.integer(dropped), , drop = FALSE]
    }
    fixed_mat <- as.matrix(d_fixed)
    fixed_names <- colnames(d_fixed)
    X_var <- X[, !colnames(X) %in% fixed, drop = FALSE]
  } else {
    fixed_mat <- NULL
    fixed_names <- character(0)
    X_var <- X
  }

  f <- length(fixed_names)
  K <- ncol(X_var)
  if (k < 1 || k > K) {
    stop("'k' must be between 1 and the number of varying variables (", K, ")",
         call. = FALSE)
  }
  names_X <- c(fixed_names, colnames(X_var))

  # Each column of 'models' holds the X_var column indices of one regression.
  models <- combn(K, k)
  nr_poss <- ncol(models)
  coeff_mat <- matrix(NA, nrow = nr_poss, ncol = f + K + 1L)
  t_mat <- matrix(NA, nrow = nr_poss, ncol = f + K + 1L)

  for (i in seq_len(nr_poss)) {
    idx <- models[, i]
    # cbind(NULL, .) is a no-op, so this also covers the no-fixed case.
    design <- cbind(fixed_mat, X_var[, idx, drop = FALSE])
    m <- lm(y ~ design)
    est <- m$coefficients

    # summary() omits aliased (NA) coefficients, so place the t-values only
    # at the positions of the coefficients that were actually estimated.
    t_val <- rep(NA_real_, length(est))
    t_val[!is.na(est)] <- coef(summary(m))[, "t value"]

    # Constant and fixed variables first, then the varying ones shifted past.
    cols <- c(seq_len(1L + f), idx + 1L + f)
    coeff_mat[i, cols] <- est
    t_mat[i, cols] <- t_val
  }

  colnames(coeff_mat) <- c("constant", names_X)
  colnames(t_mat) <- c("constant", names_X)
  list(coeff_mat, t_mat)
}
#' How many regressions are needed
#'
#' \code{how_many_reg} returns the number of regressions
#'
#' This function gives an indication of the time needed for the total
#' analysis: it counts the model-matrix columns (excluding the constant and
#' any column named in \code{fixed}) and returns the number of ways to pick
#' \code{k} of them.
#'
#' @param formula An R formula
#' @param fixed A vector of strings for variables that do not need to vary (default = NA)
#' @param data The dataframe to be used
#' @param k Number of variables in each regression that vary (default = 4)
#'
#' @return Number of possible combinations
#' @importFrom stats model.matrix
#' @export
#'
#' @examples
#' how_many_reg(mpg ~., data = mtcars, k = 4)
how_many_reg <- function(formula, fixed = NA, data, k = 4) {
  X <- model.matrix(formula, data = data)
  # First model-matrix column is the constant, which never varies.
  regressors <- colnames(X)[-1L]
  # NULL / zero-length 'fixed' is treated like the NA default.
  has_fixed <- length(fixed) > 0L && !is.na(fixed[1L])
  n <- if (has_fixed) {
    # Only columns not named in 'fixed' take part in the combinations.
    sum(!regressors %in% fixed)
  } else {
    length(regressors)
  }
  choose(n, k)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.