R/meta_ols.R

#' All possible subsets of regressions
#'
#' \code{meta_ols} returns the coefficients and t-values of all possible regression combinations
#'
#' This function provides the regression output of all combinations of k variables.
#' Note that this is not intended to improve predictions, but instead aims to investigate the
#' impact of the inclusion of a specific variable in a regression and as such can even be seen
#' as a meta-analysis looking at the response surface of all individual variables in the dataset.
#'
#' @param formula An R formula
#' @param fixed A vector of strings for variables that do not need to vary (default = NA)
#' @param data The dataframe to be used
#' @param k Number of variables in each regression that vary (default = 4)
#'
#' @return A list of two matrices, one with the coefficients and one with the t-values of the variables
#' @seealso \code{\link{lm}} \code{\link{coef}}
#' @importFrom stats lm coef model.frame model.matrix
#' @importFrom utils combn
#' @export
#'
#' @examples
#' result <- meta_ols(mpg ~., data = mtcars, k = 4)
meta_ols <- function(formula, fixed = NA, data, k = 4){

  # Design matrix and response are both derived from the same formula/data.
  X <- model.matrix(formula, data = data)
  y <- as.matrix(model.frame(formula, data = data)[1])

  # Remove the first column (the constant). drop = FALSE keeps X a matrix
  # even when a single regressor is left, so ncol()/colnames() keep working.
  X <- X[, -1, drop = FALSE]

  # `fixed` counts as "not supplied" when it is NULL, empty, or starts with NA.
  no_fixed <- is.null(fixed) || is.na(fixed[1])

  if (no_fixed) {
    f <- 0
    X_var <- X
    names_X <- colnames(X_var)
  } else {
    # Fixed regressors are read straight from `data`; every design-matrix
    # column whose name is not listed in `fixed` is allowed to vary.
    d_fixed <- data[fixed]
    fixed_index <- colnames(d_fixed)
    fixed_mat <- as.matrix(d_fixed)
    X_var <- X[, !colnames(X) %in% fixed, drop = FALSE]
    f <- length(fixed_index)
    names_X <- c(fixed_index, colnames(X_var))
  }
  K <- ncol(X_var)

  if (K < 1 || k < 1 || k > K) {
    stop("`k` must be between 1 and the number of varying regressors (",
         K, ")", call. = FALSE)
  }

  # Each column of `models` holds the indices (into X_var) of one combination.
  models <- combn(K, k)
  nr_poss <- ncol(models)

  # One row per regression; columns are: constant, fixed variables, then all
  # varying variables. Variables not in a given regression stay NA.
  coeff_mat <- matrix(NA, nrow = nr_poss, ncol = f + K + 1)
  t_mat <- matrix(NA, nrow = nr_poss, ncol = f + K + 1)

  for (i in seq_len(nr_poss)) {
    idx <- models[, i]
    varying <- X_var[, idx, drop = FALSE]
    m <- if (no_fixed) lm(y ~ varying) else lm(y ~ fixed_mat + varying)

    est <- unname(coef(m))
    # summary() leaves out coefficients that lm() could not estimate (NA due
    # to collinearity), so its t-values are put back at the positions of the
    # estimated coefficients instead of being recycled across all of them.
    t_val <- rep(NA_real_, length(est))
    t_val[!is.na(est)] <- coef(summary(m))[, "t value"]

    # Target columns: constant + fixed block, then the selected varying ones.
    cols <- c(seq_len(1 + f), idx + 1 + f)
    coeff_mat[i, cols] <- est
    t_mat[i, cols] <- t_val
  }

  colnames(coeff_mat) <- c("constant", names_X)
  colnames(t_mat) <- c("constant", names_X)

  list(coeff_mat, t_mat)

}

#' How many regressions are needed
#'
#' \code{how_many_reg} returns the number of regressions
#'
#' This function gives an indication of the time needed for the total analysis
#'
#' @param formula An R formula
#' @param fixed A vector of strings for variables that do not need to vary (default = NA)
#' @param data The dataframe to be used
#' @param k Number of variables in each regression that vary (default = 4)
#'
#' @return Number of possible combinations
#' @export
#'
#' @examples
#' how_many_reg(mpg ~., data = mtcars, k = 4)
how_many_reg <- function(formula, fixed = NA, data, k = 4){

  X <- model.matrix(formula, data = data)

  # Candidate regressors: every design-matrix column after the first one
  # (the constant).
  candidates <- colnames(X)[-1]

  if (is.null(fixed) || is.na(fixed[1])) {
    # No fixed variables: all candidates can vary.
    n <- length(candidates)
  } else {
    # Variables named in `fixed` are always included, so only the remaining
    # candidates are combined k at a time.
    n <- sum(!candidates %in% fixed)
  }
  choose(n, k)
}
Thdegraaff/metareg documentation built on May 6, 2019, 8:02 p.m.