faux: Simulation for Factorial Designs

Documented in add_contrast contr_code_anova contr_code_difference contr_code_helmert contr_code_poly contr_code_sum contr_code_treatment get_contrast_vals

#' Anova code a factor
#' 
#' Anova coding (also called deviation or simple coding) sets the grand mean as the intercept. 
#' Each contrast compares one level with the reference level (base).
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param base the index of the level to use as baseline
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#' @export
#'
#' @examples
#' df <- sim_design(between = list(pet = c("cat", "dog")), 
#'                  mu = c(10, 20), plot = FALSE)
#' df$pet <- contr_code_anova(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df <- sim_design(between = list(pet = c("cat", "dog", "ferret")), 
#'                  mu = c(2, 4, 9), empirical = TRUE, plot = FALSE)
#'                  
#' df$pet <- contr_code_anova(df$pet, base = 1)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_anova(df$pet, base = 2)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_anova(df$pet, base = "ferret")
#' lm(y ~ pet, df) %>% broom::tidy()
contr_code_anova <- function(fct, levels = NULL, base = 1, colnames = NULL) {
  # make sure fct is a factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels)
  
  # create coding matrix
  n <- length(levels)
  if (!is.numeric(base)) base <- which(levels == base)
  tr_code <- stats::contr.treatment(n, base)
  my_code <- tr_code - 1/n
  
  # create column names
  colnames <- colnames %||% paste0("." , levels[-base], "-", levels[base])
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}


#' Sum code a factor
#' 
#' Sum coding sets the grand mean as the intercept. 
#' Each contrast compares one level with the grand mean.
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param omit the level to omit (defaults to the last level)
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#'
#' @export
#'
#' @examples
#' df <- sim_design(between = list(pet = c("cat", "dog", "bird", "ferret")), 
#'                  mu = c(2, 4, 9, 13), empirical = TRUE, plot = FALSE)
#' 
#' df$pet <- contr_code_sum(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_sum(df$pet, omit = "cat")
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_sum(df$pet, omit = 1)
#' lm(y ~ pet, df) %>% broom::tidy()
contr_code_sum <- function(fct, levels = NULL, omit = length(levels), colnames = NULL) {
  # make sure fct is a factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels)
  
  # create coding matrix
  n <- length(levels)
  if (!is.numeric(omit)) omit <- which(levels == omit)
  my_code <- stats::contr.sum(n)
  if (n != omit) {
    reorder <- c(setdiff(1:n, omit), omit)
    reorder <- sapply(1:n, function(x) which(x == reorder))
    my_code <- my_code[reorder, , drop = FALSE]
  }
  
  # create column names
  colnames <- colnames %||% paste0("." , levels[-omit], "-intercept")
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}


#' Treatment code a factor
#' 
#' Treatment coding sets the mean of the reference level (base) as the intercept. 
#' Each contrast compares one level with the reference level.
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param base the index of the level to use as baseline
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#' @export
#'
#' @examples
#' df <- sim_design(between = list(pet = c("cat", "dog")), 
#'                  mu = c(10, 20), plot = FALSE)
#' df$pet <- contr_code_treatment(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df <- sim_design(between = list(pet = c("cat", "dog", "ferret")), 
#'                  mu = c(2, 4, 9), empirical = TRUE, plot = FALSE)
#'                  
#' df$pet <- contr_code_treatment(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_treatment(df$pet, base = 2)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_treatment(df$pet, base = "ferret")
#' lm(y ~ pet, df) %>% broom::tidy()
contr_code_treatment <- function(fct, levels = NULL, base = 1, colnames = NULL) {
  # make sure fct is a factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels)
  
  # create coding matrix
  n <- length(levels)
  if (!is.numeric(base)) base <- which(levels == base)
  my_code <- stats::contr.treatment(n, base)
  
  # create column names
  colnames <- colnames %||% paste0("." , levels[-base], "-", levels[base])
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}



#' Helmert code a factor
#' 
#' Helmert coding sets the grand mean as the intercept. 
#' Each contrast compares one level with the mean of previous levels.
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#' @export
#'
#' @examples
#' df <- sim_design(between = list(pet = c("cat", "dog")), 
#'                  mu = c(10, 20), plot = FALSE)
#' df$pet <- contr_code_helmert(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df <- sim_design(between = list(pet = c("cat", "dog", "ferret")), 
#'                  mu = c(2, 4, 9), empirical = TRUE, plot = FALSE)
#'                  
#' df$pet <- contr_code_helmert(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' # reorder the levels to change the comparisons
#' df$pet <- contr_code_helmert(df$pet, levels = c("dog", "cat", "ferret"))
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
#' df$pet <- contr_code_helmert(df$pet, levels = c("ferret", "dog", "cat"))
#' lm(y ~ pet, df) %>% broom::tidy()
contr_code_helmert <- function(fct, levels = NULL, colnames = NULL) {
  # make sure fct is a factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels)
  
  # create coding matrix
  n <- length(levels)
  my_code <- stats::contr.helmert(n)
  for (i in 1:(n-1)) {
    my_code[, i] <- my_code[, i] / (i + 1)
  }
  
  # create column names
  comparison <- lapply(1:(n-1), function(x) {
    paste(levels[1:x], collapse = ".")
  })
  colnames <- colnames %||% paste0("." , levels[-1], "-", comparison)
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}


#' Polynomial code a factor
#' 
#' Polynomial coding sets the grand mean as the intercept. 
#' Each contrast tests a trend (linear, quadratic, cubic, etc.). This is only suitable for ordered factors.
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#' @export
#'
#' @examples
#' df <- sim_design(within = list(time = 1:6), 
#'                  mu = 1:6 + (1:6-3.5)^2, 
#'                  long = TRUE, plot = FALSE)
#'                  
#' df$time <- contr_code_poly(df$time)
#' lm(y ~ time, df) %>% broom::tidy()
#' 
contr_code_poly <- function(fct, levels = NULL, colnames = NULL) {
  # make sure fct is an ordered factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels, ordered = TRUE)
  
  # create coding matrix
  n <- length(levels)
  my_code <- stats::contr.poly(n)
  
  # create column names
  colnames <- colnames %||% paste0("^", 1:(n-1))
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}


#' Difference code a factor
#' 
#' Difference coding sets the grand mean as the intercept. 
#' Each contrast compares one level with the previous level.
#'
#' @param fct the factor to contrast code (or a vector)
#' @param levels the levels of the factor in order
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the factor with contrasts set
#' @export
#'
#' @examples
#' df <- sim_design(between = list(pet = c("cat", "dog", "ferret")), 
#'                  mu = c(2, 4, 9), empirical = TRUE, plot = FALSE)
#'                  
#' df$pet <- contr_code_difference(df$pet)
#' lm(y ~ pet, df) %>% broom::tidy()
#' 
contr_code_difference <- function(fct, levels = NULL, colnames = NULL) {
  # make sure fct is a factor with correct levels
  if (is.null(levels)) levels <- levels(fct) %||% sort(unique(fct))
  fct <- factor(fct, levels)
  
  # create coding matrix
  n <- length(levels)
  dif_code <- .col(n:(n-1))
  upper.tri <- !lower.tri(dif_code)
  dif_code[upper.tri] <- dif_code[upper.tri] - n
  my_code <- dif_code / n
  
  # create column names
  colnames <- colnames %||% paste0("." , levels[2:n], "-", levels[1:(n-1)])
  n_cols <- ncol(my_code)
  if (length(colnames) != n_cols) colnames <- paste0(rep(colnames, n_cols), 1:n_cols)
  dimnames(my_code) <- list(levels, colnames)
  
  # set contrast and return factor
  stats::contrasts(fct) <- my_code
  fct
}

#' Add a contrast to a data frame
#'
#' @param data the data frame
#' @param col the column to recode
#' @param contrast the contrast to recode to
#' @param levels the levels of the factor in order
#' @param ... arguments to pass to the contrast function (base or omit)
#' @param add_cols whether to just add the contrast to the existing column or also to create new explicit columns in the dataset (default)
#' @param colnames optional list of column names for the added contrasts
#'
#' @return the data frame with the recoded column and added columns (if add_cols == TRUE)
#' @export
#'
#' @examples
#' df <- sim_design(between = list(time = 1:6), plot = FALSE) %>%
#'    add_contrast("time", "poly")
#' 
#' # test all polynomial contrasts
#' lm(y ~ time, df) %>% broom::tidy()
#' 
#' # test only the linear and quadratic contrasts
#' lm(y ~ `time^1` + `time^2`, df) %>% broom::tidy()
add_contrast <- function(data, col, contrast = c("anova", "sum", "treatment", "helmert", "poly", "difference"), levels = NULL, ..., add_cols = TRUE, colnames = NULL) {
  fct <- data[[col]]
  if (is.null(fct)) stop("The column ", col, " doesn't exist", call. = FALSE)
  contrast <- match.arg(contrast)
  
  f <- match.fun(paste0("contr_code_", contrast))
  newfct <- f(fct, levels, colnames = colnames, ...)
  data[col] <- newfct
  
  if (isTRUE(add_cols)) {
    newvals <- get_contrast_vals(newfct)
    if (is.null(colnames)) {
      colnames(newvals) <- paste0(col, colnames(newvals))
    }
    
    data[colnames(newvals)] <- newvals
  }

  data
}

#' Get contrast values
#' 
#' Get a data frame of contrast values from a factor vector
#'
#' @param v a factor vector
#'
#' @return a data frame
#' @export
#'
#' @examples
#' dat <- sim_design(
#'   between = list(group = c("A", "B")), 
#'   n = 5, plot = FALSE)
#' 
#' get_contrast_vals(dat$group)
#' 
get_contrast_vals <- function(v) {
  contr <- stats::contrasts(v)
  recodes <- apply(contr, 2, function(x) {
    recodings <- stats::setNames(as.vector(x), rownames(contr))
    dplyr::recode(v, !!!recodings)
  })
  as.data.frame(recodes)
}

debruine/faux documentation built on Jan. 18, 2025, 2:29 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

debruine/faux
Simulation for Factorial Designs

R/contrasts.R
In debruine/faux: Simulation for Factorial Designs

Defines functions get_contrast_vals add_contrast contr_code_difference contr_code_poly contr_code_helmert contr_code_treatment contr_code_sum contr_code_anova

Documented in add_contrast contr_code_anova contr_code_difference contr_code_helmert contr_code_poly contr_code_sum contr_code_treatment get_contrast_vals

R Package Documentation

Browse R Packages

We want your feedback!

debruine/faux Simulation for Factorial Designs

R/contrasts.R In debruine/faux: Simulation for Factorial Designs

Defines functions get_contrast_vals add_contrast contr_code_difference contr_code_poly contr_code_helmert contr_code_treatment contr_code_sum contr_code_anova

Documented in add_contrast contr_code_anova contr_code_difference contr_code_helmert contr_code_poly contr_code_sum contr_code_treatment get_contrast_vals

R Package Documentation

Browse R Packages

We want your feedback!

debruine/faux
Simulation for Factorial Designs

R/contrasts.R
In debruine/faux: Simulation for Factorial Designs