R/bruceR-stats_3_manova.R

Defines functions EMMEANS MANOVA fix_long_data levene_test

Documented in EMMEANS MANOVA

#### Demo Data ####


# library(rio)
# between.1 = import("data-raw/between.1.sav", haven=F); names(between.1)[2]="SCORE"
# between.2 = import("data-raw/between.2.sav", haven=F)
# between.3 = import("data-raw/between.3.sav", haven=F)
# usethis::use_data(between.1, overwrite=TRUE)
# usethis::use_data(between.2, overwrite=TRUE)
# usethis::use_data(between.3, overwrite=TRUE)
# within.1 = import("data-raw/within.1.sav", haven=F)
# within.2 = import("data-raw/within.2.sav", haven=F)
# within.3 = import("data-raw/within.3.sav", haven=F)
# usethis::use_data(within.1, overwrite=TRUE)
# usethis::use_data(within.2, overwrite=TRUE)
# usethis::use_data(within.3, overwrite=TRUE)
# mixed.2_1b1w = import("data-raw/mixed.2_1b1w.sav", haven=F)
# mixed.3_1b2w = import("data-raw/mixed.3_1b2w.sav", haven=F)
# mixed.3_2b1w = import("data-raw/mixed.3_2b1w.sav", haven=F)
# usethis::use_data(mixed.2_1b1w, overwrite=TRUE)
# usethis::use_data(mixed.3_1b2w, overwrite=TRUE)
# usethis::use_data(mixed.3_2b1w, overwrite=TRUE)


#' Demo data.
#'
#' @description
#' Demo datasets of multi-factor ANOVA as examples to show how the functions
#' \code{\link{MANOVA}} and \code{\link{EMMEANS}} work.
#'
#' @format
#' \describe{
#'   \item{\strong{1. Between-Subjects Design}}{
#'     \itemize{
#'       \item \code{between.1} - A(4)
#'       \item \code{between.2} - A(2) * B(3)
#'       \item \code{between.3} - A(2) * B(2) * C(2)
#'     }
#'   }
#'   \item{\strong{2. Within-Subjects Design}}{
#'     \itemize{
#'       \item \code{within.1} - A(4)
#'       \item \code{within.2} - A(2) * B(3)
#'       \item \code{within.3} - A(2) * B(2) * C(2)
#'     }
#'   }
#'   \item{\strong{3. Mixed Design}}{
#'     \itemize{
#'       \item \code{mixed.2_1b1w} - A(2, between) * B(3, within)
#'       \item \code{mixed.3_1b2w} - A(2, between) * B(2, within) * C(2, within)
#'       \item \code{mixed.3_2b1w} - A(2, between) * B(2, within) * C(2, between)
#'     }
#'   }
#' }
#'
#' @source
#' \href{https://book.douban.com/subject/1195181/}{Multi-Factor Experimental Design in Psychology and Education}
#'
#' @name bruceR-demodata
#' @keywords internal
#' @aliases
#' between.1 between.2 between.3
#' mixed.2_1b1w mixed.3_1b2w mixed.3_2b1w
#' within.1 within.2 within.3
NULL




#### MANOVA ####


## Levene's Test for Homogeneity of Variance
levene_test = function(data, id, dvs, ivs.between) {
  ## data should be wide-format
  Print("\n\n\nLevene\u2019s Test for Homogeneity of Variance:")
  if(is.null(ivs.between)) {
    Print("No between-subjects factors. No need to do the Levene\u2019s test.")
  } else {
    data = data.table::as.data.table(data[c(id, dvs, ivs.between)])
    data = unique(data, by=id) %>% as.data.frame()
    for(iv in ivs.between)
      data[[iv]] = as.factor(data[[iv]])
    levene = data.frame()
    for(dv in dvs) {
      f = as.formula(Glue("{dv} ~ {paste(ivs.between, collapse='*')}"))
      lev = car::leveneTest(f, data, center=mean)
      lev = cbind(lev[1, "F value"],
                  lev[1, "Df"],
                  lev[2, "Df"],
                  lev[1, "Pr(>F)"]) %>% as.data.frame()
      names(lev) = c("Levene\u2019s F", "df1", "df2", "pval")
      row.names(lev) = paste("DV:", dv)
      levene = rbind(levene, lev)
    }
    print_table(levene, digits=c(3, 0, 0, 0))
  }
}


fix_long_data = function(data.long, ivs) {
  # Ensure Factorized Variables
  for(iv in ivs) {
    data.long[[iv]] = as.factor(data.long[[iv]])
    suppressWarnings({
      levels = levels(data.long[[iv]])
      levels.num = as.numeric(as.character(levels))
    })
    if(all(is.na(levels.num))==FALSE) {
      # numeric levels ==> character levels ("varnum")
      data.long[[iv]] = factor(data.long[[iv]],
                               levels=levels,
                               labels=paste0(iv, levels))
    }
  }
  return(data.long)
}


#' Multi-factor ANOVA.
#'
#' @description
#' Multi-factor ANOVA (between-subjects, within-subjects, and mixed designs),
#' with and without covariates (ANCOVA).
#'
#' This function is based on and extends \code{\link[afex:aov_car]{afex::aov_ez()}}.
#' You only need to specify the data, dependent variable(s), and factors
#' (between-subjects and/or within-subjects).
#' Almost all results you need will be displayed together,
#' including effect sizes (partial \eqn{\eta^2}) and their confidence intervals (CIs).
#' 90\% CIs for partial \eqn{\eta^2} (two-sided) are reported, following Steiger (2004).
#' In addition to partial \eqn{\eta^2}, it also reports generalized \eqn{\eta^2}, following Olejnik & Algina (2003).
#'
#' How to prepare your data and specify the arguments of \code{MANOVA}?
#' \itemize{
#'   \item \strong{Wide-format data} (one person in one row, and repeated measures in multiple columns):
#'   \describe{
#'     \item{Betweem-subjects design}{\code{MANOVA(data=, dv=, between=, ...)}}
#'     \item{Within-subjects design}{\code{MANOVA(data=, dvs=, dvs.pattern=, within=, ...)}}
#'     \item{Mixed design}{\code{MANOVA(data=, dvs=, dvs.pattern=, between=, within=, ...)}}
#'   }
#'   \item \strong{Long-format data} (one person in multiple rows, and repeated measures in one column):
#'   \describe{
#'     \item{Betweem-subjects design}{(not applicable)}
#'     \item{Within-subjects design}{\code{MANOVA(data=, subID=, dv=, within=, ...)}}
#'     \item{Mixed design}{\code{MANOVA(data=, subID=, dv=, between=, within=, ...)}}
#'   }
#' }
#'
#' @details
#' If observations are not uniquely identified in user-defined long-format data,
#' the function takes averages across those multiple observations for each case.
#' In technical details, it specifies \code{fun_aggregate=mean} in \code{\link[afex:aov_car]{afex::aov_ez()}}
#' and \code{values_fn=mean} in \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}}.
#'
#' @param data Data frame. Both \strong{wide-format} and \strong{long-format} are supported.
#' @param subID Subject ID (the column name). Only necessary for \strong{long-format} data.
#' @param dv Dependent variable.
#' \itemize{
#'   \item For \strong{wide-format} data, \code{dv} only can be used for between-subjects designs.
#'   For within-subjects and mixed designs, please use \code{dvs} and \code{dvs.pattern}.
#'   \item For \strong{long-format} data, \code{dv} is the outcome variable.
#' }
#' @param dvs Repeated measures. Only for \strong{wide-format} data (within-subjects or mixed designs).
#'
#' Can be:
#' \itemize{
#'   \item \code{"start:stop"} to specify the range of variables
#'   (sensitive to the order of variables):
#'
#'   e.g., \code{"A1B1:A2B3"} is matched to all variables in the data
#'   between \code{"A1B1"} and \code{"A2B3"}
#'
#'   \item a character vector to directly specify variables
#'   (insensitive to the order of variables):
#'
#'   e.g., \code{c("Cond1", "Cond2", "Cond3")} or \code{cc("Cond1, Cond2, Cond3")}
#'
#'   See \code{\link{cc}} for its usage.
#' }
#' @param dvs.pattern If you use \code{dvs}, you should also specify the pattern of variable names
#' using \emph{regular expression}.
#'
#' Examples:
#' \itemize{
#'   \item \code{"Cond(.)"} extracts levels from \code{"Cond1", "Cond2", "Cond3", ...}
#'   You may rename the factor using the \code{within} argument (e.g., \code{within="Condition"})
#'   \item \code{"X(..)Y(..)"} extracts levels from \code{"X01Y01", "X02Y02", "XaaYbc", ...}
#'   \item \code{"X(.+)Y(.+)"} extracts levels from \code{"X1Y1", "XaYb", "XaY002", ...}
#' }
#'
#' Tips on regular expression:
#' \itemize{
#'   \item \code{"(.)"} extracts any single character (number, letter, and other symbols)
#'   \item \code{"(.+)"} extracts >= 1 character(s)
#'   \item \code{"(.*)"} extracts >= 0 character(s)
#'   \item \code{"([0-9])"} extracts any single number
#'   \item \code{"([a-z])"} extracts any single letter
#'   \item More information: \href{https://regexr.com/}{Link 1 (in English)} and
#'         \href{https://www.jb51.net/shouce/jquery1.82/regexp.html}{Link 2 (in Chinese)}
#' }
#'
#' @param between Between-subjects factor(s). Multiple variables should be included in a character vector \code{c()}.
#' @param within Within-subjects factor(s). Multiple variables should be included in a character vector \code{c()}.
#' @param covariate Covariates. Multiple variables should be included in a character vector \code{c()}.
#' @param ss.type Type of sums of squares (SS) for ANOVA. Defaults to \code{"III"}.
#' Possible values are \code{"II"}, \code{"III"}, \code{2}, or \code{3}.
#' @param sph.correction [Only for repeated measures with >= 3 levels]
#'
#' Sphericity correction method for adjusting the degrees of freedom (\emph{df}) when the sphericity assumption is violated. Defaults to \code{"none"}.
#' If Mauchly's test of sphericity is significant, you may set it to \code{"GG"} (Greenhouse-Geisser) or \code{"HF"} (Huynh-Feldt).
#' @param aov.include Include the \code{aov} object in the returned object?
#' Defaults to \code{FALSE}, as suggested by \code{\link[afex:aov_car]{afex::aov_ez()}}
#' (please see the \code{include_aov} argument in this help page, which provides a detailed explanation).
#' If \code{TRUE}, you should also specify \code{model.type="univariate"} in \code{\link{EMMEANS}}.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
## @param which.observed \strong{[only effective for computing generalized \eqn{\eta^2}]}
##
## Factors that are observed or measured (e.g., gender, age group, measured covariates) but not experimentally manipulated. Defaults to \code{NULL}.
## The generalized \eqn{\eta^2} requires correct specification of the observed (vs. manipulated) variables.
## (If all the variables in \code{between} and \code{within} are set to \code{observed}, then generalized \eqn{\eta^2} will be equal to \eqn{\eta^2}.)
#'
#' @return
#' A result object (list) returned by
#' \code{\link[afex:aov_car]{afex::aov_ez()}},
#' along with several other elements:
#' \code{between}, \code{within},
#' \code{data.wide}, \code{data.long}.
#'
#' @section Interaction Plot:
#' You can save the returned object and use the \code{\link[emmeans:emmip]{emmeans::emmip()}} function
#' to create an interaction plot (based on the fitted model and a formula specification).
#' For usage, please see the help page of \code{\link[emmeans:emmip]{emmeans::emmip()}}.
#' It returns an object of class \code{ggplot}, which can be easily modified and saved using \code{ggplot2} syntax.
#'
#' @examples
#' #### Between-Subjects Design ####
#' \donttest{
#' between.1
#' MANOVA(between.1, dv="SCORE", between="A")
#'
#' between.2
#' MANOVA(between.2, dv="SCORE", between=c("A", "B"))
#'
#' between.3
#' MANOVA(between.3, dv="SCORE", between=c("A", "B", "C"))
#'
#' ## How to create an interaction plot using `emmeans::emmip()`?
#' ## See help page for its usage: ?emmeans::emmip()
#' m = MANOVA(between.2, dv="SCORE", between=c("A", "B"))
#' emmip(m, ~ A | B, CIs=TRUE)
#' emmip(m, ~ B | A, CIs=TRUE)
#' emmip(m, B ~ A, CIs=TRUE)
#' emmip(m, A ~ B, CIs=TRUE)
#'
#'
#' #### Within-Subjects Design ####
#'
#' within.1
#' MANOVA(within.1, dvs="A1:A4", dvs.pattern="A(.)",
#'        within="A")
#' ## the same:
#' MANOVA(within.1, dvs=c("A1", "A2", "A3", "A4"), dvs.pattern="A(.)",
#'        within="MyFactor")  # renamed the within-subjects factor
#'
#' within.2
#' MANOVA(within.2, dvs="A1B1:A2B3", dvs.pattern="A(.)B(.)",
#'        within=c("A", "B"))
#'
#' within.3
#' MANOVA(within.3, dvs="A1B1C1:A2B2C2", dvs.pattern="A(.)B(.)C(.)",
#'        within=c("A", "B", "C"))
#'
#'
#' #### Mixed Design ####
#'
#' mixed.2_1b1w
#' MANOVA(mixed.2_1b1w, dvs="B1:B3", dvs.pattern="B(.)",
#'        between="A", within="B")
#' MANOVA(mixed.2_1b1w, dvs="B1:B3", dvs.pattern="B(.)",
#'        between="A", within="B", sph.correction="GG")
#'
#' mixed.3_1b2w
#' MANOVA(mixed.3_1b2w, dvs="B1C1:B2C2", dvs.pattern="B(.)C(.)",
#'        between="A", within=c("B", "C"))
#'
#' mixed.3_2b1w
#' MANOVA(mixed.3_2b1w, dvs="B1:B2", dvs.pattern="B(.)",
#'        between=c("A", "C"), within="B")
#'
#'
#' #### Other Examples ####
#'
#' data.new = mixed.3_1b2w
#' names(data.new) = c("Group", "Cond_01", "Cond_02", "Cond_03", "Cond_04")
#' MANOVA(data.new,
#'        dvs="Cond_01:Cond_04",
#'        dvs.pattern="Cond_(..)",
#'        between="Group",
#'        within="Condition")  # rename the factor
#'
#' # ?afex::obk.long
#' MANOVA(afex::obk.long,
#'        subID="id",
#'        dv="value",
#'        between=c("treatment", "gender"),
#'        within=c("phase", "hour"),
#'        cov="age",
#'        sph.correction="GG")
#' }
#' @references
#' Olejnik, S., & Algina, J. (2003). Generalized eta and omega squared statistics: Measures of effect size for some common research designs.
#' \emph{Psychological Methods, 8}(4), 434--447.
#'
#' Steiger, J. H. (2004). Beyond the F test: Effect size confidence intervals and tests of close fit in the analysis of variance and contrast analysis.
#' \emph{Psychological Methods, 9}(2), 164--182.
#'
#' @seealso \code{\link{TTEST}}, \code{\link{EMMEANS}}, \code{\link{bruceR-demodata}}
#'
#' @export
MANOVA = function(
    data, subID=NULL, dv=NULL,
    dvs=NULL, dvs.pattern=NULL,
    between=NULL, within=NULL, covariate=NULL,
    ss.type="III",
    sph.correction="none",
    # which.observed=NULL,
    aov.include=FALSE,
    digits=3,
    file=NULL
) {
  ## Initialize
  data = as.data.frame(data)
  if(is.null(within)) {
    if(is.null(between)) {
      stop("Either `between` or `within` or both should be specified.\nSee: help(MANOVA)", call.=FALSE)
    } else {
      design = "Between-Subjects Design"
      format = "wide"
      if(is.null(dv))
        stop("`dv` should be specified.\nSee: help(MANOVA)", call.=FALSE)
      if(!is.null(dvs) | !is.null(dvs.pattern))
        stop("`dvs` should not be used for between-subjects designs. Please use `dv` instead.\nSee: help(MANOVA)", call.=FALSE)
    }
  } else {
    if(is.null(between)) {
      design = "Within-Subjects Design"
    } else {
      design = "Mixed Design"
    }
    if((!is.null(dv) & is.null(subID)) | (!is.null(dvs) & is.null(dvs.pattern))) {
      stop(Glue("
      Wrong usage of MANOVA().
      - For wide-format data, please specify both `dvs` and `dvs.pattern`.
      - For long-format data, please specify both `dv` and `subID`.
      See: help(MANOVA)"), call.=FALSE)
    }
    format = ifelse(!is.null(dv) & !is.null(subID),
                    "long", "wide")
  }
  if(is.null(subID)) {
    data$bruceR.ID = 1:nrow(data)
    subID = "bruceR.ID"
  }

  ## Wide and Long Data
  ## Wide: dv (between) | dvs, dvs.pattern
  ## Long: dv (within)
  if(is.null(dv)) {
    data.wide = data
    if(length(dvs)==1 & any(grepl(":", dvs))) {
      DVS = dv.vars = convert2vars(data, varrange=dvs)$vars.raw
      message("\n", Glue("Note:\ndvs=\"{dvs}\" is matched to variables:"),
              "\n", paste(DVS, collapse=", "))
    } else {
      DVS = dv.vars = dvs
    }
    dv = "bruceR.Y"  # "Y" will generate an error when dvs are like "X1Y1"
    data.long = tidyr::pivot_longer(
      data, cols=dv.vars,
      names_to=within,
      names_pattern=dvs.pattern,
      values_to=dv) %>% as.data.frame()
    data.long = fix_long_data(data.long, c(between, within))
  } else {
    dv.vars = dv
    data.long = data
    data.long = fix_long_data(data.long, c(between, within))
    if(is.null(within)) {
      data.wide = data.long
      DVS = dv
    } else {
      data.wide = tidyr::pivot_wider(
        data.long[c(subID, dv, between, within, covariate)],
        names_from=within,
        values_from=dv,
        values_fn=mean) %>% as.data.frame()
      message("
    * Data are aggregated to mean (across items/trials)
    if there are >=2 observations per subject and cell.
    You may use Linear Mixed Model to analyze the data,
    e.g., with subjects and items as level-2 clusters.\n")
      DVS = base::setdiff(names(data.wide), c(subID, between, covariate))
    }
  }

  ## Main ANOVA Function
  try({
    err = TRUE
    suppressMessages({
      aov.ez = afex::aov_ez(
        data=data.long,
        id=subID,  # "bruceR.ID"
        dv=dv,  # "bruceR.Y"
        between=between,
        within=within,
        covariate=covariate,
        type=ss.type,
        # observed=which.observed,
        anova_table=list(correction=sph.correction, es="ges"),
        fun_aggregate=mean,
        include_aov=aov.include,
        factorize=FALSE,
        print.formula=FALSE)
    })
    err = FALSE
  }, silent=TRUE)
  if(err) {
    cat("\n")
    stop("
    Failed to perform MANOVA.
    Please follow the correct usage.
    See: help(MANOVA)", call.=FALSE)
  }
  at = aov.ez$anova_table
  names(at)[1:2] = c("df1", "df2")
  at$MS = at$`F`*at$`MSE`
  eta2 = effectsize::F_to_eta2(at$`F`, at$df1, at$df2,
                               ci=0.90, alternative="two.sided")
  at$p.eta2 = cc_m_ci(eta2$Eta2_partial, eta2$CI_low, eta2$CI_high, digits) %>%
    str_replace_all("0\\.", ".")
  at$g.eta2 = str_replace_all(formatF(at$ges, digits), "0\\.", ".")
  at = at[c("MS", "MSE", "df1", "df2", "F", "Pr(>F)", "p.eta2", "g.eta2")]
  names(at)[7:8] = c("\u03b7\u00b2p [90% CI of \u03b7\u00b2p]",
                     "\u03b7\u00b2G")
  row.names(at) = row.names(aov.ez$anova_table) %>%
    str_replace_all(":", " * ")
  df.nsmall = ifelse(sph.correction=="none", 0, digits)
  at.nsmalls = c(digits, digits, df.nsmall, df.nsmall, digits, 0, 0, 0)

  ## Descriptive Statistics
  nsub = nrow(data.wide)
  ncom = complete.cases(data.long[c(between, within, covariate)])
  nmis = length(ncom)-sum(ncom)
  N.info = Glue("{nsub}{ifelse(nmis>0, Glue(' ({nmis} missing observations deleted)'), '')}")
  data.long$bruceR.dv = data.long[[dv]]
  nmsd = plyr::ddply(
    .data=aov.ez$data$long,
    .variables=plyr::as.quoted(c(between, within)),
    .fun=summarise,
    bruceR.Mean=mean(!!sym(dv), na.rm=TRUE),
    bruceR.S.D.=sd(!!sym(dv), na.rm=TRUE),
    bruceR.n=length(!!sym(dv)))
  ncol.nmsd = length(nmsd)
  names(nmsd)[1:(ncol.nmsd-3)] = "\"" %^% names(nmsd)[1:(ncol.nmsd-3)] %^% "\""
  names(nmsd)[(ncol.nmsd-2):ncol.nmsd] = c("Mean", "S.D.", "n")

  cat("\n")
  Print("<<cyan ====== ANOVA ({design}) ======>>")
  cat("\n")
  Print("Descriptives:")
  print_table(nmsd, row.names=FALSE,
              digits=c(rep(digits, ncol.nmsd-1), 0))
  Print("Total sample size: <<italic N>> = {N.info}")
  cat("\n")

  nmsd$Mean = formatF(nmsd$Mean, digits)
  nmsd$S.D. = formatF(nmsd$S.D., digits)
  names(nmsd)[(ncol.nmsd-2):ncol.nmsd] = c("<i>M</i>", "<i>SD</i>", "<i>n</i>")
  nmsd.html = paste0(
    "<p><br/><br/></p>",
    "<p><b>Descriptive Statistics:</b></p>",
    df_to_html(
      nmsd,
      align.head=c(rep("left", times=ncol.nmsd-3),
                   rep("right"), times=3),
      align.text=c(rep("left", times=ncol.nmsd-3),
                   rep("right"), times=3))$TABLE,
    "<p>Total sample size: <i>N</i> = ", N.info, "</p>"
  )

  DEP = ifelse(is.null(within), dv, paste(dv.vars, collapse=", "))
  BET = ifelse(is.null(between), "\u2013", paste(between, collapse=", "))
  WIT = ifelse(is.null(within), "\u2013", paste(within, collapse=", "))
  COV = ifelse(is.null(covariate), "\u2013", paste(covariate, collapse=", "))
  Print("
  ANOVA Table:
  Dependent variable(s):      {DEP}
  Between-subjects factor(s): {BET}
  Within-subjects factor(s):  {WIT}
  Covariate(s):               {COV}
  ")
  print_table(at, digits=at.nsmalls)
  if(sph.correction %in% c("GG", "HF")) {
    sph.text=switch(sph.correction,
                    "GG"="GG (Greenhouse-Geisser)",
                    "HF"="HF (Huynh-Feldt)")
    Print("<<green Sphericity correction method: {sph.text}>>")
  }
  Print("<<blue MSE = mean square error (the residual variance of the linear model)>>")

  ## All Other Effect-Size Measures (deprecated; please use `effectsize` package)
  # https://github.com/strengejacke/sjstats/blob/master/R/anova_stats.R#L116
  Print("<<magenta
  \u03b7\u00b2p = partial eta-squared = SS / (SS + SSE) = F * df1 / (F * df1 + df2)
  \u03c9\u00b2p = partial omega-squared = (F - 1) * df1 / (F * df1 + df2 + 1)
  \u03b7\u00b2G = generalized eta-squared (see Olejnik & Algina, 2003)
  Cohen\u2019s <<italic f>>\u00b2 = \u03b7\u00b2p / (1 - \u03b7\u00b2p)
  >>")

  ## Levene's Test for Homogeneity of Variance
  try({ levene_test(data.wide, subID, DVS, between) })

  ## Mauchly's Test of Sphericity
  if(!is.null(within)) {
    Print("\n\n\nMauchly\u2019s Test of Sphericity:")
    suppressWarnings({
      sph = summary(aov.ez$Anova)$sphericity.tests
    })
    if(length(sph)==0) {
      Print("The repeated measures have only two levels. The assumption of sphericity is always met.")
    } else {
      class(sph) = "matrix"
      sph = as.data.frame(sph)
      names(sph) = c("Mauchly's W", "pval")
      row.names(sph) = str_replace_all(row.names(sph), ":", " * ")
      print_table(sph, digits=4)
      if(min(sph[,2])<.05 & sph.correction=="none") {
        Print("<<red The sphericity assumption is violated.
              You may specify: sph.correction=\"GG\" (or =\"HF\")>>")
      }
    }
  }
  cat("\n")

  if(!is.null(file)) {
    print_table(
      at,
      digits=at.nsmalls,
      col.names=c("<i>MS</i>",
                  "<i>MSE</i>",
                  "<i>df</i><sub>1</sub>",
                  "<i>df</i><sub>2</sub>",
                  "<i>F</i>",
                  "<i>p</i>",
                  " ",
                  "\u03b7<sup>2</sup><sub>p</sub> [90% CI]",
                  "\u03b7<sup>2</sup><sub>G</sub>"),
      file=file,
      file.align.text=c("left",
                        "right", "right",
                        "right", "right",
                        "right", "right", "left",
                        "right", "right"),
      title=paste0(
        "<b>ANOVA Table:</b></p>\n<p>",
        "<pre>Dependent variable(s):&#9;", DEP, "</pre></p>\n<p>",
        "<pre>Between-subjects factor(s):&#9;", BET, "</pre></p>\n<p>",
        "<pre>Within-subjects factor(s):&#9;", WIT, "</pre></p>\n<p>",
        "<pre>Covariate(s):&#9;&#9;", COV, "</pre>"
      ),
      note=paste0(
        "<i>Note</i>. MSE = Mean Square Error.",
        ifelse(
          sph.correction %in% c("GG", "HF"),
          " Sphericity correction method: " %^% sph.text %^% ".",
          "")
      ),
      append=nmsd.html)
  }

  ## Return
  aov.ez$between = between
  aov.ez$within = within
  aov.ez$data.wide = data.wide
  aov.ez$data.long = data.long
  invisible(aov.ez)
}


#' Simple-effect analysis and post-hoc multiple comparison.
#'
#' @description
#' Perform (1) simple-effect (and simple-simple-effect) analyses,
#' including both simple main effects and simple interaction effects,
#' and (2) post-hoc multiple comparisons (e.g., pairwise, sequential, polynomial),
#' with \emph{p} values adjusted for factors with >= 3 levels.
#'
#' This function is based on and extends
#' (1) \code{\link[emmeans:joint_tests]{emmeans::joint_tests()}},
#' (2) \code{\link[emmeans:emmeans]{emmeans::emmeans()}}, and
#' (3) \code{\link[emmeans:contrast]{emmeans::contrast()}}.
#' You only need to specify the model object, to-be-tested effect(s), and moderator(s).
#' Almost all results you need will be displayed together,
#' including effect sizes (partial \eqn{\eta^2} and Cohen's \emph{d}) and their confidence intervals (CIs).
#' 90\% CIs for partial \eqn{\eta^2} and 95\% CIs for Cohen's \emph{d} are reported.
#'
#' By default, the \emph{root mean square error} (RMSE) is used to compute the pooled \emph{SD} for Cohen's \emph{d}.
#' Specifically, it uses:
#' \enumerate{
#'   \item the square root of \emph{mean square error} (MSE) for between-subjects designs;
#'   \item the square root of \emph{mean variance of all paired differences of the residuals of repeated measures} for within-subjects and mixed designs.
#' }
#'
#' \strong{\emph{Disclaimer}:}
#' There is substantial disagreement on the appropriate pooled \emph{SD} to use in computing the effect size.
#' For alternative methods, see \code{\link[emmeans:eff_size]{emmeans::eff_size()}} and \code{\link[effectsize:t_to_r]{effectsize::t_to_d()}}.
#' Users should \emph{not} take the default output as the only right results and are completely responsible for specifying \code{sd.pooled}.
#'
#' @section Interaction Plot (See Examples Below):
#' You can save the returned object and use the \code{\link[emmeans:emmip]{emmeans::emmip()}} function
#' to create an interaction plot (based on the fitted model and a formula).
#' See examples below for the usage.
#'
#' Note: \code{\link[emmeans:emmip]{emmeans::emmip()}} returns a \code{ggplot} object,
#' which can be modified and saved with \code{ggplot2} syntax.
#'
#' @section Statistical Details:
#'
#' Some may confuse the statistical terms "simple effects", "post-hoc tests", and "multiple comparisons".
#' Such a confusion is not uncommon. Here I explain what these terms actually refer to.
#' \describe{
#'   \item{\strong{1. Simple Effect}}{
#'     When we speak of "simple effect", we are referring to ...
#'     \itemize{
#'       \item simple main effect
#'       \item simple interaction effect (only for designs with 3 or more factors)
#'       \item simple simple effect (only for designs with 3 or more factors)
#'     }
#'     When the interaction effect in ANOVA is significant,
#'     we should then perform a "simple-effect analysis".
#'     In regression, we call this "simple-slope analysis".
#'     They are identical in statistical principles.
#'
#'     In a two-factors design, we only test \strong{"simple main effect"}.
#'     That is, at different levels of a factor "B", the main effects of "A" would be different.
#'     However, in a three-factors (or more) design, we may also test \strong{"simple interaction effect"} and \strong{"simple simple effect"}.
#'     That is, at different combinations of levels of factors "B" and "C", the main effects of "A" would be different.
#'
#'     To note, simple effects \emph{per se} never require \emph{p}-value adjustment, because what we test in simple-effect analyses are still "omnibus \emph{F}-tests".
#'   }
#'   \item{\strong{2. Post-Hoc Test}}{
#'     The term "post-hoc" means that the tests are performed after ANOVA. Given this, some may (wrongly) regard simple-effect analyses also as a kind of post-hoc tests.
#'     However, these two terms should be distinguished. In many situations,
#'     "post-hoc tests" only refer to \strong{"post-hoc comparisons"} using \emph{t}-tests and some \emph{p}-value adjustment techniques.
#'     We need post-hoc comparisons \strong{only when there are factors with 3 or more levels}.
#'
#'     Post-hoc tests are totally \strong{independent of} whether there is a significant interaction effect. \strong{It only deals with factors with multiple levels.}
#'     In most cases, we use pairwise comparisons to do post-hoc tests. See the next part for details.
#'   }
#'   \item{\strong{3. Multiple Comparison}}{
#'     As mentioned above, multiple comparisons are indeed post-hoc tests but have no relationship with simple-effect analyses.
#'     Post-hoc multiple comparisons are \strong{independent of} interaction effects and simple effects.
#'     Furthermore, if a simple main effect contains 3 or more levels, we also need to do multiple comparisons \emph{within} the simple-effect analysis.
#'     In this situation, we also need \emph{p}-value adjustment with methods such as Bonferroni, Tukey's HSD (honest significant difference), FDR (false discovery rate), and so forth.
#'
#'     Options for multiple comparison:
#'     \itemize{
#'       \item \code{"pairwise"} - Pairwise comparisons (default is "higher level - lower level")
#'       \item \code{"seq"} or \code{"consec"} - Consecutive (sequential) comparisons
#'       \item \code{"poly"} - Polynomial contrasts (linear, quadratic, cubic, quartic, ...)
#'       \item \code{"eff"} - Effect contrasts (vs. the grand mean)
#'     }
#'   }
#' }
#'
#' @param model The model object returned by \code{\link{MANOVA}}.
#' @param effect Effect(s) you want to test.
#' If set to a character string (e.g., \code{"A"}),
#' it reports the results of omnibus test or simple main effect.
#' If set to a character vector (e.g., \code{c("A", "B")}),
#' it also reports the results of simple interaction effect.
#' @param by Moderator variable(s). Defaults to \code{NULL}.
#' @param contrast Contrast method for multiple comparisons.
#' Defaults to \code{"pairwise"}.
#'
#' Alternatives can be \code{"pairwise"} (\code{"revpairwise"}),
#' \code{"seq"} (\code{"consec"}), \code{"poly"}, \code{"eff"}.
#' For details, see \code{?emmeans::`contrast-methods`}.
#' @param reverse The order of levels to be contrasted.
#' Defaults to \code{TRUE} (higher level vs. lower level).
#' @param p.adjust Adjustment method of \emph{p} values for multiple comparisons.
#' Defaults to \code{"bonferroni"}.
#' For polynomial contrasts, defaults to \code{"none"}.
#'
#' Alternatives can be \code{"none"}, \code{"fdr"}, \code{"hochberg"},
#' \code{"hommel"}, \code{"holm"}, \code{"tukey"}, \code{"mvt"},
#' \code{"dunnettx"}, \code{"sidak"}, \code{"scheffe"}, \code{"bonferroni"}.
#' For details, see \code{\link[stats:p.adjust]{stats::p.adjust()}} and
#' \code{\link[emmeans:summary.emmGrid]{emmeans::summary()}}.
#' @param sd.pooled By default, it uses \strong{\code{sqrt(MSE)}} (root mean square error, RMSE)
#' as the pooled \emph{SD} to compute Cohen's \emph{d}.
#' Users may specify this argument as the \emph{SD} of a reference group,
#' or use \code{\link[effectsize:sd_pooled]{effectsize::sd_pooled()}} to obtain a pooled \emph{SD}.
#' For an issue about the computation method of Cohen's \emph{d}, see \emph{Disclaimer} above.
#' @param model.type \code{"multivariate"} returns the results of pairwise comparisons identical to SPSS,
#' which uses the \code{lm} (rather than \code{aov}) object of the \code{model}
#' for \code{\link[emmeans:joint_tests]{emmeans::joint_tests()}} and \code{\link[emmeans:emmeans]{emmeans::emmeans()}}.
#'
#' \code{"univariate"} requires also specifying \code{aov.include=TRUE} in \code{\link{MANOVA}}
#' (not recommended by the \code{afex} package; for details, see \code{\link[afex:aov_car]{afex::aov_ez()}}).
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#'
#' @return
#' The same model object as returned by
#' \code{\link{MANOVA}} (for recursive use),
#' along with a list of tables:
#' \code{sim} (simple effects),
#' \code{emm} (estimated marginal means),
#' \code{con} (contrasts).
#'
#' Each \code{EMMEANS(...)} appends one list to the returned object.
#'
#' @examples
#' #### Between-Subjects Design ####
#' \donttest{
#' between.1
#' MANOVA(between.1, dv="SCORE", between="A") %>%
#'   EMMEANS("A")
#' MANOVA(between.1, dv="SCORE", between="A") %>%
#'   EMMEANS("A", p.adjust="tukey")
#' MANOVA(between.1, dv="SCORE", between="A") %>%
#'   EMMEANS("A", contrast="seq")
#' MANOVA(between.1, dv="SCORE", between="A") %>%
#'   EMMEANS("A", contrast="poly")
#'
#' between.2
#' MANOVA(between.2, dv="SCORE", between=c("A", "B")) %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS("B", by="A")
#' ## How to create an interaction plot using `emmeans::emmip()`?
#' ## See help page: ?emmeans::emmip()
#' m = MANOVA(between.2, dv="SCORE", between=c("A", "B"))
#' emmip(m, ~ A | B, CIs=TRUE)
#' emmip(m, ~ B | A, CIs=TRUE)
#' emmip(m, B ~ A, CIs=TRUE)
#' emmip(m, A ~ B, CIs=TRUE)
#'
#' between.3
#' MANOVA(between.3, dv="SCORE", between=c("A", "B", "C")) %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS(c("A", "B"), by="C") %>%
#'   EMMEANS("A", by=c("B", "C"))
#' ## Just to name a few...
#' ## You may test other combinations...
#'
#'
#' #### Within-Subjects Design ####
#'
#' within.1
#' MANOVA(within.1, dvs="A1:A4", dvs.pattern="A(.)",
#'        within="A") %>%
#'   EMMEANS("A")
#'
#' within.2
#' MANOVA(within.2, dvs="A1B1:A2B3", dvs.pattern="A(.)B(.)",
#'        within=c("A", "B")) %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS("B", by="A")  # singular error matrix
#' # :::::::::::::::::::::::::::::::::::::::
#' # This would produce a WARNING because of
#' # the linear dependence of A2B2 and A2B3.
#' # See: Corr(within.2[c("A2B2", "A2B3")])
#'
#' within.3
#' MANOVA(within.3, dvs="A1B1C1:A2B2C2", dvs.pattern="A(.)B(.)C(.)",
#'        within=c("A", "B", "C")) %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS(c("A", "B"), by="C") %>%
#'   EMMEANS("A", by=c("B", "C"))
#' ## Just to name a few...
#' ## You may test other combinations...
#'
#'
#' #### Mixed Design ####
#'
#' mixed.2_1b1w
#' MANOVA(mixed.2_1b1w, dvs="B1:B3", dvs.pattern="B(.)",
#'        between="A", within="B", sph.correction="GG") %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS("B", by="A")
#'
#' mixed.3_1b2w
#' MANOVA(mixed.3_1b2w, dvs="B1C1:B2C2", dvs.pattern="B(.)C(.)",
#'        between="A", within=c("B", "C")) %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS(c("A", "B"), by="C") %>%
#'   EMMEANS("A", by=c("B", "C"))
#' ## Just to name a few...
#' ## You may test other combinations...
#'
#' mixed.3_2b1w
#' MANOVA(mixed.3_2b1w, dvs="B1:B2", dvs.pattern="B(.)",
#'        between=c("A", "C"), within="B") %>%
#'   EMMEANS("A", by="B") %>%
#'   EMMEANS("A", by="C") %>%
#'   EMMEANS(c("A", "B"), by="C") %>%
#'   EMMEANS("B", by=c("A", "C"))
#' ## Just to name a few...
#' ## You may test other combinations...
#'
#'
#' #### Other Examples ####
#'
#' air = airquality
#' air$Day.1or2 = ifelse(air$Day %% 2 == 1, 1, 2) %>%
#'   factor(levels=1:2, labels=c("odd", "even"))
#' MANOVA(air, dv="Temp", between=c("Month", "Day.1or2"),
#'        covariate=c("Solar.R", "Wind")) %>%
#'   EMMEANS("Month", contrast="seq") %>%
#'   EMMEANS("Month", by="Day.1or2", contrast="poly")
#' }
#' @seealso \code{\link{TTEST}}, \code{\link{MANOVA}}, \code{\link{bruceR-demodata}}
#'
#' @export
EMMEANS = function(
    model, effect=NULL, by=NULL,
    contrast="pairwise",
    reverse=TRUE,
    p.adjust="bonferroni",
    sd.pooled=NULL,
    model.type="multivariate",
    digits=3
) {
  # IMPORTANT: If include 'aov', the 'emmeans' results of
  # within-subjects design will not be equal to those in SPSS!
  # So we do not include 'aov' object but instead use 'lm' and 'mlm'
  # objects to do the follow-up 'emmeans' analyses!

  # Bug: For within-sub 'effect' and between-sub 'by' in mixed design
  # if(!is.null(effect) & !is.null(model$within) & effect %anyin% model$within &
  #    !is.null(by) & !is.null(model$between) & by %anyin% model$between) {
  #   model=model.raw
  # }

  if(is.null(model))
    stop("`model` is invalid. Run MANOVA() without EMMEANS() to check.", call.=FALSE)

  if(!inherits(model, "afex_aov"))
    stop("EMMEANS() should be used with MANOVA()!\nSee: help(MANOVA)", call.=FALSE)

  effect.text = paste(effect, collapse='\" & \"')
  Print("<<cyan ------ EMMEANS (effect = \"{effect.text}\") ------>>")
  cat("\n")

  ## Simple Effect (omnibus)
  # see 'weights' in ?emmeans
  suppressMessages({
    sim = emmeans::joint_tests(
      object=model, by=by,
      weights="equal",
      model=model.type)
  })
  if(is.null(sim))
    stop("`model` or `by` is invalid. Please check your code.\nSee: help(MANOVA)", call.=FALSE)
  sim$note = NULL
  names(sim)[1] = "Effect"
  sim$Effect = str_replace_all(sim$Effect, ":", " * ")
  eta2 = effectsize::F_to_eta2(sim$F.ratio, sim$df1, sim$df2,
                               ci=0.90, alternative="two.sided")
  sim$p.eta2 = cc_m_ci(eta2$Eta2_partial, eta2$CI_low, eta2$CI_high, digits) %>%
    str_replace_all("0\\.", ".")
  if(length(by)>0) {
    vns = names(sim)[2:(length(by)+1)]
    names(sim)[2:(length(by)+1)] = "\"" %^% vns %^% "\""
  }
  names(sim)[(length(by)+4):(length(by)+6)] =
    c("F", "pval", "\u03b7\u00b2p [90% CI of \u03b7\u00b2p]")

  Print("Joint Tests of \"{effect.text}\":")
  print_table(sim, digits=c(rep(0, length(by)+3),
                            digits, 0, 0),
              row.names=FALSE)
  Print("
  <<italic Note>>. Simple effects of <<italic repeated measures>> with 3 or more levels
  are <<italic different>> from the results obtained with SPSS MANOVA syntax.
  ")
  cat("\n")

  ## SPSS GLM EMMEANS Univariate/Multivariate Tests
  try({
    phtest = phia::testInteractions(
      model=model$lm,
      across=effect,
      fixed=by,
      idata=model$Anova$idata,
      adjustment="none")
    if(grepl("Multivariate", attr(phtest, "heading"))) {
      pht = as.data.frame(phtest)[c(
        "test stat",
        "num Df",
        "den Df",
        "approx F",
        "Pr(>F)")]
      names(pht) = c(
        "Pillai\u2019s trace",
        "Hypoth. df",
        "Error df",
        "Exact F",
        "pval")
      row.names(pht) = str_replace_all(row.names(pht), " : ", " & ") %^%
        ": " %^% Glue("\"{effect.text}\"")
      print_table(pht, digits=digits,
                  title=Glue("Multivariate Tests of \"{effect.text}\":"),
                  note=Glue("<<italic Note>>. Identical to the results obtained with SPSS GLM EMMEANS syntax."))
      cat("\n")
    } else {
      pht = as.data.frame(phtest)[c(
        "Sum of Sq", "Df", "F", "Pr(>F)")]
      pht$MS = pht[,1] / pht[,2]
      pht = pht[, c(1, 2, 5, 3, 4)]
      names(pht) = c("Sum of Squares", "df", "Mean Square", "F", "pval")
      row.names(pht)[1:(nrow(pht)-1)] = str_replace_all(row.names(pht)[1:(nrow(pht)-1)], " : ", " & ") %^%
        ": " %^% Glue("\"{effect.text}\"")
      print_table(pht, digits=c(digits, 0, digits, digits, 0),
                  title=Glue("Univariate Tests of \"{effect.text}\":"),
                  note=Glue("<<italic Note>>. Identical to the results obtained with SPSS GLM EMMEANS syntax."))
      cat("\n")
    }
  }, silent=TRUE)

  ## Estimated Marginal Means (emmeans)
  suppressMessages({
    emm0 = emm = emmeans::emmeans(
      object=model, specs=effect, by=by,
      weights="equal",
      model=model.type)
  })
  emm = summary(emm)  # to a data.frame (class 'summary_emm')
  emm$MeanCI = cc_m_ci(emm$emmean, emm$lower.CL, emm$upper.CL, digits)
  vns = names(emm)[1:(length(by)+1)]
  names(emm)[1:(length(by)+1)] = "\"" %^% vns %^% "\""
  emm = cbind(emm[1:(length(by)+1)], emm[c("MeanCI", "SE")])
  names(emm)[length(emm)-1] = "Mean [95% CI of Mean]"

  Print("Estimated Marginal Means of \"{effect.text}\":")
  print_table(emm, digits=digits, row.names=FALSE)
  cat(paste(attr(emm, "mesg"), collapse="\n"))
  cat("\n")

  ## Multiple Comparison (pairwise or other methods)
  # see: ?contrast, ?pairs.emmGrid, ?pairwise.emmc
  contr.method = switch(
    contrast,
    pairwise=,
    revpairwise="Pairwise Comparisons",
    consec=,
    seq="Consecutive (Sequential) Comparisons",
    poly="Polynomial Contrasts",
    eff="Effect Contrasts (vs. Grand Mean)",
    "Multiple Comparisons")
  if(contrast=="pairwise" & reverse==TRUE) contrast = "revpairwise"
  if(contrast=="seq") contrast = "consec"
  if(contrast=="consec") reverse = FALSE
  if(contrast=="poly") p.adjust = "none"
  con0 = con = emmeans::contrast(
    object=emm0, method=contrast,
    adjust=p.adjust, reverse=reverse)
  # pairs(emm, simple="each", reverse=TRUE, combine=TRUE)
  conCI = confint(con)
  con = summary(con)  # to a data.frame (class 'summary_emm')

  ## Cohen's d
  all_paired_diffs = function(v) {
    combns = utils::combn(v, 2)
    combns[1,] - combns[2,]
  }
  if(is.null(sd.pooled)) {
    sigma = stats::sigma(model$lm)
    if(length(sigma)==1) {
      sd.pooled = sigma  # = sqrt(MSE), i.e., RMSE
    } else {
      res = residuals(model$lm)  # matrix
      D = apply(res, 1, all_paired_diffs)
      if(is.matrix(D))
        sd.pooled = sqrt(mean(apply(D, 1, var)))  # RMSE
      else
        sd.pooled = sd(D)
    }
  }
  # rn = row.names(model$anova_table)
  # term = c()
  # for(i in rn) if(i %in% effect) term = c(term, i)
  # term = paste(term, collapse=":")
  # if(is.null(sd.pooled))
  #   sd.pooled = sqrt(model$anova_table[term, "MSE"])
  if(contrast!="poly")
    attr(con, "mesg") = c(
      Glue("Pooled SD for computing Cohen\u2019s d: {formatF(sd.pooled, digits)}"),
      attr(con, "mesg"))
  con$cohen.d.ci = cc_m_ci(con$estimate/sd.pooled,
                           conCI$lower.CL/sd.pooled,
                           conCI$upper.CL/sd.pooled,
                           digits)
  if(length(by)>0) {
    vns = names(con)[2:(length(con)-6)]
    names(con)[2:(length(con)-6)] = "\"" %^% vns %^% "\""
  }
  names(con)[c(1, (length(con)-5):length(con))] =
    c("Contrast", "Estimate", "SE", "df", "t", "pval",
      "Cohen\u2019s d [95% CI of d]")
  p.mesg.index = grepl("^P value adjustment", attr(con, "mesg"))
  if(any(p.mesg.index)) {
    p.mesg = attr(con, "mesg")[which(p.mesg.index)]
    method.mesg = str_extract(p.mesg, "(?<=: ).+(?= method)")
    if(method.mesg %in% c("fdr", "mvt"))
      method.mesg.new = toupper(method.mesg)
    else
      method.mesg.new = capitalize(method.mesg)
    p.mesg.new = paste0("P-value adjustment: ", method.mesg.new, strsplit(p.mesg, method.mesg)[[1]][2], ".")
    attr(con, "mesg")[which(p.mesg.index)] = p.mesg.new
  } else if(p.adjust!="none") {
    attr(con, "mesg") = c(attr(con, "mesg"),
                          "No need to adjust p values.")
  }
  if(contrast=="poly") con[c("Cohen\u2019s d [95% CI of d]")] = NULL

  Print("{contr.method} of \"{effect.text}\":")
  con$df = formatF(con$df, 0)
  print_table(con, digits=digits, row.names=FALSE)
  cat(paste(attr(con, "mesg"), collapse="\n"))
  cat("\n\n")
  Print("<<green Disclaimer:
  By default, pooled SD is <<italic Root Mean Square Error>> (RMSE).
  There is much disagreement on how to compute Cohen\u2019s d.
  You are completely responsible for setting `sd.pooled`.
  You might also use `effectsize::t_to_d()` to compute d.
  >>")
  cat("\n")
  # if(con0@misc[["famSize"]] > 2 & p.adjust != "none")
  #   cat("\n")
  # if(any(grepl("averaged|Pooled SD", attr(con, "mesg"))) & any(p.mesg.index)==FALSE)
  #   cat("\n")

  ## Return (return the raw model for recycling across '%>%' pipelines)
  model$EMMEANS = c(model$EMMEANS, list(list(sim=sim, emm=emm, con=con)))
  invisible(model)
}
psychbruce/bruceR documentation built on Oct. 2, 2023, 10:26 p.m.