R/anova.R

Defines functions sigPhases pvalueConsec peakAnova tanova compZTanova compPvalueTanova compTanovaEffect extractInteraction arrayTtestAnova arrayAnova preAnova compF anovaRandomIndices compSumSq preSumSq forceOrthogonal modelMeans.arrayAnova modelMeans.default modelMeans marginalMeans permPvalues anovaTfce

Documented in anovaRandomIndices anovaTfce arrayAnova arrayTtestAnova compF compPvalueTanova compSumSq compTanovaEffect compZTanova extractInteraction forceOrthogonal marginalMeans modelMeans modelMeans.arrayAnova modelMeans.default peakAnova permPvalues preAnova preSumSq pvalueConsec sigPhases tanova

#
# <<< ANOVA on arrays >>> ----
#

#' TFCE-correction of the test statistic
#'
#' \code{anovaTfce} is called internally by \code{arrayAnova} and
#' \code{arrayTtest} and performs TFCE-correction of the given test statistic
#' @param x numeric matrix or array of test statistic
#' (chan_time X other_dimensions or chan X time X other_dimenions)
#' @param target_dim the dimension(s) which hold chan and time values (usually
#' 1L if x is a matrix and 1:2L if x is an array)
#' @param has_neg logical scalar whether x might have negative values (TRUE for
#' a t-statistic and FALSE for an F-statistic)
#' @param nr_chan number of channels
#' @param nr_time number of time points
#' @param tfce a list of TFCE parameters
#' @keywords internal
anovaTfce <- function(x, target_dim, has_neg, nr_chan, nr_time, tfce) {
  fnDims(x, target_dim,
         function(y) tfceFn(
           matrix_(y, nr_chan, nr_time, arg_check = FALSE),
           chn = tfce$ChN, eh = tfce$EH,
           nr_steps = tfce$steps, has_neg = has_neg),
         arg_check = FALSE, parallel = FALSE)
}

#' Permutation of the test statistic
#'
#' \code{permPvalues} is called internally by \code{arrayAnova} and
#' \code{arrayTtest} and performs permutation analysis.
#' @param obj a list returned by \code{preAnova}
#' @param nperm an integer value indicating the number of permutations
#' @param seed an integer value which specifies a seed (default: NULL), or a
#' list of arguments passed to \code{\link{set.seed}}
#' @param stat_obs a numeric array of the observed test statistic
#' @param parallel a list as returned by \code{\link{parallelParams}}
#' @param tfce NULL (default) or a list of the TFCE parameters
#' @return \code{\link{permPvalues}} returns a logical array whose elements
#' indicate whether the absolute observed test statistic in the given chanXtime
#' cell is smaller than the absolute randomized test statistic
#' @keywords internal
permPvalues <- function(obj, nperm, seed, stat_obs, parallel, tfce = NULL) {
  # helper function
  permfn <- function(obj, perm_vec, tfce,
                     stat_fun, abs_stat_obs, has_neg) {
    stat_perm <- stat_fun(obj, new = as.vector(perm_vec))
    setattr(stat_perm, "dim",
            c(obj$nr_chanXtime, obj$otherdims$size))
    if (!is.null(tfce))
      stat_perm <- anovaTfce(stat_perm, 1L, has_neg,
                             obj$nr_chan, obj$nr_time, tfce)
    if (has_neg) stat_perm <- abs(stat_perm)
    stat_perm <- colMaxs(stat_perm)
    # return
    if (length(stat_perm) > 1L) {
      sweep(abs_stat_obs, obj$otherdims$index, stat_perm, "<")
    } else {
      abs_stat_obs < stat_perm
    }
  }
  # TFCE yes / no
  use_tfce <- if (is.null(tfce)) FALSE else TRUE
  # type: ANOVA / t-test
  if (grepl("between|within|mixed", obj$type)) {
    rand_sample <- anovaRandomIndices(obj, nperm, seed)
    stat_fun <- compF
    has_neg <- FALSE
  } else if (grepl("independent", obj$type)) {
    rand_sample <- isTtestRandomGroups(obj, nperm, seed)
    stat_fun <- isTtest
    has_neg <- TRUE
  } else if (grepl("one|paired", obj$type)) {
    rand_sample <- osTtestRandomSigns(obj, nperm, seed)
    stat_fun <- osTtest
    has_neg <- TRUE
  } else {
    stop("Wrong 'type' node in 'obj'")
  }
  # take the absolute values of the observed statistic
  if (has_neg) stat_obs <- abs(stat_obs)
  # create larger chunks
  chunks <- min(10L, ceiling(nperm/max(1L, getDoParWorkers())))
  # avoid code-style warning
  xi <- NULL; rm(xi)
  # compute sig
  export <- 
    if (!is.null(parallel$cl)) {
      c("iter", "foreach", "%do%", "colMaxs", "anovaTfce")
    } else {
      NULL
    }
  sig <-
    foreach(x = iter(rand_sample, by = "col", chunksize = chunks),
            .combine = "+", .inorder = FALSE,
            .options.snow = parallel$snow_options,
            .options.multicore = parallel$mc_options,
            .export = export) %dopar%
            {
              foreach(xi = iter(x, by = "col", chunksize = 1L),
                      .combine = "+", .inorder = FALSE) %do%
                permfn(obj, xi, tfce, stat_fun, stat_obs, has_neg)
            }
  sig <- (sig + 1) / (nperm + 1)
  # set label
  attr_label <-
    if (use_tfce) {
      "Permuted P-value (with TFCE correction)"
    } else {
      "Permuted P-value (with max(Channel X Time) correction)"
    }
  setattr(sig, "label", attr_label)
}

#' Compute marginal means in an ANOVA design
#'
#' \code{marginalMeans} computes marginal means in ANOVA designs
#' @param form formula of the model
#' @param f_dat data.frame of factors
#' @param a_dat matrix which contains the dependent variables. It must have as
#' many rows as f_dat. Usually a_dat is the result of calling
#' \code{\link{array2mat}} on the orginal data array.
#' @param dimn the original dimension names for the array corresponding to the
#' column names of \code{a_dat}
#' @param keep_term_order logival variable whether to keep the order of the
#' model terms as provided in form (TRUE) or all interaction terms should follow
#' all main effect terms (FALSE, default)
#' @param residualmean logical variable; if TRUE, each factor term is extracted
#' from the data after computing the given marginal mean (default: FALSE)
#' @param whichterm character, numeric or logical vector; indices of model terms
#' which should be analyzed (default: NULL, meaning all terms are included)
#' @param no_icpt logical value; if TRUE, global mean is subtracted before
#' computing the marginal means (default: FALSE)
#' @export
#' @keywords internal
#' @return A named list containing the marginal means for each model term
# TODO: examples, argument checking, handle numeric variable
marginalMeans <- function(form, f_dat, a_dat, dimn, keep_term_order = FALSE,
                          residualmean = FALSE, whichterm = NULL,
                          no_icpt = FALSE) {
  labels <- attr(terms(as.formula(form), keep.order = keep_term_order),
                 "term.labels")
  termL <- if (is.null(whichterm) || is.na(whichterm)) {
    strsplit(labels, ":")
  } else if (is.character(whichterm)) {
    strsplit(whichterm[whichterm %in% labels], ":")
  } else {
    strsplit(labels[whichterm], ":")
  }
  marg_means <- vector("list", length(termL))
  if (residualmean && !no_icpt) a_dat <- sweep(a_dat, 2, colMeans(a_dat), "-")
  for (i in 1:length(termL)) {
    groups <- factor(interaction(f_dat[,termL[[i]]], drop = TRUE))
    groupfreq <- tabulate(groups)
    names(groupfreq) <- levels(groups)
    marg_means[[i]] <- array_(rowsum(a_dat, groups)/groupfreq,
                              c(length(groupfreq), vapply(dimn, length, 0L)),
                              c(list(modelterm = names(groupfreq)), dimn))
    setattr(marg_means[[i]], "freq", groupfreq)
    if (residualmean) {
      a_dat <- a_dat -
        as.vector(subsetArray(marg_means[[i]],
                              list(modelterm = as.numeric(groups))))
    }
  }
  names(marg_means) <- labels
  # return
  marg_means
}

#' Compute adjusted or unadjusted cell means in ANOVA designs
#'
#' \code{modelMeans} is a generic function for computing adjusted or unadjusted
#' cell means in ANOVA designs.
#' @param model an object returned by \code{\link{arrayAnova}} or
#' \code{\link{tanova}}
#' @param .arraydat a numeric array with named dimnames containing the EEG (or
#' other) data. Missing values are not allowed.
#' @param factordef a named list of factor definitions, containing the following
#' elements:
#' \itemize{
#' \item{between: }{character vector of between-subject factors (default: NULL)}
#' \item{within: }{character vector of within-subject factors (default: NULL)}
#' \item{w_id: }{name of the dimension which identifies the subjects
#' (default: "id")}
#' }
#' @param bwdat a data.frame which contains the identification codes
#' (factordef$w_id) and all subject-level variables (usually factors) listed in
#' 'factordef$between'. Missing values are not allowed.
#' @param term a character vector of model terms; a separate array of cell means
#' is returned for each term. The default is NULL, in which case all terms
#' (that is, the means corresponding to all main effects and interactions) are
#' returned. For custom terms, note that interaction terms must be given as
#' "factorA*factorB" and not as "factorA:factorB".
#' @param adjusted a logical value if the cell means should be adjusted for
#' unbalanced data (default: FALSE). See Details.
#' @param ... not used yet
#' @details If 'adjusted' is set to FALSE (the default), \code{modelMeans} fits
#' separate models for each model term and computes the model-based predictions
#' for a reference grid which contains the mean and the 1SD value for continuous
#' covariates, and all levels of the factor variables in the design. If
#' 'adjusted' is set to TRUE, the means are simply the marginal means of the
#' predicted means of the highest-order interaction term. This is equivalent to
#' the so-called LS means (least-squares means) in the SAS terminology.
#' This distinction is only relevant if the design is unbalanced, that is, the
#' sample sizes in a between-subject design are unequal (note that the input
#' array may not contain missing values, therefore the within subject part of
#' the design is always balanced). For such datasets the lower order
#' interaction or main effect means can substantially differ from the averages
#' of the corresponding higher-order means if 'adjusted' is FALSE.
#' @export
#' @return The function returns a named list of arrays; the names are the model
#' terms.
#' @examples
#' # example data
#' data(erps)
#' dat_id <- attr(erps, "id") # to get group memberships
#'
#' # make the dataset unbalanced to illustrate the difference between adjusted
#' # and unadjusted means
#' erps <- subsetArray(erps, list(id = 2:20))
#' dat_id <- dat_id[2:20, ]
#'
#' # compute the means for the group*stimclass*pairtype interaction and all
#' # other lower-order terms
#' fdef <- list(between = "group",
#'              within = c("stimclass", "pairtype"))
#' means <- modelMeans(erps, fdef, bwdat = dat_id)
#'
#' # the same for the least-squares means
#' means_adj <- modelMeans(erps, fdef, bwdat = dat_id, adjusted = TRUE)
#'
#' # this is an unbalanced dataset, so the means for the highest order
#' # interaction are identical, but the marginal means are not
#' stopifnot(identical(
#'     TRUE,
#'     all.equal(means$`group*stimclass*pairtype`,
#'               means_adj$`group*stimclass*pairtype`))
#' )
#' stopifnot(!identical(
#'     TRUE,
#'     all.equal(means$`stimclass*pairtype`,
#'               means_adj$`stimclass*pairtype`))
#' )
#'
#' # simple means for the Fz channel at time 200, for identical pairs in the
#' # "A" stimulus class, separately for the two reading groups
#' simple_means <- sapply(c("control", "dl"), function(g)
#'     mean(subsetArray(erps,
#'                      list(chan = "Fz", time = "200",
#'                           stimclass = "A", pairtype = "ident",
#'                           id = dat_id$group == g)))
#' )
#' stopifnot(identical(
#'     TRUE,
#'     all.equal(means$`group*stimclass*pairtype`["Fz", "200", , "A", "ident"],
#'               simple_means))
#' )
#'
#' # the same ignoring the groups
#' simple_mean_nogroup <- mean(subsetArray(erps,
#'                                         list(chan = "Fz", time = "200",
#'                                         stimclass = "A", pairtype = "ident")))
#' stopifnot(identical(
#'     TRUE,
#'     all.equal(means$`stimclass*pairtype`["Fz", "200", "A", "ident"],
#'               simple_mean_nogroup))
#' )
#' stopifnot(identical(
#'     TRUE,
#'     all.equal(means_adj$`stimclass*pairtype`["Fz", "200", "A", "ident"],
#'               mean(simple_means)))
#' )
#'
modelMeans <- function(...) UseMethod("modelMeans")

#' @export
#' @describeIn modelMeans Default method
modelMeans.default <- function(.arraydat, factordef,
                               bwdat = NULL, term = NULL,
                               adjusted = FALSE, ...) {
  # workhorse function
  compute <- function(tm, dat, y, factordef, ydims,
                      tm0 = NULL, model0 = NULL) {
    vars <- strsplit(tm, "\\*")[[1L]]
    if (is.null(tm0)) {
      form <- as.formula(paste("~", tm, sep = ""))
      mod <- preSumSq(form, dat, y = y, between = character(),
                      adjusted = FALSE)
      coeff <- mod$coeff
      pred_levels <- lapply(dat[vars], function(x) {
        if (is.numeric(x)) c(0, 1) else levels(x)
      })
      pred_grid <- expand.grid(pred_levels, KEEP.OUT.ATTRS = FALSE)
      pred_mm <- model.matrix(form, pred_grid)
      pred <- pred_mm %*% coeff[colnames(pred_mm), , drop = FALSE]
      setattr(pred, "dim", c(vapply(pred_levels, length, 0L), ydims$dim))
      setattr(pred, "dimnames", c(pred_levels, ydims$dimn))
      pred <- aperm(pred, c(ydims$orig_dimid, vars))
    } else {
      vars0 <- strsplit(tm0, "\\*")[[1L]]
      pred <-
        if (identical(vars, vars0)) {
          model0
        } else {
          avgDims(model0, setdiff(vars0, vars))
        }
    }
    # return
    pred
  }
  # set contrasts
  opcons <- options("contrasts")
  options(contrasts = c("contr.helmert", "contr.poly"))
  on.exit(options(opcons))
  #
  input <- preAnova(
    .arraydat = .arraydat, factordef = factordef, bwdat = bwdat,
    verbose = FALSE, tfce = FALSE, perm = permParams(n = 0L))
  dat <- input$dat
  y <- input$.arraydat
  if (is.null(term)) {
    term <- gsub(":", "*", input$full_dimnames$modelterm)
  }
  #
  origpos <- attr(input$teststat_dimid, "origpos")
  names(origpos) <- input$teststat_dimid
  ydimid <- setdiff(input$teststat_dimid, "modelterm")
  orig_ydimid <- ydimid[order(origpos[ydimid])]
  ydimn <- input$full_dimnames[ydimid]
  ydim <- vapply(ydimn, length, 0L)
  y_dims <- list(dim = ydim, dimn = ydimn, orig_dimid = orig_ydimid)
  #
  out <-
    if (!adjusted) {
      lapply(term, compute, dat = dat, y = y, factordef = factordef,
             ydims = y_dims)
    } else {
      term0 <- input$full_dimnames$modelterm
      term0 <- term0[length(term0)]
      term0 <- gsub(":", "*", term0)
      model0 <- compute(term0, dat = dat, y = y, factordef = factordef,
                        ydims = y_dims)
      lapply(term, compute, dat = dat, y = y, factordef = factordef,
             ydims = y_dims, tm0 = term0, model0 = model0)
    }
  # return
  setattr(out, "names", term)
  out
}

#' @export
#' @describeIn modelMeans Method for \code{arrayAnova} objects
modelMeans.arrayAnova <- function(model = NULL, term = NULL, adjusted = FALSE,
                                  ...) {
  mcall <- model$call
  mcall[[1]] <- as.name("modelMeans.default")
  mcall$term <- term
  mcall$adjusted <- adjusted
  eval(mcall)
}

#' @export
#' @describeIn modelMeans Method for \code{tanova} objects
modelMeans.tanova <- modelMeans.arrayAnova


#' Force orthogonality in a model matrix
#'
#' \code{forceOrthogonal} checks if a model matrix is orthogonal and makes
#' adjustments if it is not.
#' @param model_matrix the model matrix
#' @param between the names of between-subject factors
#' @keywords internal
forceOrthogonal <- function(model_matrix, between = character()) {
  if (ncol(model_matrix) > 2L) {
    orth <- cor(model_matrix[, -1L])
    orth <- orth[lower.tri(orth)]
    if (max(abs(orth)) > .Machine$double.eps) {
      if (length(between) > 1L)
        warning(paste0(
          "The design is unbalanced, and this function calculates ",
          "sequential (type-I) tests. Consider re-running the test ",
          "with reordered between-subject factors (see ?arrayAnova)."
        ))
      model_matrix[, 2L] <- model_matrix[, 2L] - mean(model_matrix[, 2L])
      for (i in 3:ncol(model_matrix)) {
        mmi <- model_matrix[,1:(i-1L)]
        coeff <- solve(crossprod(mmi),
                       crossprod(mmi, model_matrix[, i, drop=FALSE]))
        model_matrix[,i] <- model_matrix[,i]- mmi %*% coeff
      }
    }
  }
  # return
  model_matrix
}

#' Prepare sum-of-squares computations
#'
#' \code{preSumSq} prepares the objects which are needed by
#' \code{\link{compSumSq}}. Thus, it returns the model.matrix (X), the X'X
#' matrix, and all unique rows of Xs, and also the residuals if residuals are
#' permuted instead of raw observations.
#' @param form model formula (or character string to \code{as.formula})
#' @param form_data a data.frame containing all variables of the formula
#' @param keep_term_order a logical value indicating whether the terms should
#' keep their positions. If FALSE the terms are reordered so that main effects
#' come first, followed by the interactions, all second-order, all third-order
#' and so on. Effects of a given order are kept in the order specified.
#' @param scaled a logical value indicating if numeric variables in
#' 'form_data' are already scaled (TRUE) or not (FALSE, the default).
#' @param y a numeric matrix of response data if residuals should be computed
#' (usually coming from \code{preAnova})
#' @param between the names of between-subject factors. Only used for warning
#' message in case of unbalanced data.
#' @param adjusted adjustment for unbalanced data (default: TRUE)
#' @return A list with the following components: model_matrix,
#' model_matrix_labels, xpx, unique_contrasts, residuals (optional).
#' @keywords internal
preSumSq <- function(form, form_data,
                     keep_term_order = FALSE,
                     scaled = FALSE, y = NULL, between = NULL,
                     adjusted = TRUE) {
  term_object <- terms(as.formula(form), keep.order = keep_term_order)
  term_labels <- attr(term_object, "term.labels")
  term_factors <- attr(term_object, "factors")
  vars <- rownames(term_factors)
  if (!scaled) {
    scfn <- function(x) {
      if (is.numeric(x) && is.null(attr(x, "scaled:scale"))) {
        scale(x)
      } else {
        x
      }
    }
    form_data[vars] <- lapply(form_data[vars], scfn)
  }
  mm <- model.matrix(term_object, data = form_data)
  # check orthogonality
  if (adjusted) mm <- forceOrthogonal(mm, between)
  #
  xpx <- crossprod(mm)
  mm_labels <- c("(Intercept)", term_labels[attr(mm, "assign")])
  unique_contrasts <-
    lapply(term_labels, function(term) {
      ind <- which(mm_labels == term)
      fastUnique(mm[, ind, drop = FALSE],
                 freq = TRUE)
    })
  setattr(unique_contrasts, "names", term_labels)
  mm_labels <- colnames(mm)
  setattr(mm, "dimnames", NULL)
  setattr(mm, "assign", NULL)
  setattr(mm, "contrasts", NULL)
  #
  out <- list(
    model_matrix = mm, model_matrix_labels = mm_labels, xpx = xpx,
    unique_contrasts = unique_contrasts)
  #
  if (!is.null(y)) {
    out$coeff <- solve(xpx, crossprod(mm, y))
    pred <- mm %*% out$coeff
    out$residuals <- y - pred
  }
  # return
  out
}

#' Compute sum of squares
#'
#' \code{compSumSq} computes sum of squares which are needed by
#' \code{\link{compF}} and \code{\link{permPvalues}}.
#' @param obj a list object as returned by \code{\link{preAnova}}
#' @param new_indices if not NULL (default), it must be a vector of numeric
#' indices to be passed to the model matrix. This parameter is used for
#' permutation-based analyses.
#' @return a numeric matrix of the sum-of-squares with an extra attribute for
#' the degrees of freedom ('Df'), or a list of such matrices
#' @keywords internal
compSumSq <- function(obj, new_indices = NULL) {
  ssqFn <- function(x, y, r, new_indices) {
    mm <- x$model_matrix
    if (!is.null(new_indices)) {
      if (!is.null(r)) {
        y <- r[new_indices, ]
      } else {
        mm <- mm[new_indices, ]
      }
    }
    mm_labels <- x$model_matrix_labels
    xpx <- x$xpx
    unc <- x$unique_contrasts
    ssq <- matrix_(0, ncol(y), length(unc),
                   dimnames = list(NULL, modelterm = names(unc)))
    df <- rep.int(0L, length(unc))
    setattr(df, "names", names(unc))
    coeff <- solve(xpx, crossprod(mm, y))
    setattr(coeff, "dimnames", list(mm_labels, NULL))
    for (i in seq_along(unc)) {
      freq <- attr(unc[[i]], "freq")
      fmeans <- unc[[i]] %*% coeff[colnames(unc[[i]]), , drop = FALSE]
      ssq[, i] <- colSums(fmeans^2 * freq)
      df[i] <- ncol(unc[[i]])
    }
    setattr(ssq, "Df", df)
    #
    ssq
  }
  #
  input <- obj$pre_sumsq
  y <- obj$.arraydat
  residuals <- input[[1]]$residuals
  # return
  lapply(input, ssqFn, y = y, r = residuals, new_indices = new_indices)
}

#' Random permutations for \code{\link{arrayAnova}} and \code{\link{tanova}}
#'
#' \code{anovaRandomIndices} creates a matrix of \code{nperm} random
#' permutations
#' @param input a list returned by \code{\link{preAnova}}
#' @param nperm integer value; the number of permutations
#' @param seed an integer value which specifies a seed (default: NULL), or a
#' list of arguments passed to \code{\link{set.seed}}
#' @return A matrix with as many rows as the design matrix and 'nperm' columns
#' @seealso \code{\link{arrayAnova}} and \code{\link{tanova}}
#' @keywords internal
anovaRandomIndices <- function(input, nperm, seed) {
  setSeed(seed)
  out <- shuffleSet(
    nrow(input$dat), nperm,
    how(within = Within(type = "free"),
        plots = Plots(strata = input$dat[[input$factordef$w_id]],
                      type = "free")))
  # return
  t(out)
}

#' Compute F-values
#'
#' \code{compF} computes F-values. This is an internal function called by
#' the high-level function \code{\link{arrayAnova}}.
#' @param obj a list object as returned by \code{\link{preAnova}}
#' @param new_indices if not NULL (default), it must be a vector of numeric
#' indices to be passed to the model matrix. This parameter is used for
#' permutation-based analyses.
#' @param attribs a logical value if dimension and verbose attributes should
#' be attached to the resulting array (default: FALSE)
#' @param verbose a logical value if p-values, effect sizes, and degrees of
#' freedom should be added to the returned array (as 'p_value', 'effect_size',
#' and 'Df' attributes)
#' @return This function returns a numeric matrix of F-values if 'attribs' is
#' FALSE, and an array of F-values if 'attribs' is TRUE. If the verbose variant
#' was requested, the array has the following extra attributes: 'p_value' and
#' 'effect_size', having the same dimension as the array of F-values, and
#' an integer matrix named 'Df' with term- and residual degrees of freedom for
#' each model term.
#' @keywords internal
compF <- function(obj, new_indices = NULL, attribs = FALSE, verbose = FALSE) {
  if (!attribs) verbose <- FALSE
  type <- obj$type
  f_def <- obj$factordef
  ssq <- compSumSq(obj, new_indices = new_indices)
  Df <- lapply(ssq, attr, "Df")
  if (type == "between") {
    model_ssq <- ssq[[1]]
    model_df <- Df[[1]]
    resid_ssq <- obj$pre_sumsq[[1]]$ssq_total - rowSums(model_ssq)
    resid_df <- attr(resid_ssq, "Df") - sum(model_df)
    Fvals <- sweepMatrix(model_ssq, 2, model_df/resid_df, "/",
                         has_NA = FALSE)
    Fvals <- Fvals/resid_ssq
    if (verbose) {
      pvalues <- t(pf(t(Fvals), model_df, resid_df, lower.tail = FALSE))
      if (!is.null(f_def$observed)) {
        ind <- grepl(paste(f_def$observed, collapse = "|"),
                     colnames(model_ssq))
        SS1 <- rowSums(model_ssq[, ind, drop = FALSE])
        SS2 <- model_ssq; SS2[, !ind] <- 0
        ges <- model_ssq / (model_ssq - SS2 + resid_ssq + SS1)
      } else {
        ges <- model_ssq / (model_ssq + resid_ssq)
      }
    }
  } else if (type == "within") {
    model_ssq <- ssq[[2]]
    model_df <- Df[[2]]
    modnames <- dimnames(model_ssq)$modelterm
    m_ind <- !grepl(f_def$w_id, modnames)
    err_ind <- match(
      paste(f_def$w_id, ":", modnames[m_ind], sep = ""),
      modnames)
    resid_ssq <- model_ssq[, err_ind, drop = FALSE]
    resid_df <- model_df[err_ind]
    model_ssq <- model_ssq[, m_ind, drop = FALSE]
    model_df <- model_df[m_ind]
    Fvals <- (model_ssq/resid_ssq) * (resid_df/model_df)
    if (verbose) {
      pvalues <- t(pf(t(Fvals), model_df, resid_df, lower.tail = FALSE))
      SSr <- rowSums(resid_ssq)
      ges <- model_ssq / (model_ssq + SSr)
    }
  } else if (type == "mixed") {
    model_ssq <- ssq[[1]]
    model_df <- Df[[1]]
    bwind <- grepl(paste(f_def$between, collapse = "|"),
                   colnames(model_ssq))
    residnames <- paste(f_def$w_id,
                        gsub("^:|:$", "",
                             gsub(paste(f_def$between, collapse = "|"),
                                  "", colnames(model_ssq))),
                        sep = ":")
    residnames <- gsub(":+", ":", gsub("^:|:$", "", residnames))
    resid_ssq <- ssq[[2]][, unique(residnames), drop = FALSE]
    resid_df <- Df[[2]][colnames(resid_ssq)]
    for (i in colnames(resid_ssq)) {
      ind <- which(residnames == i & bwind)
      resid_ssq[, i] <- resid_ssq[, i] -
        rowSums(model_ssq[, ind, drop = FALSE])
      resid_df[i] <- resid_df[i] - sum(model_df[ind])
    }
    resid_ssq <- resid_ssq[, residnames, drop = FALSE]
    resid_df <- resid_df[residnames]
    Fvals <- model_ssq / resid_ssq
    Fvals <- sweepMatrix(Fvals, 2, resid_df/model_df, "*", has_NA = FALSE)
    if (verbose) {
      pvalues <- t(pf(t(Fvals), model_df, resid_df, lower.tail = FALSE))
      SSr <- rowSums(resid_ssq[, !duplicated(residnames)])
      if (!is.null(f_def$observed)) {
        ind <- grepl(paste(f_def$observed, collapse = "|"),
                     colnames(model_ssq))
        SS1 <- rowSums(model_ssq[, ind, drop = FALSE])
        SS2 <- model_ssq; SS2[, !ind] <- 0
        ges <- model_ssq / (model_ssq - SS2 + SSr + SS1)
      } else {
        ges <- model_ssq / (model_ssq + SSr)
      }
    }
  }
  #
  if (attribs) {
    setattributes(Fvals, obj, "Traditional F statistic")
    if (verbose) {
      setattributes(pvalues, obj, "Traditional P-value")
      setattributes(ges, obj, "Generalized Eta Squared")
      setattr(ges, "Df", NULL)
      Df <- cbind(Df_model = model_df,
                  Df_residual = rep_len(resid_df, length(model_df)))
      names(dimnames(Df)) <- c("modelterm", "")
      setattr(Df, "label", "Degrees of freedom")
      setattr(Fvals, "p_value", pvalues)
      setattr(Fvals, "effect_size", ges)
      setattr(Fvals, "Df", Df)
    }
  }
  # return
  Fvals
}


#' Parameter checks and data preparation in \code{arrayAnova}
#'
#' \code{preAnova} performs parameter checking, creates a data.frame for
#' the ANOVA design and reshapes .arraydat accordingly, prepares the computation
#' of sum-of-squares. It is called by \code{\link{arrayAnova}} and
#' \code{\link{tanova}}.
#' @param .arraydat numeric array
#' @param factordef a list of between-subject factors (named 'between'),
#' within-subject factors ('within'), subject identifier ('w_id'), and a
#' character vector of observed (i.e., not manipulated) variables ('observed').
#' The last element ('observed') is only used in effect size computations.
#' @param bwdat a data.frame with the subject-level variables (usually factors)
#' @param tfce logical value whether TFCE is requested
#' @param perm logical value whether permutation is requested
#' @param tanova_type type of tanova ("tanova", "dissimilarity", "gfp") if this
#' function is called from \code{tanova}
#' @keywords internal
#' @seealso \code{\link{arrayAnova}} and \code{\link{tanova}} are the
#' higher-level function exported to the user, \code{\link{preSumSq}},
#' \code{\link{compF}} and \code{\link{compTanovaEffect}} are internal
#' functions relying on the returned object of \code{preAnova}.
preAnova <- function(.arraydat, factordef, bwdat, verbose, tfce, perm,
                     tanova_type = NULL) {
  # helper function
  checkObligDimnames <- function(method, dimn, full_dimid) {
    print_dimn <- sub("$", "'", sub("^", "'", dimn))
    print_dim <- if (length(dimn) > 1L) "dimensions" else "dimension"
    print_fac <- if (length(dimn) > 1L) "factors" else "factor"
    print_allow <- if (length(dimn) > 1L) "are" else "is"
    if (!all(dimn %in% full_dimid)) {
      stop(sprintf("For %s, the input array must have %s %s.",
                   method, print_dimn, print_dim))
    }
    if (any(dimn %in% factordef$within)) {
      stop(sprintf("For %s, %s %s not allowed as within-subject %s.",
                   method, print_dimn, print_allow, print_fac))
    }
  }
  #
  # fast check of arguments
  #
  # check if .arraydat is an array
  if (is.data.frame(.arraydat)) .arraydat <- as.matrix(.arraydat)
  assertArray(.arraydat, mode = "numeric", min.d = 1L,
              .var.name = ".arraydat")
  has_NA <- anyNA(.arraydat)
  if (has_NA) {
    stop("Assertion on '.arraydat' failed: Contains missing values")
  }
  # check if factordef is a list
  assertList(factordef, types = "character", names = "unique",
             .var.name = "factordef")
  # check if w_id is provided
  if (is.null(factordef$w_id)) factordef$w_id <- "id"
  # save dimension names
  pre_dimnames <- dimnames(.arraydat)
  full_dimnames <- fillMissingDimnames(pre_dimnames,
                                       dim(.arraydat))
  full_dimid_origord <- names(full_dimnames)
  if (!identical(pre_dimnames, full_dimnames)) {
    dimnames(.arraydat) <- full_dimnames
  }
  full_dimid <- names(full_dimnames)
  # check dimension names
  if (!factordef$w_id %in% full_dimid) {
    stop(sprintf(".arraydat has no '%s' dimension identifier",
                 factordef$w_id))
  }
  if (anyNA(full_dimnames) || anyDuplicated(full_dimnames)) {
    stop("Provide array with unique named dimensions")
  }
  # if TFCE is requested, "chan" and "time" dimensions are obligatory
  nr_chanXtime <- nr_chan <- nr_time <- NULL
  if (tfce) {
    checkObligDimnames("TFCE", c("chan", "time"), full_dimid)
    nr_chan <- length(full_dimnames$chan)
    nr_time <- length(full_dimnames$time)
    nr_chanXtime <- nr_chan * nr_time
  } else if (perm$n > 1L) {
    nr_chanXtime <- c(length(full_dimnames$chan),
                      length(full_dimnames$time))
    nr_chanXtime[nr_chanXtime == 0L] <- 1L
    nr_chanXtime <- prod(nr_chanXtime)
  }
  # if tanova is requested, "chan" dimension is obligatory
  # create also a logical variable for tanova
  if (!is.null(tanova_type)) {
    checkObligDimnames(toupper(tanova_type), "chan", full_dimid)
    nr_chan <- length(full_dimnames$chan)
    tanova <- TRUE
  } else {
    tanova <- FALSE
  }
  # check type of anova
  type <-
    if (!is.null(factordef$between)) {
      if (is.null(factordef$within)) {
        "between"
      } else {
        "mixed"
      }
    } else if (!is.null(factordef$within)) {
      "within"
    } else {
      stop("Provide at least one between- and/or within-subject factor")
    }
  # check between subject factors
  if (!is.null(factordef$between)) {
    #  check bwdat
    assertDataFrame(bwdat, .var.name = "bwdat")
    if (!all(c(factordef$between, factordef$w_id) %in% colnames(bwdat))) {
      stop("Between-subject factors and 'bwdat' dataset do not match.")
    }
    # force to data.frame, keep only relevant variables
    bwdat <- as.data.frame(bwdat[, c(factordef$w_id, factordef$between)])
    bwdat[[factordef$w_id]] <- as.character(bwdat[[factordef$w_id]])
    bwdat[, factordef$between] <- lapply(
      bwdat[, factordef$between, drop = FALSE],
      function(x) if (is.factor(x)) droplevels(x) else factor(x)
    )
    # check for missing values
    if (anyNA(bwdat)) stop("Missing values in bwdat are not allowed")
    # align .arraydat and bwdat on w_id
    id <- intersect(full_dimnames[[factordef$w_id]],
                    bwdat[, factordef$w_id])
    if (length(id) < length(full_dimnames[[factordef$w_id]])) {
      .arraydat <- subsetArray(.arraydat,
                               setNames(list(id), factordef$w_id))
      full_dimnames <- dimnames(.arraydat)
      bwdat <- bwdat[match(id, bwdat[[factordef$w_id]]), ]
    }
  }
  # check if .arraydat has proper dimension identifiers
  if (!is.null(factordef$within)) {
    if (!all(factordef$within %in% names(full_dimnames)))
      stop("Missing within-subject dimension identifiers in .arraydat")
  }
  #
  # reshape data
  #
  # if gfp or dissimilarity, modify the input array
  if (tanova) {
    if (tanova_type == "intensity") {
      .arraydat <- compGfp(.arraydat)
      full_dimnames <- dimnames(.arraydat)
    }
    if (tanova_type == "topography")
      .arraydat <- scaleChan(.arraydat)
  }
  # set contrasts
  opcons <- options("contrasts")
  options(contrasts = c("contr.helmert", "contr.poly"))
  on.exit(options(opcons))
  # model dimensions should be in rows
  modeldims <- c(factordef$w_id, factordef$within)
  # all other dimensions become columns; "chan" and "time" come first
  keepdims <- setdiff(names(full_dimnames), modeldims)
  if ("time" %in% keepdims) keepdims <- unique(c("time", keepdims))
  if ("chan" %in% keepdims) keepdims <- unique(c("chan", keepdims))
  .arraydat <- mergeDims(.arraydat, list(modeldims, keepdims),
                         return_attributes = FALSE,
                         keep_dimnames = FALSE)
  # model data
  dat <- expand.grid(full_dimnames[modeldims], KEEP.OUT.ATTRS = FALSE,
                     stringsAsFactors = FALSE)
  colnames(dat) <- modeldims
  dat[,factordef$between] <- bwdat[,factordef$between]
  # transform characters to factors, and scale numeric variables
  dat[] <- lapply(
    names(dat), function(n) {
      x <- dat[[n]]
      if (storage.mode(x) == "character") {
        level <-
          if (n %in% factordef$between) {
            levels(bwdat[[n]])
          } else {
            full_dimnames[[n]]
          }
        factor(x, levels = level)
      } else if (is.numeric(x)) {
        scale(x)
      } else {
        x
      }
    })
  #
  # formulas
  #
  mean_formula <- as.formula(paste(
    "~",
    gsub("^\\*|\\*$", "",
         paste(
           paste(factordef$between, collapse = "*"),
           paste(factordef$within, collapse = "*"),
           sep = "*")
    )
  ))
  within_formula <- as.formula(paste(
    "~",
    gsub("^\\*|\\*$", "",
         paste(
           paste(factordef$w_id, collapse = "*"),
           paste(factordef$within, collapse = "*"),
           sep = "*")
    )
  ))
  model_formula <-
    if (type == "between" | tanova) {
      list(mean_formula)
    } else {
      list(mean_formula, within_formula)
    }
  #
  # model matrix and others
  #
  pre_sumsq <- vector("list", length(model_formula))
  for (i in seq_along(model_formula)) {
    tempy <- if (i == 1L && perm$type == "residuals") .arraydat else NULL
    pre_sumsq[[i]] <- preSumSq(model_formula[[i]], form_data = dat,
                               scaled = TRUE, y = tempy,
                               between = character())
  }
  if (type == "between" && !tanova) {
    pre_sumsq[[1]]$ssq_total <- colSums(
      sweepMatrix(.arraydat, 2, colMeans(.arraydat), "-", has_NA = FALSE)^2)
    setattr(pre_sumsq[[1]]$ssq_total, "Df",
            length(levels(dat[[factordef$w_id]])) - 1L)
  }
  #
  # dimensions for later use
  #
  # test statistic
  pre_dimnames$modelterm <- full_dimnames$modelterm <-
    attr(terms(mean_formula), "term.labels")
  full_dimid <- names(full_dimnames)
  full_dims <- vapply(full_dimnames, length, integer(1L))
  setattr(full_dims, "names", full_dimid)
  teststat_dimid <- c(keepdims, "modelterm")
  setattr(teststat_dimid, "origpos",
          match(teststat_dimid, c(full_dimid_origord, "modelterm")))
  # not chan or time
  otherdims <-
    if (!is.null(nr_chanXtime)) {
      other_dimid <- setdiff(teststat_dimid, c("chan", "time"))
      list(dimid = other_dimid,
           index = match(other_dimid, teststat_dimid),
           size = prod(full_dims[other_dimid]))
    } else {
      list(NULL)
    }
  #
  #
  # return
  #
  list(.arraydat = .arraydat, dat = dat, factordef = factordef,
       pre_sumsq = pre_sumsq, type = type, has_NA = has_NA,
       mean_formula = mean_formula,
       full_dimnames = full_dimnames, full_dims = full_dims,
       pre_dimnames = pre_dimnames,
       teststat_dimid = teststat_dimid,
       nr_chan = nr_chan, nr_time = nr_time, nr_chanXtime = nr_chanXtime,
       otherdims = otherdims, tanova_type = tanova_type)
}



#' Perform ANOVA (potentially with TFCE correction) on arrays
#'
#' \code{arrayAnova} performs point-to-point ANOVAs on arrays. Permutation-based
#' p-values and Threshold-free Cluster Enhancement (TFCE) correction can be
#' requested.
#' @param .arraydat a numeric array with named dimnames containing the EEG (or
#' other) data. Missing values are not allowed.
#' @param factordef a named list of factor definitions, containing the following
#' elements:
#' \itemize{
#' \item{between: }{character vector of between-subject factors (default: NULL)}
#' \item{within: }{character vector of within-subject factors (default: NULL)}
#' \item{w_id: }{name of the dimension which identifies the subjects
#' (default: "id")}
#' \item{observed: }{character vector of observed (i.e., not manipulated)
#' variables (default: NULL). Only used for effect-size calculations.}
#' }
#' @param bwdat a data.frame which contains the identification codes
#' (factordef$w_id) and all subject-level variables (usually factors) listed in
#' 'factordef$between'. Missing values are not allowed.
#' @param verbose logical value indicating if p-values and effect sizes should
#' be computed for the traditional ANOVA results
#' @param perm either 1) NULL (the default) or FALSE (the same as NULL),
#' both of which mean no permutation, or 2) TRUE, which means
#' permutation with default parameters, or 3) an object as returned by
#' \code{\link{permParams}} with custom parameters (see Examples and also
#' \code{\link{permParams}}).\cr
#' Custom parameters can be also provided by \code{perm = .(key = value)} to
#' save typing (this works by calling \code{\link{permParams}} with the
#' given parameters).
#' @param tfce either 1) NULL (the default) or FALSE (the same as NULL), both of
#' which mean no TFCE correction, or 2) TRUE, which means TFCE correction with
#' default parameters, or 3) an object as returned by \code{\link{tfceParams}}
#' with custom TFCE parameters (see Examples and also \code{\link{tfceParams}}).\cr
#' Custom parameters can be also provided by \code{tfce = .(key = value)} to
#' save typing (this works by calling \code{\link{tfceParams}} with the given
#' parameters).
#' @param parallel either 1) NULL (the default) or FALSE (the same as NULL),
#' both of which mean single-core computation, or 2) TRUE, which means
#' parallelization with default parameters, or 3) an object as returned by
#' \code{\link{parallelParams}} with custom parameters (see Examples and also
#' \code{\link{parallelParams}}).\cr
#' Custom parameters can be also provided by \code{parallel = .(key = value)} to
#' save typing (this works by calling \code{\link{parallelParams}} with the
#' given parameters).
#' @param seed an integer value which specifies a seed (default: NULL), or a
#' list of arguments passed to \code{\link{set.seed}}
#' @details The function assumes that the input array contains at least three
#' named dimensions: "chan" (corresponding to the channels [electrodes]), "time"
#' (corresponding to time points), and a subject identifier as given by
#' \code{factordef$w_id}. All dimensions which are not listed as
#' within-subject factors are treated in a similar way as chan and time, that is
#' separate ANOVA-s are computed for each level of those dimensions.
#' @note The function computes type I p-values - this is correct if the design
#' is fully balanced and orthogonal (if the number of between-subject
#' factors is one, it may have slightly unequal group sizes).
#' @export
#' @return A list object with the following numeric arrays:
#' \itemize{
#' \item{stat: }{a numeric array of F-values, with attribute 'Df' (an integer
#' matrix) containing the term- and residual degrees of freedom for each model
#' term. Optionally (if 'verbose' is TRUE), the F-value array has two extra
#' attributes: 'p_value' and 'effect_size', consisiting of the traditional
#' p-values and the generalized eta squared effect size measures
#' (see References), respectively.}
#' \item{stat_corr: }{a numeric array of TFCE-corrected F-values (if requested)}
#' \item{p_corr: }{a numeric array of permutation-based p-values (if requested)}
#' }
#' @seealso See also the related methods to explore the results, e.g.
#' \code{\link{extract.arrayAnova}}, \code{\link{summary.arrayAnova}}, and the
#' plotting functions \code{\link{modelplot}}, or the lower-level
#' \code{\link{imageValues}} and \code{\link{imagePvalues}}.
#' @references The TFCE correction follows:\cr
#' Mensen, A. and Khatami, R. (2013)
#' Advanced EEG analysis using threshold-free cluster-enhancement and
#' non-parametric statistics. Neuroimage, 67, 111-118.
#' doi:10.1016/j.neuroimage.2012.10.027  \cr
#'
#' The Generalized Eta Squared effect size statistic is described in:\cr
#' Olejnik, S., Algina, J. (2003) Generalized eta and omega squared statistics:
#' Measures of effect size for some common research designs. Psychological
#' Methods 8: pp. 434-447. doi:10.1037/1082-989X.8.4.434 \cr
#' Bakeman, R. (2005) Recommended effect size statistics for repeated measures
#' designs. Behavior Research Methods, 37 (3), 379-384.
#' @examples
#' # example dataset
#' data(erps)
#' dat_id <- attr(erps, "id") # to get group memberships
#' chan_pos <- attr(erps, "chan") # needed for TFCE correction
#'
#' # make the dataset unbalanced to illustrate that if there is only one
#' # between-subject factor, Type 1 and Type 2 results are identical
#' erps <- subsetArray(erps, list(id = 2:20))
#' dat_id <- dat_id[2:20, ]
#'
#' # average the data in each 12 ms time-bin to decrease the computational
#' # burden (not needed in serious analyses)
#' tempdat <- avgBin(erps, "time", 6)
#'
#' # analyze the effect of the reading group (between-subject factor) and the
#' # two experimental conditions (stimclass, pairtye; within-subject factors)
#' # for each channel and time sample (without requesting TFCE correction); this
#' # means we run 1518 repeated measures ANOVAs
#' system.time(
#'     result_eegr <- arrayAnova(tempdat,
#'                               list(between = "group",
#'                                    within = c("stimclass", "pairtype"),
#'                                    w_id = "id",
#'                                    observed = "group"),
#'                               bwdat = dat_id)
#' )
#'
#' # if package 'ez' is installed, you can compare the results; we take only a
#' # subset of the data (choosing "02" channel at time point "207") because
#' # ezANOVA is much slower
#' if (requireNamespace("ez")) {
#'     sub <- list(chan = "O2", time = "207")
#'     tempdat_ez <- transformArray(y ~ ., tempdat, subset = sub)
#'     tempdat_ez$group <- factor(dat_id$group[match(tempdat_ez$id, dat_id$id)])
#'     result_ez <- ez::ezANOVA(tempdat_ez, y, id, .(stimclass, pairtype),
#'                              between = group, observed = group, type = 2)
#'     # compare results
#'     ez_F <- result_ez$ANOVA$F   # F-values
#'     ez_p <- result_ez$ANOVA$p   # p-values
#'     ez_es <- result_ez$ANOVA$ges   # effect sizes
#'     eegr_F <- as.vector(
#'         subsetArray(extract(result_eegr, "stat"), sub))
#'     eegr_p <- as.vector(
#'         subsetArray(extract(result_eegr, "p"), sub))
#'     eegr_es <- as.vector(
#'         subsetArray(extract(result_eegr, "es"), sub))
#'     stopifnot(
#'         all.equal(ez_F, eegr_F),
#'         all.equal(ez_p, eegr_p),
#'         all.equal(ez_es, eegr_es)
#'     )
#' }
#'
#' # the between-subject variable could be numeric, too. Let's create an 'age'
#' # variable and analyze the effects.
#' dat_id$age <- rnorm(nrow(dat_id), 10, 1)
#' result_eegr_c <- arrayAnova(tempdat,
#'                             list(between = "age",
#'                                  within = c("stimclass", "pairtype"),
#'                                  w_id = "id",
#'                                  observed = "age"),
#'                             bwdat = dat_id)
#'
#' # if package 'car' is installed, you can compare the results; we take only a
#' # subset of the data (choosing "01" channel at time point "195")
#' if (requireNamespace("car")) {
#'     #
#'     # subsetting indices
#'     subs <- list(chan = "O1", time = "195")
#'     #
#'     # extract F values and p values from the eegR results
#'     eegr_F <- subsetArray(extract(result_eegr_c, "stat"), subs)
#'     eegr_p <- subsetArray(extract(result_eegr_c, "p"), subs)
#'     #
#'     # run the same analysis with the 'car' package (first we have to reshape
#'     # the data)
#'     tempdat_car <- subsetArray(tempdat, subs)
#'     tempdat_car <- mergeDims(tempdat_car,
#'                              list("id", c("stimclass", "pairtype")))
#'     tempdat_car <- data.frame(tempdat_car)
#'     tempdat_car$id <- dat_id$id
#'     tempdat_car$age <- dat_id$age
#'     idata <- expand.grid(dimnames(tempdat)[c("stimclass", "pairtype")])
#'     maov <- lm(cbind(A.ident, B.ident, C.ident,
#'                      A.subst, B.subst, C.subst,
#'                      A.transp, B.transp, C.transp) ~ age,
#'                data = tempdat_car)
#'     maov <- car::Anova(maov, idata = idata, idesign= ~stimclass*pairtype,
#'                        type = 2)
#'     maov <- summary(maov, multivariate = FALSE)$univ
#'     car_F <- maov[names(eegr_F), "F"]
#'     car_p <- maov[names(eegr_p), "Pr(>F)"]
#'     #
#'     # compare results
#'     stopifnot(
#'         all.equal(as.vector(car_F), as.vector(eegr_F)),
#'         all.equal(as.vector(car_p), as.vector(eegr_p))
#'     )
#' }
#'
#' # in order to use TFCE correction, the channel neigbourhood matrix is needed
#' # (see ?tfceParams and ?chanNb)
#' ChN <- chanNb(chan_pos, alpha = 0.7)
#'
#' # now analyze the data by collapsing the pairtypes, and apply TFCE correction
#' # (note: this will take a couple of seconds); use more randomization
#' # runs (n should be several thousand instead of 499L) in serious analyses
#' tempdat <- avgDims(tempdat, "pairtype")
#' result_tfce <- arrayAnova(tempdat,
#'                           list(between = "group",
#'                                within = "stimclass",
#'                                w_id = "id",
#'                                observed = "group"),
#'                           bwdat = dat_id,
#'                           perm = .(n = 499L),
#'                           tfce = .(ChN = ChN),
#'                           parallel = .(ncores = 2))
#'
#' # plot the corrected and uncorrected results
#' modelplot(result_tfce)
#' modelplot(result_tfce, type = "unc")
#'
#' # compare traditional and TFCE p-values
#' p_all <- extract(result_tfce, c("p", "p_corr"))
#' p_all <- bindArrays(trad = p_all$p, tfce = p_all$p_corr,
#'                     along_name = "method")
#'
#' # plot p-values after -log transformation to increase discriminability;
#' # note how the sporadic effects disappear
#' p_plot <- imageValues(-log(p_all)) # returns a ggplot object
#' p_plot
#'
arrayAnova <- function(.arraydat, factordef, bwdat = NULL, verbose = TRUE,
                       perm = NULL, tfce = NULL, parallel = NULL,
                       seed = NULL) {
  # deparse tfce and parallel
  mcall <- match.call()
  perm <- argumentDeparser(substitute(perm), "permParams",
                           null_params = list(n = 0L))
  tfce <- argumentDeparser(substitute(tfce), "tfceParams")
  ob <- getDoBackend()
  parallel <- argumentDeparser(substitute(parallel), "parallelParams",
                               null_params = list(ncores = 0L))
  if (parallel$cl_new) {
    on.exit(stopCluster(parallel$cl))
  }
  on.exit(setDoBackend(ob), add = TRUE)
  #
  # compute statistics
  out <- arrayTtestAnova(
    "ANOVA", .arraydat,
    factordef = factordef, bwdat = bwdat, verbose = verbose,
    perm = perm, tfce = tfce, parallel = parallel, seed = seed)
  #
  # replace call to the original
  out$call <- mcall
  # set class
  setattr(out, "class", "arrayAnova")
  # return
  out
}

#' United function for \code{\link{arrayTtest}} and \code{\link{arrayAnova}}
#' @keywords internal
arrayTtestAnova <- function(test,
                            .arraydat, .arraydat2 = NULL,
                            factordef = NULL, bwdat = NULL,
                            paired = FALSE, groups = NULL,
                            mu = 0, var_equal = FALSE, id_dim = "id",
                            verbose = TRUE, perm = permParams(n = 0L),
                            tfce = NULL, parallel = NULL, seed = NULL) {
  # helper function for back-transform to original
  backFn <- function(x, obj) {
    if (is.null(dim(x)) || !is.null(dimnames(x))) return(x)
    origpos <- order(attr(obj$teststat_dimid, "origpos"))
    apermArray(x, origpos, keep_attributes. = TRUE)
  }
  # workaround to avoid CRAN warnings
  x <- NULL; i <- NULL
  rm(x, i)
  # prepare arguments
  nperm <- as.integer(perm$n)
  use_tfce <- !is.null(tfce)
  #
  if (test == "ANOVA") {
    #
    # prepare data
    input <- preAnova(.arraydat, factordef, bwdat, verbose = verbose,
                      tfce = use_tfce, perm = perm)
    rm(.arraydat)
    #
    # observed test statistic
    stat_obs <- compF(input, attribs = TRUE, verbose = verbose)
  } else if (test == "T_TEST") {
    #
    # prepare data
    input <- preTtest(.arraydat, .arraydat2, paired, groups, mu, var_equal,
                      id_dim, verbose, tfce = use_tfce, perm = nperm > 1L)
    rm(.arraydat, .arraydat2)
    #
    # observed test statistic
    stat_obs <-
      if (input$type == "independent_samples") {
        isTtest(input, attribs = TRUE)
      } else {
        osTtest(input, attribs = TRUE)
      }
  }
  #
  # TFCE correction
  if (use_tfce) {
    has_neg <- if (test == "ANOVA") FALSE else TRUE
    verbose_label <- sprintf(
      "TFCE-corrected %s statistic",
      if (test == "ANOVA") "F" else "t")
    tfce_obs <- anovaTfce(stat_obs, 1:2,
                          has_neg = has_neg,
                          nr_chan = input$nr_chan,
                          nr_time = input$nr_time, tfce = tfce)
    setattr(tfce_obs, "label", verbose_label)
  }
  #
  # permutations
  if (nperm > 1L) {
    obs <- if (use_tfce) tfce_obs else stat_obs
    perm_pvalues <- permPvalues(input, nperm, seed, obs, parallel, tfce)
  }
  #
  # prepare output
  out <- list(call = match.call())
  #
  # back-transform to original shape
  if (verbose) {
    setattr(stat_obs, "Df", backFn(attr(stat_obs, "Df"), input))
    setattr(stat_obs, "p_value", backFn(attr(stat_obs, "p_value"), input))
    setattr(stat_obs, "effect_size",
            backFn(attr(stat_obs, "effect_size"), input))
  }
  stat_obs <- backFn(stat_obs, input)
  out$stat <- stat_obs
  if (use_tfce) {
    tfce_obs <- backFn(tfce_obs, input)
    out$stat_corr <- tfce_obs
  }
  if (nperm > 1L) {
    perm_pvalues <- backFn(perm_pvalues, input)
    out$p_corr <- perm_pvalues
  }
  #
  # attach dimnames
  dimind <- sort(attr(input$teststat_dimid, "origpos"))
  out$dimnames <- input$pre_dimnames[dimind]
  out$dim <- dim(out$stat)
  setattr(out$dim, "names", names(out$dimnames))
  #
  # return
  out
}



#' Extract interaction
#'
#' \code{extractInteraction} computes the highest-order (interaction) effect
#' (i.e. the difference between differences) from means of each cell of the
#' design matrix
#' @param dat a named list with array elemens, see details below
#' @param sep the character which separates the name of factor levels in the
#' model terms (default: ".")
#' @param sep_fixed logical indicating as sep should be treated "as is" while
#' splitting the model term names
#' @details This function is not intended for direct use, and it only works for
#' specifically formatted data. The input data is supposed to be the return
#' value of \code{\link{marginalMeans}}; a named list which contains the
#' marginal means for each effect of an ANOVA. The names are the names of the
#' factor(s) - for interaction effects the factor names are separated by ":".
#' The list elements are three-dimensional arrays with the following dimension
#' names: modelterm, chan and time. For pure main effects, this function
#' computes all pairwise differences between the levels of the factor. For
#' interaction effects, the function computes all pairwise differences at the
#' level of the highest order interaction. That means for the A:B interaction
#' where factor A has levels Aa & Ab and factor B has levels Ba, Bb and Bc,
#' the function computes the following differences: (Aa-Ab) - (Ba-Bb),
#' (Aa-Ab) - (Ba-Bc), (Aa-Ab) - (Bb-Bc).
#' @export
#' @return A named list is returned with the same names as the input, but the
#' list elements are ia_level x chan x time arrays.
extractInteraction <- function(dat, sep = ".", sep_fixed = TRUE) {
  # function to compute the weights (+1 or -1) of the factor levels (f)
  iasign <- function(f) {
    combin <- lapply(f, function(x) combn(levels(x), 2))
    combin_ind <- expand.grid(lapply(combin,
                                     function(x) seq.int(ncol(x))))
    combin_names <- expand.grid(lapply(f, function(x)
      combn(levels(x), 2, FUN = paste, collapse = "-")))
    combin_names <- apply(combin_names, 1, paste, collapse = ".")
    out <- sapply(1:nrow(combin_ind), function(ii) {
      out <- rep(0, nrow(f))
      ind <- sapply(names(combin), function(x)
        f[, x] %in% combin[[x]][, combin_ind[ii, x]])
      ind <- apply(ind, 1, all)
      if (sum(ind) > 2) {
        tempf <- sapply(droplevels(f[ind, , drop = F]), as.numeric)
        tempf <- sign(1.5 - tempf)
        out[which(ind)] <- apply(tempf, 1, prod)
      } else {
        out[which(ind)] <- c(1, -1)
      }
      return(out)
    })
    colnames(out) <- combin_names
    return(out)
  }
  # function to compute the interaction effects
  compia <- function(x, modterm_name) {
    modterm <- dimnames(x)$modelterm
    facs <- strsplit(modterm, sep, fixed = sep_fixed)
    facs <- data.frame(matrix(unlist(facs),
                              ncol = length(facs[[1]]),
                              byrow = TRUE))
    colnames(facs) <- unlist( strsplit(modterm_name, ":") )
    signs <- iasign(facs)
    rownames(signs) <- modterm
    avg_ia <- fnDims(x, "modelterm",
                     function(xx, y) t( crossprod(xx, y) ),
                     list(y = signs),
                     newdims = list(ia_level = colnames(signs)),
                     vectorized = TRUE)
    return( avg_ia )
  }
  # ------
  # run computations
  out <- mapply(compia, dat, names(dat), SIMPLIFY = FALSE)
  # return
  out
}

# TANOVA functions ===========


#' Compute generalized dissimilarity
#'
#' \code{compTanovaEffect} computes the generalized effect size measure as
#' given in Koenig & Melie-Garcia, 2009, p177. This is an internal function
#' called by the high-level function \code{\link{tanova}}.
#' @param obj a list object as returned by \code{\link{preAnova}}
#' @param new_indices if not NULL (default), it must be a vector of numeric
#' indices to be passed to the model matrix. This parameter is used for
#' permutation-based analyses.
#' @param attribs a logical value if dimension attributes should be attached to
#' the resulting array (default: FALSE)
#' @return This function returns a numeric matrix of effects if 'attribs' is
#' FALSE, and an array of effects if 'attribs' is TRUE.
#' @keywords internal
compTanovaEffect <- function(obj, new_indices = NULL, attribs = FALSE) {
  model <- obj$pre_sumsq[[1]]
  mm <- model$model_matrix
  if (!is.null(new_indices)) {
    if (!is.null(model$residuals)) {
      y <- model$residuals[new_indices,]
    } else {
      mm <- mm[new_indices, ]
      y <- obj$.arraydat
    }
  } else {
    y <- obj$.arraydat
  }
  mm_labels <- model$model_matrix_labels
  xpx <- model$xpx
  unc <- model$unique_contrasts
  #
  gfp <- !"chan" %in% obj$teststat_dimid
  #
  outdimn <- setdiff(obj$teststat_dimid, "chan")
  out <- matrix_(0, prod(obj$full_dims[setdiff(outdimn, "modelterm")]),
                 obj$full_dims["modelterm"])
  #
  coeff <- solve(xpx, crossprod(mm, y))
  setattr(coeff, "dimnames", list(mm_labels, NULL))
  if (gfp) {
    for (i in seq_along(unc)) {
      fmeans <- unc[[i]] %*% coeff[colnames(unc[[i]]), , drop = FALSE]
      out[, i] <- colMeans(abs(fmeans))
    }
  } else {
    for (i in seq_along(unc)) {
      fmeans <- unc[[i]] %*% coeff[colnames(unc[[i]]), , drop = FALSE]
      setattr(fmeans, "dim", c(nrow(fmeans), obj$nr_chan, nrow(out)))
      out[, i] <- colMeans(compGfp(fmeans, channel_dim = 2L))
    }
  }
  if (attribs) {
    label <- switch(
      obj$tanova_type,
      both = "Generalized dissimilarity of maps",
      topography = "Generalized dissimilarity of topographies",
      intensity = "Generalized dissimilarity of intensites")
    setattributes(out, obj, label, outdimn)
    dimn <-  setNames(vector("list", length(outdimn)), outdimn)
    setattr(out, "dimnames", dimn)
  }
  #
  out
}

#' Compute p-values and length-corrected p-values in TANOVA
#'
#' \code{compPvalueTanova} is a helper function to compute p-values and
#' duration-corrected p-values in the high-level \code{\link{tanova}} function.
#' @param effect_perm an array of permuted effects (including the observed
#' effect as the first level of the 'perm' dimension)
#' @param pcrit a vector of p-value limits for correction
#' @return a list of two arrays: the p-values and the corrected p-values
#' @keywords internal
compPvalueTanova <- function(effect_perm, pcrit) {
  # helper function
  consecLimit <- function(sigvec, .dim, alpha) {
    out <- matrixRle(matrix_(sigvec, .dim))
    quantile(out$lengths[out$values > 0], 1 - alpha)
  }
  # p-values
  dims <- dim(effect_perm)
  names(dims) <- names(dimnames(effect_perm))
  nperm <- dims["perm"]
  pvalues_perm <- fnDims(effect_perm, "perm", colRanks,
                         arg_list = list(preserveShape = TRUE),
                         vectorized = TRUE, keep_dimorder = FALSE)
  pvalues_perm <- (nperm + 1 - pvalues_perm) / nperm
  pvalues <- subsetArray(pvalues_perm, list(perm = 1L))
  setattr(pvalues, "dimnames", dimnames(pvalues_perm)[-1L])
  setattr(pvalues, "label", "Permuted P-value")
  out <- list(p = pvalues)
  # consecutive sign. criterion
  if ("time" %in% names(dims)) {
    pvalues_perm <- apermArray(pvalues_perm, first = c("time", "perm"))
    pvalues_consec <- array_(1, dim(pvalues_perm)[-2L],
                             dimnames(pvalues_perm)[-2L])
    for (alpha in sort(pcrit, TRUE)) {
      consec_limit <- fnDims(pvalues_perm < alpha,
                             1:2, consecLimit,
                             arg_list = list(alpha = alpha))
      temp <- matrixRle(
        array2mat(pvalues < alpha, "time",
                  return_attributes = FALSE,
                  keep_dimnames = FALSE))
      ind <- temp$lengths < consec_limit[temp$matrixcolumn]
      temp$values[ind] <- 0
      temp <- inverse.matrixRle(temp)
      pvalues_consec[temp > 0] <- alpha
    }
    pvalues_consec <- apermArray(pvalues_consec, names(dimnames(pvalues)))
    setattr(pvalues_consec, "label",
            "Permuted P-value (with min. duration correction)")
    out[["p_corr"]] <- pvalues_consec
  }
  # return
  out
}

#' Compute effect size in TANOVA
#'
#' \code{compZTanova} is a helper function to compute corrected test statistic
#' in the high-level \code{\link{tanova}} function. It is actually the z-scored 
#' generalized dissimilarity statistic.
#' @param effect_perm an array of permuted effects (including the observed
#' effect as the first level of the 'perm' dimension)
#' @param label the label of the observed test statistic
#' @keywords internal
compZTanova <- function(effect_perm, label = "") {
  out <- scaleArray(effect_perm, by_dims = "perm",
                    base_subset = list(perm = -1L))
  out <- subsetArray(out, list(perm = 1L))
  setattr(out, "label", paste0("Z-scored ", tolower(label)))
  # return
  out
}


#' Topographical ANOVA (TANOVA) and related methods
#'
#' \code{tanova} performs point-to-point topographical ANOVA on arrays. Related
#' methods are GFP-analysis (which is based on the intensity of the signal) and
#' DISS-analysis (focusing only on the global dissimilarity by comparing
#' normalized topographies). See References for further details.
#' @param .arraydat a numeric array with named dimnames containing the EEG (or
#' other) data. Missing values are not allowed.
#' @param factordef a named list of factor definitions, containing the following
#' elements:
#' \itemize{
#' \item{between: }{character vector of between-subject factors (default: NULL)}
#' \item{within: }{character vector of within-subject factors (default: NULL)}
#' \item{w_id: }{name of the dimension which identifies the subjects
#' (default: "id")}
#' }
#' @param bwdat a data.frame which contains the identification codes
#' (factordef$w_id) and all subject-level variables (usually factors) listed in
#' 'factordef$between'. Missing values are not allowed.
#' @param type a character value of "both" (default), "topography", or
#' "intensity". For "both", the analysis is based on the raw amplitude maps,
#' and thereby, it is sensitive to both topographic and intensity differences
#' between the conditions. For "topography", the analysis is based on the 
#' normalized maps, thereby, it can reveal topographic differences. For 
#' "intensity", only the intensity (as measured by the GFP) of the scalp maps 
#' is analyzed. 
#' @param verbose logical value indicating if p-values should be computed
#' @param perm either 1) NULL or FALSE (the same as NULL),
#' both of which mean no permutation, or 2) TRUE, which means
#' permutation with default parameters (the default), or 3) an object as
#' returned by \code{\link{permParams}} with custom parameters (see Examples
#' and also \code{\link{permParams}}).\cr
#' Custom parameters can be also provided by \code{perm = .(key = value)} to
#' save typing (this works by calling \code{\link{permParams}} with the
#' given parameters).
#' @param parallel either 1) NULL (the default) or FALSE (the same as NULL),
#' both of which mean single-core computation, or 2) TRUE, which means
#' parallelization with default parameters, or 3) an object as returned by
#' \code{\link{parallelParams}} with custom parameters (see Examples and also
#' \code{\link{parallelParams}}).\cr
#' Custom parameters can be also provided by \code{parallel = .(key = value)} to
#' save typing (this works by calling \code{\link{parallelParams}} with the
#' given parameters).
#' @param seed an integer value which specifies a seed (default: NULL), or a
#' list of arguments passed to \code{\link{set.seed}}
#' @param pcrit the significance level (or a vector of multiple levels) for the
#' minimum duration correction (default: 0.05)
#' @details The function assumes that the input array contains at least two
#' named dimensions: chan (corresponding to the channels [electrodes]) and time
#' (corresponding to time points). All dimensions which are not listed as
#' within-subject factors are treated in a similar way as chan and time, that is
#' separate TANOVA-s are computed for each level of those dimensions.
#' @note The function computes type I p-values - this is correct if the design
#' is fully balanced and orthogonal (if the number of between-subject
#' factors is one, it may have slightly unequal group sizes).
#' @export
#' @return A list object with z-scores and raw effect statistics, and 
#' uncorrected and corrected p-values.
#' @seealso See also the related methods to explore the results, e.g.
#' \code{\link{extract.tanova}}, \code{\link{summary.tanova}}, and the plotting
#' function \code{\link{modelplot.tanova}}.
#' @references Koenig, T., Melie-Garcia, L. (2009) Statistical analysis of
#' multichannel scalp field data. In: Michel, C.M., Koenig, T., Brandeis, D.,
#' Gianotti, L.R.R., Wackermann, J. (eds) Electrical neuroimaging. Cambridge
#' University Press\cr
#' See also the MATLAB toolbox by Koenig T, Kottlow M, Stein M,
#' Melie-Garcia L (2011) Ragu: A free tool for the analysis of EEG and MEG
#' event-related scalp field data using global randomization statistics.
#' Computational Intelligence and Neuroscience, 2011:938925
#' See also
#' @examples
#' # example dataset
#' data(erps)
#' dat_id <- attr(erps, "id") # to get group memberships
#'
#' # average the data in each 12 ms time-bin to decrease the computational
#' # burden (not needed in serious analyses)
#' tempdat <- avgBin(erps, "time", 6)
#'
#' # Analyze the effect of the reading group (between-subject factor) and the
#' # two experimental conditions (stimclass, pairtye; within-subject factors)
#' # for each channel and time sample, taking into account both intensity and
#' # distributional differences (the default: type = "tanova")
#' # Note that the number of permutations should be increased for serious
#' # purposes (here we use the default: 999L).
#' result_tanova <- tanova(tempdat,
#'                         list(between = "group",
#'                              within = c("stimclass", "pairtype"),
#'                              w_id = "id"),
#'                         bwdat = dat_id,
#'                         parallel = .(ncores = 2))
#'
#' # plot results (for now, only p-values and only for time > 0)
#' modelplot(result_tanova, what = "p", time_window = c(0, Inf))
tanova <- function(.arraydat, factordef, bwdat = NULL,
                   type = c("both", "topography", "intensity"),
                   verbose = TRUE, perm = TRUE, parallel = NULL,
                   seed = NULL, pcrit = 0.05) {
  # helper function for back-transform to original
  backFn <- function(x, obj) {
    origpos <- attr(obj$teststat_dimid, "origpos")
    origpos <- origpos[obj$teststat_dimid != "chan"]
    x <- apermArray(x, order(origpos), keep_attributes. = TRUE)
    setattr(x, "dimnames", NULL)
  }
  # CRAN check
  x <- i <- NULL
  rm(x, i)
  # some arguments must be pre-checked (e.g. deparse parallel)
  type <- match.arg(type)
  mcall <- match.call()
  ob <- getDoBackend()
  perm <- argumentDeparser(substitute(perm), "permParams",
                           null_params = list(n = 0L))
  parallel <- argumentDeparser(substitute(parallel), "parallelParams",
                               null_params = list(ncores = 0L))
  if (parallel$cl_new) {
    on.exit(stopCluster(parallel$cl))
  }
  on.exit(setDoBackend(ob), add = TRUE)
  #
  # prepare data
  input <- preAnova(.arraydat, factordef, bwdat, verbose,
                    tfce = FALSE, perm = perm,
                    tanova_type = type)
  rm(.arraydat)
  #
  # prepare output
  out <- list(call = mcall)
  #
  # calculate effect
  es_obs <- compTanovaEffect(input, attribs = TRUE)
  #     if (verbose) {
  #         factor_means <- marginalMeans(input$mean_formula,
  #                                       input$dat, input$.arraydat,
  #                                       dimnames(es_obs)[-length(dim(es_obs))],
  #                                       keep_term_order = !iaterms_last,
  #                                       residualmean = FALSE)
  #         out$factor_means <- factor_means
  #     }
  if (perm$n > 1L) {
    # generate random orders (dim(randind) = nrow(dat) x nperm)
    randind <- anovaRandomIndices(input, perm$n, seed)
    # run calculations
    chunks <- min(10L, ceiling(perm$n/max(1L, getDoParWorkers())))
    export <- 
      if (!is.null(parallel$cl)) {
        c("iter", "foreach", "%do%", "colMaxs", "compTanovaEffect")
      } else {
        NULL
      }
    es_perm <-
      foreach(x = iter(randind, by = "col", chunksize = chunks),
              .combine = c, .inorder = FALSE,
              .options.snow = parallel$snow_options,
              .options.multicore = parallel$mc_options,
              .export = export) %dopar%
              {
                foreach(i = 1:ncol(x)) %do%
                  compTanovaEffect(input, x[,i])
              }
    # reshape permuted tanova values
    es_perm <- c(es_obs, unlist(es_perm, use.names = FALSE))
    setattr(es_perm, "dim", c(dim(es_obs), perm$n + 1L))
    dimn <-  c(dimnames(es_obs), list(perm = NULL))
    setattr(es_perm, "dimnames", dimn)
    # p-values
    pvals <- compPvalueTanova(es_perm, pcrit)
    # standardized GFP
    zgfp <- compZTanova(es_perm, attr(es_obs, "label"))
  }
  # back-transform to original dimorder
  out$stat <- backFn(es_obs, input)
  rm(es_obs)
  if (perm$n > 1L) {
    # zgfp
    out$stat_corr <- backFn(zgfp, input)
    # p-values
    pvals <- lapply(pvals, backFn, obj = input)
    setattr(out$stat, "p_value", pvals[["p"]])
    if (!is.null(pvals[["p_corr"]]))
      out$p_corr <- pvals[["p_corr"]]
  }
  #
  # attach dimnames
  notchan <- input$teststat_dimid != "chan"
  dimind <- sort(attr(input$teststat_dimid, "origpos")[notchan])
  out$dimnames <- input$pre_dimnames[dimind]
  out$dim <- dim(out$stat)
  setattr(out$dim, "names", names(out$dimnames))
  # return
  setattr(out, "class", "tanova")
  out
}


# Peak Anova functions ===========

#' ANOVA on individual peaks
#'
#' \code{peakAnova} performs automatic peak detection and runs traditional
#' ANOVA on them
#' @param .arraydat a numeric array with named dimnames containing the EEG (or
#' other) data. Missing values are not allowed.
#' @param factordef a named list of factor definitions, containing the following
#' elements:
#' \itemize{
#' \item{"between"}{"character vector of between-subject factors (default: NULL)"}
#' \item{"within"}{"character vector of within-subject factors (default: NULL)"}
#' \item{"w_id"}{"name of the dimension which identifies the subjects
#' (default: "id")}
#' }
#' @param peakdef a named list of peak definitions. A peak definition is a
#' three-element integer vector which specifies the time window (start and
#' end point) and the polarity (1 for positivity and -1 for negativity).
#' @param bwdat a data.frame which contains the identification codes
#' (factordef$w_id) and the group memberships (factordef$between) of the
#' subjects. Missing values are not allowed.
#' @param verbose logical value indicating whether means for each factor level
#' should be also returned
#' @param avg_around_peak integer value which specifies the number of data points
#' which are considered +- in the "mean around the peak" measure
#' @param nperm integer value giving the number of permutations (default: 999L)
#' @param useparallel logical value; if TRUE (default), computations are done
#' in parallel
#' @param ncores integer value corresponding to the number of cores;
#' if NULL (default), it is set to the maximum number of cores available
#' @param par_method parallelization method; can be "snow" (default) or "mc"
#' (multicore)
#' @param cl a cluster definition for snow-type parallelization; if NULL
#' (default), it is set up automatically
#' @param iaterms_last logival variable whether all interaction terms should
#' follow all main effect terms (default: TRUE)
#' @param seed an integer value which specifies a seed (default: NULL)
#' @details OLD VERSION, DO NOT USE IT! updates will follow...
#' @note The function computes type I p-values - this is correct if the design
#' is fully balanced and orthogonal (if the number of between-subject
#' factors is one, it may have slightly unequal group sizes).
#' @export
#' @return A list object.
peakAnova <- function(.arraydat, factordef, peakdef, bwdat = NULL,
                      verbose = TRUE, avg_around_peak = 2,
                      nperm = 1, useparallel = FALSE, ncores = NULL,
                      par_method = c("snow", "mc"), cl = NULL,
                      iaterms_last = TRUE, seed = NULL) {
  # some checks
  stopifnot(is.array(.arraydat) | missing(.arraydat))
  stopifnot(is.list(factordef) | missing(factordef))
  stopifnot(length(peakdef) != 2 | missing(peakdef))
  nperm <- as.integer(nperm)
  if (!is.null(factordef$between) && is.null(bwdat)) {
    stop("No between-participant data provided")
  }
  if (useparallel) {
    if (is.null(ncores)) ncores <- parallel::detectCores()
  }
  #
  out <- list(call = match.call())
  #
  sumSq <- function(.arraydat, f_dat, aov_form, labels = FALSE) {
    out <- summary(aov(as.formula(aov_form), data = f_dat))
    if (!labels) {
      out <- matrix_(unlist(lapply(out, "[", "Mean Sq"),
                            use.names = FALSE),
                     nrow(out[[1]]), ncol(.arraydat))
    } else {
      out <- matrix_(unlist(lapply(out, "[", "Mean Sq"),
                            use.names = FALSE),
                     nrow(out[[1]]), ncol(.arraydat),
                     list(term = gsub(" ","",rownames(out[[1]])),
                          peak = seq_along(out)))
    }
    return(out)
  }
  #
  par_method <- match.arg(par_method)
  # prepare data
  temp <- preAnova(.arraydat, factordef, bwdat, useparallel, ncores, par_method)
  # assign variables to this environment, and potentially overwrite existing ones
  #assignList(temp, verbose = FALSE)
  dat <- temp$dat
  .arraydat <- temp$.arraydat
  factordef <- temp$factordef
  origdimnames <- temp$origdimnames
  par_params <- temp$par_params
  rm(temp)
  #
  timedim <- which(names(dimnames(.arraydat)) == "time")
  origtimedim <- which(names(origdimnames) == "time")
  origdimnames_peaks <- origdimnames
  .arraydat_peaks <- subsetArray(.arraydat,
                                 list(time = dimnames(.arraydat)$time[seq_along(peakdef)]))
  origdimnames_peaks$time <-
    dimnames(.arraydat_peaks)$time <- seq_along(peakdef)
  names(origdimnames_peaks)[origtimedim] <-
    names(dimnames(.arraydat_peaks))[timedim] <- "peak"
  # formula of anova
  aov_formula_char <-
    if (is.null(factordef$within)) {
      paste(
        as.character(quote(.arraydat)), " ~ ",
        paste(as.character(factordef$between), collapse = "*"),
        sep = "")
    } else if (is.null(factordef$between)) {
      paste(
        as.character(quote(.arraydat)), " ~ ",
        paste(as.character(factordef$within), collapse = "*"),
        sep = "")
    } else {
      paste(
        as.character(quote(.arraydat)), " ~ ",
        paste(
          paste(as.character(factordef$between), collapse = "*"),
          paste(as.character(factordef$within), collapse = "*"),
          sep = "*"),
        sep = "")
    }
  aov_formula <- as.formula(aov_formula_char)
  mean_formula <- as.formula(gsub("\\*", ":", aov_formula_char))
  # compute marginal means
  origdims <- vapply(origdimnames, length, 0L)
  modeldims <- c(factordef$w_id, factordef$within)
  keepdims <- setdiff(names(origdimnames), modeldims)
  # find indices corresponding to time windows
  tpoints <- as.numeric(origdimnames$time)
  peakdef <- lapply(peakdef, function(x)
    c(which(origdimnames$time %in% as.character(x[1:2])), x[3]))
  # find peaks
  fmeans <- marginalMeans(mean_formula, dat, .arraydat,
                          origdimnames[keepdims],
                          keep_term_order = !iaterms_last, residualmean = FALSE)
  datrowind <- match(
    interaction(dat[, strsplit(names(fmeans), ":")[[1]] ]),
    rownames(fmeans[[1]]) )
  peakind_facs <- strsplit(rownames(fmeans[[1]]), "\\.")
  peakind_facs <- setNames(data.frame(
    matrix(unlist(peakind_facs, use.names = FALSE),
           length(peakind_facs), length(peakind_facs[[1]]), TRUE)),
    strsplit(names(fmeans), ":")[[1]])
  peakind <- sapply(peakdef, function(x) {
    minmax <- if (x[3] < 0) which.min else which.max
    x[1] -1 + apply(fmeans[[1]][,seq(x[1],x[2])], 1, minmax)})
  #
  # peak amplitudes
  .arraydat_peaks[] <- sapply(1:ncol(peakind), function(i) {
    out <- matrix_(0, nrow(.arraydat), avg_around_peak*2 + 1)
    avgind <- outer(peakind[,i], c(-avg_around_peak:avg_around_peak), "+")
    out[] <- .arraydat[
      cbind(seq_along(datrowind), as.integer(avgind[datrowind,]))]
    return(rowMeans(out))
  })
  #
  # Compute ANOVA on peak amplitudes
  Fvals_obs <- arrayAnovaSub(.arraydat_peaks,
                             factordef, origdimnames_peaks, dat, verbose)
  out <- c(out, list(F_obs = Fvals_obs))
  # if verbose, save factor means
  if (verbose) {
    factor_means <- data.frame(
      group = peakind_facs[rep(1:nrow(peakind_facs),
                               length(peakdef)), , drop = F],
      peak = factor(rep(seq_along(peakdef), each = nrow(fmeans[[1]]))),
      ampl = c(marginalMeans(mean_formula, dat, .arraydat_peaks,
                             origdimnames_peaks["peak"],
                             keep_term_order = !iaterms_last,
                             residualmean = FALSE)[[1]]),
      lat = tpoints[c(peakind)])
    out <- c(out, list(factor_means = factor_means))
  }
  #
  # Compute permutation statistics for peak latencies
  #
  # observed sum of squares
  lateff_obs <- sumSq(peakind, peakind_facs, aov_formula_char, labels = TRUE)
  # permutations
  if (nperm > 1L) {
    #
    permfn <- function(i) {
      facmeans <- marginalMeans(mean_formula, dat[randind[i,], ],
                                .arraydat,
                                origdimnames[keepdims],
                                keep_term_order = !iaterms_last,
                                residualmean = FALSE)[[1]]
      peaki <- sapply(peakdef, function(x) {
        minmax <- if (x[3] < 0) which.min else which.max
        x[1] - 1 + apply(facmeans[, seq(x[1], x[2])], 1, minmax)})
      out <- sumSq(peaki, peakind_facs, aov_formula_char)
      return(out)
    }
    #
    # generate random orders (dim(randind) = nperm X nrow(dat))
    if (!is.null(seed)) set.seed(seed)
    randind <- mclapply(1:nperm, function(i)
      shuffle(nrow(dat),
              how(within = Within(type = "free"),
                  plots = Plots(strata = dat[,factordef$w_id],
                                type = "free"))),
      mc.cores = par_params$ncores)
    randind <- matrix(unlist(randind, use.names = FALSE),
                      nperm, nrow(dat), TRUE)
    #
    if (!useparallel) {
      lateff_perm <- lapply(1:nperm, permfn)
    } else if (par_method == "snow") {
      if (is.null(cl)) cl <- makePSOCKcluster(par_params$ncores, outfile = "")
      clusterExport(cl,
                    varlist = par_params$varlist2snow,
                    envir = environment())
      lateff_perm <- parLapply(cl, 1:nperm, permfn)
      #             stopCluster(cl)
      #             rm(cl)
    } else if (par_method == "mc") {
      lateff_perm <- mclapply(1:nperm, permfn, mc.cores = par_params$ncores)
    }
    lateff_perm <- cbind(c(lateff_obs),
                         matrix_(unlist(lateff_perm, use.names = FALSE),
                                 ncol = nperm))
    lateff_perm <- sweep(lateff_perm[, -1, drop = F], 1,
                         lateff_perm[, 1], "-")
    pvalues <- array_(
      (rowSums(lateff_perm >= 0) + 1) / (nperm + 1),
      dim(lateff_obs), dimnames(lateff_obs))
    out <- c(out, list(lat_pvalues = pvalues))
  }
  # return
  out
}


# P-value utilities ===========


#' Correct p-values in time-series
#'
#' \code{pvalueConsec} sets a minimal consecutive length criterion on the
#' p-values in a time series
#' @param dat a numeric matrix or array containing p-values, or a list of such
#' matrices/arrays. dat must have a named time dimension.
#' @param sig_level numeric value (default: 0.05), the level of significance
#' @param min_length numeric value (default: 10), the minimum number of
#' consecutive significant time points
#' @export
#' @return An object of the same dimension as the input data
pvalueConsec <- function(dat, sig_level = 0.05, min_length = 10) {
  pCorr <- function(x) {
    temp <- rle(x)
    ind <- (temp$values == 1) & (temp$lengths < min_length)
    temp$values[ind] <- 0
    return( inverse.rle(temp) )
  }
  pCorrMat <- function(datarr) {
    datarr <- array2mat(datarr, "time")
    datarr[apply(datarr <= sig_level, 2, pCorr) == 0] <- 1
    datarr <- mat2array(datarr)
    return(datarr)
  }
  if (is.list(dat)) {
    out <- lapply(dat, pCorrMat)
  } else {
    out <- pCorrMat(dat)
  }
  # return
  out
}


#' Internal function to create time ranges of significant effects
#' 
#' \code{sigPhases} is an internal function which extracts significant
#' time ranges from the array of p-values. Used for highlighting significant
#' phases in butterfly plots
#' @param dat a numeric array of corrected and uncorrected p-values
#' @param pcrit numeric value of the level of alpha (e.g., 0.05)
#' @param ymax y coordinate used for setting the highlighted region
#' @keywords internal
sigPhases <- function(dat, pcrit, ymax) {
  sigdata_corr <- subsetArray(dat, measure = "p_corr")
  sigdata_uncorr <- subsetArray(dat, measure = "p")
  sigdata <- array(0L, dim(sigdata_corr), dimnames(sigdata_corr))
  sigdata[sigdata_uncorr <= pcrit] <- 1L
  sigdata[sigdata_corr <= pcrit] <- 2L
  dimn <- dimnames(sigdata)
  timep <- as.Numeric(dimn$time)
  dimn <- expand.grid(dimn[-match("time", names(dimn))])
  out <- matrixRle(array2mat(sigdata, "time"))
  setDT(out)
  out[, time1 := c(1L, 1L + cumsum(lengths[-.N])), by = matrixcolumn]
  out[, time2 := (cumsum(lengths)), by = matrixcolumn]
  out <- out[values > 0L,]
  if (nrow(out)) {
    out[, (colnames(dimn)) := dimn[matrixcolumn,]]
    out[, time1 := timep[time1]]
    out[, time2 := timep[time2]]
    out[, Significant := factor(values,
                                levels = c(1, 2),
                                labels = c("before correction",
                                           "after correction"))]
    out[, 
        ymax := if (values == 1) ymax[1L] else if (values == 2L) ymax[2L],
        by = values]
    out[, values := NULL]
    out[, matrixcolumn := NULL]
    # return
    setDF(out)
    out
  } else {
    # return
    NULL
  }
}
tdeenes/eegR documentation built on April 19, 2021, 4:17 p.m.