R/mdmr.R

#' Compute Gower's centered similarity matrix from a distance matrix
#'
#' Compute Gower's centered similarity matrix \eqn{G}, which is the matrix
#' decomposed by the MDMR test statistic.
#'
#' @param d.mat Symmetric distance matrix (or R distance object) computed from
#'  the outcome data to be used in MDMR.
#'
#' @return Gower's centered similarity matrix \code{G} computed from \code{d.mat}.
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Gower, J. C. (1966). Some distance properties of latent root and
#' vector methods used in multivariate analysis. Biometrika, 53(3-4), 325-338.
#'
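#' @examples
#' # A minimal sketch using the simulated data bundled with the package:
#' data(mdmrdata)
#' D <- dist(Y.mdmr, method = "euclidean")
#' G <- gower(D)
#' # Gower-centering makes every row (and column) of G sum to zero
#' max(abs(rowSums(G)))
#'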
#' @export
gower <- function(d.mat) {
  # Convert distance object to matrix form
  d.mat <- as.matrix(d.mat)

  # Dimensionality of distance matrix
  n <- nrow(d.mat)

  # Create Gower's symmetry matrix (Gower, 1966)
  A <- -0.5 * d.mat^2

  # Subtract column means: As = (I - 1/n * 11')A
  As <- A - rep(colMeans(A), rep.int(n, n))

  # Subtract row means: G = As(I - 1/n * 11')
  # the transpose is just to deal with how "matrix minus vector" operations work
  # in R
  return(t(As) - rep(rowMeans(As), rep.int(n, n)))
}


#' Function to compute analytic p-values using Davies (1980)
#'
#' Compute analytic MDMR p-values
#'
#' @param q Test statistic
#' @param lambda Centered distance matrix eigenvalues
#' @param k Numerator degrees of freedom for this test statistic
#' @param p Total number of predictors in the design matrix (except for the
#' intercept)
#' @param n Sample size
#' @param lim Maximum number of integration terms. Realistic values for "lim"
#' range from 1,000 if the procedure is to be called repeatedly up to 50,000 if
#' it is to be called only occasionally
#' @param acc Error bound. Suitable values for "acc" range from 0.001 to 0.00005
#' which should be adequate for most statistical purposes.
#'
#' @return Output of CompQuadForm::davies()
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Davies, R. B. (1980). The Distribution of a Linear Combination of
#'  chi-square Random Variables. Journal of the Royal Statistical Society.
#'  Series C (Applied Statistics), 29(3), 323-333.
#'
#'  Duchesne, P., & De Micheaux, P. L. (2010). Computing the distribution of
#'  quadratic forms: Further comparisons between the Liu-Tang-Zhang
#'  approximation and exact methods. Computational Statistics and Data
#'  Analysis, 54(4), 858-862.
#'
#'  McArtor, D. B., Lubke, G. H., & Bergeman, C. S. (2017). Extending
#'  multivariate distance matrix regression with an effect size measure and the
#'  distribution of the test statistic. Psychometrika, 82, 1052-1077.
.pmdmr <- function(q, lambda, k, p, n = length(lambda),
                   lim = 50000, acc = 1e-20) {
  # Use the eigenvalues of G and the test statistic q to make a vector
  # corresponding to all of the weights for the composite chi-square variables
  # See Equation 12 in McArtor et al.
  gamma <- c(lambda,  -q * lambda)

  # Aggregate the degrees of freedom for each composite chi-square variable
  # See Equation 12 in McArtor et al.
  nu <- c(rep(k, length(lambda)), rep(n-p-1, length(lambda)))

  # Call the Davies function at zero using the given weights and df, along
  # with the starting values of the metaparameters of the algorithm
  pv <- CompQuadForm::davies(0, lambda = gamma, h = nu, lim = lim, acc = acc)

  # Check error status. If there was an error or if the p-value was below zero,
  # return entire davies object
  if(pv$ifault != 0 || pv$Qq < 0) {
    return(pv)
  } else {
    return(pv$Qq)
  }

}
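
# A hedged usage sketch for this internal helper (mirrors the call inside
# mdmr()):
#   lambda <- eigen(G, only.values = TRUE)$values
#   .pmdmr(q = f.omni, lambda = lambda, k = p, p = p, n = n)
# On success it returns a single p-value; on failure it returns the full
# CompQuadForm::davies() output so the caller can loosen "acc" and retry.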

#' Function to compute the hat matrix
.hat <- function(X) {
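  # H = X (X'X)^{-1} X'; crossprod()/tcrossprod() compute the same product
  # while avoiding explicit calls to t() and %*%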
  tcrossprod(tcrossprod(X, solve(crossprod(X))), X)
}

#' Function to compute test statistics for a permuted design matrix
.mdmr.permstats <- function(X, n, vg, trG, px, test.inds) {
  # Permute rows of X
  perm.indices <- sample(1:n, size = n, replace = F)
  X.perm <- X[perm.indices,]

  # Omnibus test statistic
  H.perm <- .hat(X.perm)
  vh.perm <- c(H.perm)
  numer.perm <- as.numeric(crossprod(vh.perm, vg))
  denom.perm <- trG - numer.perm
  f.omni.perm <- numer.perm / denom.perm

  # Conditional test statistics
  Hs.perm <- lapply(1:px, function(k) {
    x.rm <- which(test.inds == k)
    Xs.perm <- X.perm[,-x.rm]
    Hs.perm <- .hat(Xs.perm)
    vh.perm - c(Hs.perm)
  })
  numer.x.perm <- unlist(lapply(Hs.perm, function(vhs.perm) {
    as.numeric(crossprod(vhs.perm, vg))
  }))
  f.x.perm <- numer.x.perm / denom.perm

  # OUTPUT
  return(c(f.omni.perm, f.x.perm))
}
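
# Hedged note on the return value: a numeric vector of length px + 1 holding
# c(omnibus F, conditional F for each predictor), computed on one random
# permutation of the rows of X.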

#' Function to compute pseudo R-square
.pseudo.r2 <- function(G, vh, vhs, x.inds, unique.xnames = NULL) {

  # =========================== Omnibus Test =============================== #
  # Computational trick: H is idempotent, so H = HH. tr(ABC) = tr(CAB), so
  # tr(HGH) = tr(HHG) = tr(HG). Also, tr(AB) = vec(A)'vec(B), so
  vg <- c(G)
  numer <- crossprod(vh, vg)

  # pseudo-R2 is defined as tr(HGH)/tr(G), so
  denom <- sum(diag(G))

  # Omnibus pseudo R-square
  res <- numer / denom

  # ===================== Tests of Each Predictor ========================== #
  numer.x <- unlist(lapply(1:length(vhs), function(k) {
    num <- NA
    if(k %in% (x.inds)) {
      num <- crossprod(vhs[[k]], vg)
    }
    return(num)
  }))

  r2.x <- numer.x / denom

  # ====================== Return Results =========================== #
  res <- c(res, r2.x)
  names(res) <- c("(Omnibus)", unique.xnames)
  return(res)
}
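
# Hedged note on the return value: a named vector of length(vhs) + 1 holding
# the omnibus pseudo R-square followed by each conditional pseudo R-square
# (NA for any predictor not listed in x.inds).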

#' Conduct MDMR with analytic p-values
#'
#' \code{mdmr} (multivariate distance matrix regression) is used to regress a
#' distance matrix onto a set of predictors. It returns the test statistic,
#' pseudo R-square statistic, and analytic p-values for all predictors
#' jointly and for each predictor individually, conditioned on the rest.
#'
#' This function is the fastest approach to conducting MDMR. It uses the
#' fastest known computational strategy to compute the MDMR test statistic (see
#' Appendix A of McArtor et al., 2017), and it uses fast, analytic p-values.
#'
#' The slowest part of conducting MDMR is now the necessary eigendecomposition
#' of the \code{G} matrix, whose computation time is a function of
#' \eqn{n^3}. If MDMR is to be conducted multiple times on the same
#' distance matrix, it is recommended to compute eigenvalues of \code{G} in
#' advance and pass them to the function rather than computing them every
#' time \code{mdmr} is called, as is the case if the argument \code{lambda}
#' is left \code{NULL}.
#'
#' The distance matrix \code{D} can be passed to \code{mdmr} as either a
#' distance object or a symmetric matrix.
#'
#' @param X An \eqn{n x p} matrix or data frame of predictors. Unordered factors
#' will be tested with contrast-codes by default, and ordered factors will be
#' tested with polynomial contrasts. For finer control of how categorical
#' predictors are handled, or if higher-order effects are desired, the output
#' from a call to \code{model.matrix()} can be supplied to this argument as
#' well.
#' @param D Distance matrix computed on the outcome data. Can be either a
#' matrix or an R \code{\link{dist}} object. Either \code{D} or \code{G}
#' must be passed to \code{mdmr()}.
#' @param G Gower's centered similarity matrix computed from \code{D}.
#' Either \code{D} or \code{G} must be passed to \code{mdmr}.
#' @param lambda Optional argument: Eigenvalues of \code{G}.
#' Eigendecomposition of large \code{G} matrices can be somewhat time
#' consuming, and the theoretical p-values require the eigenvalues of
#' \code{G}. If MDMR is to be conducted multiple times on one distance
#' matrix, it is advised to conduct the eigendecomposition once and pass the
#' eigenvalues to \code{mdmr()} directly each time.
#' @param return.lambda Logical; indicates whether or not the eigenvalues of
#' \code{G} should be returned, if calculated. Default is \code{FALSE}.
#' @param start.acc Starting accuracy of the Davies (1980) algorithm
#' implemented in the \code{\link{davies}} function in the \code{CompQuadForm}
#' package (Duchesne & De Micheaux, 2010) that \code{mdmr()} uses to compute
#' MDMR p-values.
#' @param ncores Integer; if \code{ncores} > 1, the \code{\link{parallel}}
#' package is used to speed computation. Note: Windows users must set
#' \code{ncores = 1} because the \code{parallel} package relies on forking. See
#' \code{mc.cores} in the \code{\link{mclapply}} function in the
#' \code{parallel} package for more details.
#' @param perm.p Logical; should permutation-based p-values be computed instead
#' of analytic p-values? Default behavior is \code{TRUE} if \code{n < 200} and
#' \code{FALSE} otherwise, because the analytic p-values rely on asymptotics
#' that may not hold for small \code{n}.
#' @param nperm Number of permutations to use if permutation-based p-values are
#' to be computed.
#' @param seed Random seed to use to generate the permutation null distribution.
#' Defaults to a random seed.
#'
#' @return An object with seven elements and a summary function. Calling
#' \code{summary(mdmr.res)} produces a data frame comprised of:
#' \item{Statistic}{Value of the corresponding MDMR test statistic}
#' \item{Numer DF}{Numerator degrees of freedom for the corresponding effect}
#' \item{Pseudo R2}{Size of the corresponding effect on the
#' distance matrix}
#' \item{p-value}{The p-value for each effect.}
#' In addition to the information in the four columns comprising
#' \code{summary(res)}, the \code{res} object also contains:
#'
#' \item{p.prec}{A data.frame reporting the precision of each p-value. If
#' analytic p-values were computed, these are the maximum error bound of the
#' p-values reported by the \code{davies} function in \code{CompQuadForm}. If
#' permutation p-values were computed, it is the standard error of each
#' permutation p-value.}
#' \item{lambda}{A vector of the eigenvalues of \code{G} (if
#' \code{return.lambda = T}).}
#' \item{nperm}{Number of permutations used. Will read \code{NA} if analytic
#' p-values were computed}
#'
#' Note that the printed output of \code{summary(res)} will truncate p-values
#' to the smallest trustworthy values, but the object returned by
#' \code{summary(res)} will contain the p-values as computed. The reason for
#' this truncation differs for analytic and permutation p-values. For an
#' analytic p-value, if the error bound of the Davies algorithm is larger than
#' the p-value, the only conclusion that can be drawn with certainty is that
#' the p-value is smaller than (or equal to) the error bound. For a permutation
#' test, the estimated p-value will be zero if no permuted test statistics are
#' greater than the observed statistic, but the zero p-value is only a product
#' of the finite number of permutations conducted. The only conclusion that can
#' be drawn is that the p-value is smaller than \code{1/nperm}.
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Davies, R. B. (1980). The Distribution of a Linear Combination of
#'  chi-square Random Variables. Journal of the Royal Statistical Society.
#'  Series C (Applied Statistics), 29(3), 323-333.
#'
#'  Duchesne, P., & De Micheaux, P. L. (2010). Computing the distribution of
#'  quadratic forms: Further comparisons between the Liu-Tang-Zhang
#'  approximation and exact methods. Computational Statistics and Data
#'  Analysis, 54(4), 858-862.
#'
#'  McArtor, D. B., Lubke, G. H., & Bergeman, C. S. (2017). Extending
#'  multivariate distance matrix regression with an effect size measure and the
#'  distribution of the test statistic. Psychometrika, 82, 1052-1077.
#'
#' @examples
#'# --- The following two approaches yield equivalent results --- #
#'# Approach 1
#'data(mdmrdata)
#'D <- dist(Y.mdmr, method = "euclidean")
#'res1 <- mdmr(X = X.mdmr, D = D)
#'summary(res1)
#'
#'# Approach 2
#'data(mdmrdata)
#'D <- dist(Y.mdmr, method = "euclidean")
#'G <- gower(D)
#'res2 <- mdmr(X = X.mdmr, G = G)
#'summary(res2)
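#'
#'# Approach 3 (a sketch of the advice above): precompute the eigenvalues of
#'# G once if mdmr() will be called repeatedly on the same distance matrix
#'lambda <- eigen(G, only.values = TRUE)$values
#'res3 <- mdmr(X = X.mdmr, G = G, lambda = lambda, perm.p = FALSE)
#'summary(res3)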
#'
#' @importFrom CompQuadForm davies
#' @importFrom parallel mclapply
#' @export
mdmr <- function(X, D = NULL, G = NULL, lambda = NULL, return.lambda = F,
                 start.acc = 1e-20, ncores = 1,
                 perm.p = (nrow(as.matrix(X)) < 200),
                 nperm = 500, seed = NULL) {

  ##############################################################################
  ## INPUT HANDLING
  ##############################################################################
  # If X is a vector, convert it to a matrix
  if(is.vector(X)) {
    X <- as.matrix(X)
  }

  # Get names of the columns in X
  unique.xnames <- colnames(X)
  if(is.null(unique.xnames)) {
    colnames(X) <- unique.xnames <- paste0("X", 1:ncol(X))
  }

  # Make sure "D" is not interpreted as the D function
  if(is.function(D)) {
    stop(paste0("Please provide either a distance matrix or a ",
                "Gower-centered distance matrix."))
  }

  # Make sure either D or G was provided
  if(is.null(D) & is.null(G)) {
    stop(paste0("Please provide either a distance matrix ",
                "or a Gower-centered distance matrix."))
  }

  # Make sure D is a distance matrix
  if(!is.null(D)) {
    if(nrow(as.matrix(D)) != ncol(as.matrix(D))) {
      stop(paste0("Please provide either a distance matrix ",
                  "or a Gower-centered distance matrix."))
    }
  }

  # If G was not provided, compute it from D
  if(is.null(G)) {
    G <- gower(D)
  }

  # Get test indices of variables comprising a model matrix (e.g. which contrast
  # codes need to be tested jointly to assess a factor if a model.matrix was
  # passed as X). If X isn't a model matrix, do them all one-at-a-time.
  test.inds <- attr(X, "assign")
  is.model.mat <- !is.null(test.inds)

  # Remove observations that are missing on X
  X.na <- which(rowSums(is.na(as.matrix(X))) > 0)
  if(length(X.na) > 0) {
    X <- X[-X.na,]
    G <- G[-X.na, -X.na]
    warning(paste0(length(X.na),
                   " observations removed due to missingness on X."))
  }


  # Handle potential factors if X is not a model.matrix
  if(!is.model.mat) {
    contr.list <- lapply(1:ncol(X), FUN = function(k) {
      contr.type <- NULL
      if(is.factor(X[,k])) {
        contr.type <- "contr.sum"
      }
      if(is.ordered(X[,k])) {
        contr.type <- "contr.poly"
      }
      return(contr.type)
    })
    names(contr.list) <- colnames(X)
    contr.list <- contr.list[!unlist(lapply(contr.list, is.null))]
    # Case 1: at least some categorical predictors
    if(length(contr.list) > 0) {
      X <- stats::model.matrix(~ . , data = as.data.frame(X),
                               contrasts.arg = contr.list)
    }
    # Case 2: all numeric predictors
    if(length(contr.list) == 0) {
      X <- stats::model.matrix(~ . , data = as.data.frame(X))
    }


    test.inds <- attr(X, "assign")
  }
  xnames <- colnames(X)
  unique.xnames <- lapply(1:max(test.inds), FUN = function(kk) {
    hold.names <- varname <- xnames[test.inds == kk]
    if(length(varname) > 1) {
      constant.char <-
        which(unlist(lapply(1:nchar(hold.names[1]), FUN = function(ind) {
          chars <- unlist(lapply(hold.names, FUN = function(var.name) {
            substr(x = var.name, start = ind, stop = ind)
          }))
          stats::var(as.numeric(as.factor(chars))) == 0
        })))
      varname <- paste(unlist(lapply(constant.char, FUN = function(cc) {
        substr(x = hold.names[1], start = cc, stop = cc)
      })), collapse = "")
    }
    varname
  })
  unique.xnames <- unlist(unique.xnames)


  # Record the number of items and sample size
  p <- ncol(X)-1
  px <- length(unique.xnames)
  n <- nrow(X)


  ##############################################################################
  ## COMPUTE OMNIBUS TEST STATISTIC
  ##############################################################################
  # Compute hat matrix
  H <- .hat(X)

  # Computational trick: H is idempotent, so H = HH. tr(ABC) = tr(CAB), so
  # tr(HGH) = tr(HHG) = tr(HG). Also, tr(AB) = vec(A)'vec(B), so
  vh <- c(H)
  vg <- c(G)
  numer <- as.numeric(crossprod(vh, vg))

  # Numerical trick: tr((I-H)G(I-H)) = tr(G) - tr(HGH), so
  trG <- sum(diag(G))
  denom <- trG - numer

  # Save omnibus F
  pr2.omni <- as.numeric(numer / trG)
  f.omni <- as.numeric(numer / denom)

  ##############################################################################
  ## COMPUTE CONDITIONAL TEST STATISTICS
  ##############################################################################
  # Get vectorized hat matrices for each conditional effect
  Hs <- parallel::mclapply(1:px, mc.cores = ncores, FUN = function(k) {
    x.rm <- which(test.inds == k)
    Xs <- X[,-x.rm]
    Hs <- .hat(Xs)
    c(H - Hs)
  })

  # Get DF of each test
  df <- unlist(parallel::mclapply(1:px, mc.cores = ncores, FUN = function(k) {
    x.rm <- which(test.inds == k)
    length(x.rm)
  }))

  # Compute SSD due to conditional effect
  numer.x <- unlist(
    parallel::mclapply(Hs, mc.cores = ncores, FUN = function(vhs) {
      crossprod(vhs, vg)
    }))

  # Rescale to get either test statistic or pseudo r-square
  f.x <- numer.x / denom
  pr2.x <- numer.x / trG

  # Combine test statistics and pseudo R-squares
  stat <- data.frame("stat" = c(f.omni, f.x),
                     row.names = c("(Omnibus)", unique.xnames))
  df <- data.frame("df" = c(p, df),
                   row.names = c("(Omnibus)", unique.xnames))
  pr2 <- data.frame("pseudo.Rsq" = c(pr2.omni, pr2.x),
                    row.names = c("(Omnibus)", unique.xnames))

  ##############################################################################
  ## p-values: ANALYTIC APPROACH
  ##############################################################################
  if(!perm.p) {

    # Omnibus p-value
    # Compute the eigenvalues of G if they were not provided
    if(is.null(lambda)) {
      lambda <- eigen(G, only.values = T)$values
    }

    # Compute adjusted sample size
    n.tilde <- (n-p-1) * lambda[1] / sum(lambda)
    if(n.tilde < 75) {
      warning(
        paste0("Adjusted sample size = ", round(n.tilde), "\n",
               "Asymptotic properties of the null distribution may not hold.\n",
               "This can result in overly conservative p-values.\n",
               "Permutation tests are recommended."))
    }

    # Initialize accuracy of the Davies algorithm
    acc.omni <- start.acc
    pv.omni <- .pmdmr(f.omni, lambda, k = p, p = p, n = n, acc = acc.omni)

    # If the Davies procedure failed, loosen the error bound and retry
    while(length(pv.omni) > 1) {
      acc.omni <- acc.omni * 10
      pv.omni <- .pmdmr(q = f.omni, lambda = lambda, k = p, p = p, n = n,
                        acc = acc.omni)
    }

    # Get p-values
    p.res <- parallel::mclapply(1:px, mc.cores = ncores, FUN = function(k) {
      item.acc <- start.acc
      pv <- .pmdmr(q = f.x[k], lambda = lambda, k = df[k+1,1],
                   p = p, n = n, acc = item.acc)

      # If the Davies procedure failed, loosen the error bound and retry
      while(length(pv) > 1) {
        item.acc <- item.acc * 10
        pv <- .pmdmr(q = f.x[k], lambda = lambda, k = df[k+1,1],
                     p = p, n = n, acc = item.acc)
      }
      c(pv.x = pv, acc = item.acc)
    })
    pv.x <- unlist(lapply(p.res, function(p) {p[[1]]}))
    acc.x <- unlist(lapply(p.res, function(p) {p[[2]]}))

    pv <- data.frame("analytic.pvals" = c(pv.omni, pv.x),
                     row.names = c("(Omnibus)", unique.xnames))
    pv.acc <- data.frame("p.max.error" = c(acc.omni, acc.x),
                         row.names = c("(Omnibus)", unique.xnames))

    # Overwrite nperm to show no permutations were done
    nperm <- NA
  }

  ##############################################################################
  ## p-values: PERMUTATION APPROACH
  ##############################################################################
  if(perm.p) {

    # Set up chunk size
    perm.nchunk <- ceiling(n * nperm / 1e6)
    perm.chunkSize <- ceiling(nperm / perm.nchunk)
    nperm <- perm.nchunk * perm.chunkSize
    perm.chunkStart <- seq(1, nperm, by = perm.chunkSize)
    perm.chunkEnd <- perm.chunkStart + perm.chunkSize - 1
    perm.chunkEnd[perm.nchunk] <- nperm

    # Set seed if not specified: make sure it doesn't overflow
    if(is.null(seed)) {
      max.int <- .Machine$integer.max
      seed.max <- floor((max.int - perm.chunkSize) / perm.nchunk)
      seed <- round(stats::runif(1,0,1)*seed.max)
    }

    # Initialize counter for number of times each permuted test statistic is
    # larger than the observed test statistic
    perm.geq.obs <- rep(0, px + 1)

    # Compute permutation p-values
    for(i in 1:perm.nchunk) {
      res <- parallel::mclapply(
        1:perm.chunkSize, mc.cores = ncores, function(xx) {
          set.seed(seed * i + xx)
          .mdmr.permstats(X, n, vg, trG, px, test.inds) >= stat
        })
      perm.geq.obs <- perm.geq.obs +
        colSums(
          matrix(unlist(res), nrow = perm.chunkSize, ncol = px + 1, byrow = T))

      cat((perm.chunkEnd[i]/nperm)*100,
          "% of permutation test statistics computed.",
          fill = T)
    }

    # Compute p-values
    pv.omni <- perm.geq.obs[1] / nperm
    pv.x <- perm.geq.obs[-1] / nperm
    pv <- data.frame("perm.pvals" = c(pv.omni, pv.x),
                     row.names = c("(Omnibus)", unique.xnames))

    # Compute p-value accuracy
    #   Use max of p-values and 1/nperm so that we don't get zero SEs
    pv.omni.hold <- max(pv.omni, 1/nperm)
    pv.x.hold <- pmax(pv.x, 1/nperm)

    acc.omni <- sqrt((pv.omni.hold * (1-pv.omni.hold)) / nperm)
    acc.x <- sqrt((pv.x.hold * (1-pv.x.hold)) / nperm)
    pv.acc <- data.frame("perm.p.SE" = c(acc.omni, acc.x),
                         row.names = c("(Omnibus)", unique.xnames))
    rm(pv.omni.hold, pv.x.hold)

    # Fill in LAMBDA with a note if it was requested
    if(return.lambda) {
      lambda <- "Eigenvalues are not used in the permutation approach"
    }
  }


  ##############################################################################
  ## COMBINE AND RETURN OUTPUT
  ##############################################################################
  if(return.lambda) {
    out <- list("stat" = stat, "pr.sq" = pr2, "pv" = pv, "p.prec" = pv.acc,
                "df" = df, lambda = lambda, nperm = nperm)
  } else {
    out <- list("stat" = stat, "pr.sq" = pr2, "pv" = pv, "p.prec" = pv.acc,
                "df" = df, lambda = NULL, nperm = nperm)
  }

  class(out) <- c("mdmr", class(out))

  return(out)
}


#' Print MDMR Object
#'
#' \code{print} method for class \code{mdmr}
#'
#' @param x Output from \code{mdmr}
#' @param ... Further arguments passed to or from other methods.
#'
#' @return
#' \item{p-value}{Analytic or permutation p-values for the omnibus test and
#' each predictor}
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#'
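#' @examples
#' # A minimal sketch using the simulated data bundled with the package:
#' data(mdmrdata)
#' D <- dist(Y.mdmr, method = "euclidean")
#' print(mdmr(X = X.mdmr, D = D))
#'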
#' @export
print.mdmr <- function(x, ...) {
  if(is.na(x$nperm)) {
    pv.name <- "Analytic p-value"
    # If it's analytic, we can only say it's below davies error
    out <- rep(NA, nrow(x$pv))
    for(i in 1:length(out)) {
      out[i] <- format.pval(x$pv[i,1], eps = x$p.prec[i,1])
    }
    out <- data.frame(out,row.names = rownames(x$pv))
    names(out) <- pv.name
    print(out)
  }
  if(!is.na(x$nperm)) {
    pv.name <- "Permutation p-value"
    # If it's a permutation test, we can only say it's below 1/nperm
    out <- data.frame(format.pval(x$pv, eps = 1/x$nperm),
                      row.names = rownames(x$pv))
    names(out) <- pv.name
    print(out)
  }
}

#' Summarizing MDMR Results
#'
#' \code{summary} method for class \code{mdmr}
#'
#' @param object Output from \code{mdmr}
#' @param ... Further arguments passed to or from other methods.
#'
#' @return Calling
#' \code{summary(mdmr.res)} produces a data frame comprised of:
#' \item{Statistic}{Value of the corresponding MDMR test statistic}
#' \item{Numer DF}{Numerator degrees of freedom for the corresponding effect}
#' \item{Pseudo R2}{Size of the corresponding effect on the
#' distance matrix}
#' \item{p-value}{The p-value for each effect.}
#' In addition to the information in the four columns comprising
#' \code{summary(res)}, the \code{res} object also contains:
#'
#' \item{p.prec}{A data.frame reporting the precision of each p-value. If
#' analytic p-values were computed, these are the maximum error bound of the
#' p-values reported by the \code{davies} function in \code{CompQuadForm}. If
#' permutation p-values were computed, it is the standard error of each
#' permutation p-value.}
#' \item{lambda}{A vector of the eigenvalues of \code{G} (if
#' \code{return.lambda = T}).}
#' \item{nperm}{Number of permutations used. Will read \code{NA} if analytic
#' p-values were computed}
#'
#' Note that the printed output of \code{summary(res)} will truncate p-values
#' to the smallest trustworthy values, but the object returned by
#' \code{summary(res)} will contain the p-values as computed. The reason for
#' this truncation differs for analytic and permutation p-values. For an
#' analytic p-value, if the error bound of the Davies algorithm is larger than
#' the p-value, the only conclusion that can be drawn with certainty is that
#' the p-value is smaller than (or equal to) the error bound. For a permutation
#' test, the estimated p-value will be zero if no permuted test statistics are
#' greater than the observed statistic, but the zero p-value is only a product
#' of the finite number of permutations conducted. The only conclusion that can
#' be drawn is that the p-value is smaller than \code{1/nperm}.
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Davies, R. B. (1980). The Distribution of a Linear Combination of
#'  chi-square Random Variables. Journal of the Royal Statistical Society.
#'  Series C (Applied Statistics), 29(3), 323-333.
#'
#'  Duchesne, P., & De Micheaux, P. L. (2010). Computing the distribution of
#'  quadratic forms: Further comparisons between the Liu-Tang-Zhang
#'  approximation and exact methods. Computational Statistics and Data
#'  Analysis, 54(4), 858-862.
#'
#'  McArtor, D. B., Lubke, G. H., & Bergeman, C. S. (2017). Extending
#'  multivariate distance matrix regression with an effect size measure and the
#'  distribution of the test statistic. Psychometrika, 82, 1052-1077.
#'
#' @examples
#'# --- The following two approaches yield equivalent results --- #
#'# Approach 1
#'data(mdmrdata)
#'D <- dist(Y.mdmr, method = "euclidean")
#'mdmr.res <- mdmr(X = X.mdmr, D = D)
#'summary(mdmr.res)
#'
#' @export
summary.mdmr <- function(object, ...) {
  if(is.na(object$nperm)) {
    pv.name <- "Analytic p-value"
    # If it's analytic, we can only say it's below davies error
    pv.print <- rep(NA, nrow(object$pv))
    for(i in 1:length(pv.print)) {
      pv.print[i] <- format.pval(object$pv[i,1], eps = object$p.prec[i,1])
    }
    print.res <- data.frame("Statistic" =
                              format(object$stat, digits = 3),
                            "Numer DF" = object$df,
                            "Pseudo R2" = format.pval(object$pr.sq, digits = 3),
                            "p-value" = pv.print)
    out.res <- data.frame("Statistic" = object$stat,
                          "Numer DF" = object$df,
                          "Pseudo R2" = object$pr.sq,
                          "p-value" = object$pv)
    names(print.res) <- names(out.res) <- c("Statistic", "Numer DF",
                                            "Pseudo R2", pv.name)
  }
  if(!is.na(object$nperm)) {
    pv.name <- "Permutation p-value"
    # If it's a permutation test, we can only say it's below 1/nperm
    pv.print <- format.pval(object$pv, eps = 1/object$nperm)

    print.res <- data.frame("Statistic" =
                              format(object$stat, digits = 3),
                            "Numer DF" = object$df,
                            "Pseudo R2" = format.pval(object$pr.sq, digits = 3),
                            "p-value" = pv.print)
    out.res <- data.frame("Statistic" = object$stat,
                          "Numer DF" = object$df,
                          "Pseudo R2" = object$pr.sq,
                          "p-value" = object$pv)
    names(print.res) <- names(out.res) <- c("Statistic", "Numer DF",
                                            "Pseudo R2", pv.name)
  }



  # Add significance codes to p-values
  print.res <- data.frame(print.res, NA)
  for(k in 1:5) {
    print.res[,k] <- paste(print.res[,k])
  }
  names(print.res)[5] <- ""
  for(l in 1:nrow(print.res)) {
    if(object$pv[l,1] > 0.1) {
      print.res[l,5] <- "   "
    }
    if((object$pv[l,1] <= 0.1) & (object$pv[l,1] > 0.05)) {
      print.res[l,5] <- ".  "
    }
    if((object$pv[l,1] <= 0.05) & (object$pv[l,1] > 0.01)) {
      print.res[l,5] <- "*  "
    }
    if((object$pv[l,1] <= 0.01) & (object$pv[l,1] > 0.001)) {
      print.res[l,5] <- "** "
    }
    if(object$pv[l,1] <= 0.001) {
      print.res[l,5] <- "***"
    }
  }


  print(print.res)
  cat("---", fill = T)
  cat(paste0("Signif. codes:  0 \"***\" 0.001 \"**\" 0.01 \"*\" 0.05 ",
             "\".\" 0.1 \" \" 1"))

  invisible(out.res)
}






#' Compute univariate MDMR effect sizes
#'
#' \code{delta} computes permutation-based effect sizes on individual items
#' comprising the distance matrix outcome used in multivariate distance matrix
#' regression. It returns the omnibus estimates of delta (i.e. effect size of
#' the entire design matrix on each outcome) as well as estimates of each
#' pair-wise effect size (i.e. the effect of each predictor on each outcome
#' variable, conditional on the rest of the predictors).
#'
#' See McArtor et al. (2017) for a detailed description of how delta is
#' computed. Note that it is a relative measure of effect, quantifying which
#' effects are strong (high values of delta) and weak (low values of delta)
#' within a single analysis, but estimates of delta cannot be directly compared
#' across different datasets.
#'
#' There are two options for using this function. The first option is to
#' specify the predictor matrix \code{X}, the outcome matrix \code{Y}, the
#' distance type \code{dtype} (supported by "dist" in R), and number of
#' iterations \code{niter}. This option conducts the permutation of each Y-item
#' \code{niter} times (to average out random association in each permutation)
#' and reports the median estimates of delta over the \code{niter} reps.
#'
#' The second option is to specify \code{X}, \code{G}, and \code{G.list}, a
#' list of G matrices where the permutation has already been done for each item
#' comprising Y. The names of the elements in \code{G.list} should correspond
#' to the names of the variables that were permuted. This option is implemented
#' so that delta can be computed when MDMR is being used in conjunction with
#' distance metrics not supported by \code{dist}.
#'
#' @param X An \eqn{n x p} matrix or data frame of predictors. Unordered factors
#' will be tested with contrast-codes by default, and ordered factors will be
#' tested with polynomial contrasts. For finer control of how categorical
#' predictors are handled, or if higher-order effects are desired, the output
#' from a call to \code{model.matrix()} can be supplied to this argument as
#' well.
#' @param Y Outcome data: \eqn{n x q} matrix of scores along the
#' dependent variables.
#' @param dtype Measure of dissimilarity that will be used by \code{\link{dist}}
#' to compute the distance matrix based on \code{Y}. As is the case when calling
#' \code{dist} directly, this must be one of \code{"euclidean"},
#' \code{"maximum"}, \code{"manhattan"}, \code{"canberra"}, \code{"binary"} or
#' \code{"minkowski"}, and unambiguous substring can be given.
#' @param niter Number of times to permute each outcome item in the procedure
#' to compute delta. The final result is the average of all \code{niter}
#' iterations. Higher values of \code{niter} require more computation time, but
#' result in more precise estimates.
#' @param x.inds Vector indicating which columns of X should have their
#' conditional effect sizes computed. Default value of \code{NULL} results in
#' all effects being computed, and a value of \code{0} results in no conditional
#' effects being computed, such that only the omnibus effect sizes will be
#' reported.
#' @param y.inds Vector indicating which columns of Y effect sizes should be
#' computed on. Default value of \code{NULL} results in all columns being
#' used.
#' @param G Gower's centered similarity matrix computed from the distance
#' matrix of \code{Y}. Either \code{Y} (with \code{dtype}) or \code{G} (with
#' \code{G.list}) must be passed to \code{delta()}.
#' @param G.list List of length \eqn{q} where the \eqn{i^{th}}
#' element contains the \code{G} matrix computed from a distance matrix that
#' was computed on a version of \code{Y} where the \eqn{i^{th}}
#' column has been randomly permuted.
#' @param ncores Integer; if \code{ncores} > 1, the \code{\link{parallel}}
#' package is used to speed computation. Note: Windows users must set
#' \code{ncores = 1} because the \code{parallel} package relies on forking. See
#' \code{mc.cores} in the \code{\link{mclapply}} function in the
#' \code{parallel} package for more details.
#' @param seed Integer; sets seed for the permutations of each variable
#' comprising Y so that results can be replicated.
#' @param plot.res Logical; Indicates whether or not a heat-map of the results
#' should be plotted.
#' @param grayscale Logical; Indicates whether or not the heat-map should be
#' plotted in grayscale.
#' @param cex Multiplier for cex.axis, cex.lab, cex.main, and cex that are
#' passed to the plotted result.
#' @param y.las Orientation of labels for the outcome items. Defaults to
#' vertical (2). Value of 1 prints horizontal labels, and is only recommended
#' if the multivariate outcome is comprised of few variables.
#'
#' @return A data frame whose rows correspond to the omnibus effects and the
#' effect of each individual predictor (conditional on the rest), and whose
#' columns correspond to each outcome variable whose effect sizes are being
#' quantified. If \code{plot.res = TRUE}, a heat-map is plotted of this data
#' frame to easily identify the strongest effects. Note that the heatmap is
#' partitioned into the omnibus effect (first row) and pair-wise effects
#' (remaining rows), because otherwise the omnibus effect would dominate the
#' heatmap.
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references
#'  McArtor, D. B., Lubke, G. H., & Bergeman, C. S. (2017). Extending
#'  multivariate distance matrix regression with an effect size measure and the
#'  distribution of the test statistic. Psychometrika, 82, 1052-1077.
#'
#' @examples
#' data(mdmrdata)
#' # --- Method 1 --- #
#' delta(X.mdmr, Y = Y.mdmr, dtype = "euclidean", niter = 1, seed = 12345)
#'
#' # --- Method 2 --- #
#' D <- dist(Y.mdmr, method = "euclidean")
#' G <- gower(D)
#' q <- ncol(Y.mdmr)
#' G.list <- vector(mode = "list", length = q)
#' names(G.list) <- names(Y.mdmr)
#' for(i in 1:q) {
#'    Y.shuf <- Y.mdmr
#'    Y.shuf[,i] <- sample(Y.shuf[,i])
#'    G.list[[i]] <- gower(dist(Y.shuf, method = "euclidean"))
#' }
#' delta(X.mdmr, G = G, G.list = G.list)
#'
#' @export
#' @importFrom parallel mclapply
delta <- function(X, Y = NULL, dtype = NULL, niter = 10,
                  x.inds = NULL, y.inds = NULL,
                  G = NULL, G.list = NULL, ncores = 1, seed = NULL,
                  plot.res = F, grayscale = F, cex = 1, y.las = 2) {
  ##############################################################################
  ## Step 1: Check input type
  ##############################################################################
  # If a list of distance matrices is not provided....
  if(is.null(G.list)) {
    # Make sure raw data and distance METRIC is provided
    if(any(is.null(Y), is.null(dtype))) {
      stop(paste0("Input either Y and the distance type ",
                  "or a list of distance matrices"))
    }
    # Set the method to raw (see below)
    method <- "raw"
  }
  # If a list of distance matrices is provided...
  if(!is.null(G.list)) {
    # Set the method to list (see below)
    method <- "list"
    if(!is.null(y.inds)) {
      warning("y.inds is ignored if G.list is supplied.")
    }
  }

  # Get test indices of variables comprising a model matrix (e.g. which contrast
  # codes need to be tested jointly to assess a factor if a model.matrix was
  # passed as X). If X isn't a model matrix, do them all one-at-a-time.
  test.inds <- attr(X, "assign")
  is.model.mat <- !is.null(test.inds)

  # Remove observations that are missing on X
  X.na <- which(rowSums(is.na(as.matrix(X))) > 0)
  if(length(X.na) > 0) {
    X <- X[-X.na,]
    G <- G[-X.na, -X.na]
    warning(paste0(length(X.na),
                   " observations removed due to missingness on X."))
  }

  # Handle potential factors if X is not a model.matrix
  if(!is.model.mat) {
    contr.list <- lapply(1:ncol(X), FUN = function(k) {
      contr.type <- NULL
      if(is.factor(X[,k])) {
        contr.type <- "contr.sum"
      }
      if(is.ordered(X[,k])) {
        contr.type <- "contr.poly"
      }
      return(contr.type)
    })
    names(contr.list) <- colnames(X)
    contr.list <- contr.list[!unlist(lapply(contr.list, is.null))]
    # Case 1: at least some categorical predictors
    if(length(contr.list) > 0) {
      X <- stats::model.matrix(~ . , data = as.data.frame(X),
                               contrasts.arg = contr.list)
    }
    # Case 2: all numeric predictors
    if(length(contr.list) == 0) {
      X <- stats::model.matrix(~ . , data = as.data.frame(X))
    }


    test.inds <- attr(X, "assign")
  }
  xnames <- colnames(X)
  unique.xnames <- lapply(1:max(test.inds), FUN = function(kk) {
    hold.names <- varname <- xnames[test.inds == kk]
    if(length(varname) > 1) {
      constant.char <-
        which(unlist(lapply(1:nchar(hold.names[1]), FUN = function(ind) {
          chars <- unlist(lapply(hold.names, FUN = function(var.name) {
            substr(x = var.name, start = ind, stop = ind)
          }))
          stats::var(as.numeric(as.factor(chars))) == 0
        })))
      varname <- paste(unlist(lapply(constant.char, FUN = function(cc) {
        substr(x = hold.names[1], start = cc, stop = cc)
      })), collapse = "")
    }
    varname
  })
  unique.xnames <- unlist(unique.xnames)

  # Record the number of items and sample size
  p <- ncol(X) - 1
  px <- length(unique.xnames)
  n <- nrow(X)

  # Which variables are to be tested?
  if(is.null(x.inds)) {x.inds <- 1:px}
  if(any(x.inds > px)) {
    stop("x.inds must be between 1 and the number of predictors")
  }

  ##############################################################################
  ## Step 2: Compute hat matrices
  ##############################################################################
  # Do all the computations that are required every time pseudo.r2 is called
  # Overall hat matrix
  H <- .hat(X)
  vh <- c(H)
  # Conditional hat matrices
  vhs <- parallel::mclapply(1:px, mc.cores = ncores, FUN = function(k) {
    hh <- NA
    if(k %in% x.inds) {
      x.rm <- which(test.inds == k)
      Xs <- X[,-x.rm]
      hh <- H - .hat(Xs)
    }
    c(hh)
  })

  ##############################################################################
  ## Step 3: Computation if raw data are provided
  ##############################################################################
  if(method == "raw") {
    # ----- Manage Input ----- #
    Y <- as.matrix(Y)
    q <- ncol(Y)
    if(is.null(y.inds)) {y.inds <- 1:q}
    if(any(y.inds > q)) {stop(paste0("y.inds must be between 1 and ncol(Y)"))}
    ynames <- colnames(data.frame(Y))
    if(all(ynames == paste0("X", 1:q))) {
      ynames <- paste0("Y", 1:q)
    }

    G <- gower(stats::dist(Y, method = dtype))

    # ----- Populate delta matrices using jackknife procedure ----- #
    # Get the "real" pseudo R-square
    pr2 <- .pseudo.r2(G, vh, vhs, x.inds, unique.xnames)

    # If no seed is specified by the user, generate a random one - mclapply
    # requires one, which is why "seed" is an argument rather than using the
    # standard approach of just setting a seed prior to running the function.
    # Make sure the seed doesn't overflow.
    if(is.null(seed)) {
      max.int <- .Machine$integer.max
      max.seed <- floor(max.int - 1 - niter*10)
      seed <- round(stats::runif(1,0,1) * max.seed)
    }

    # Compute pseudo R-square with each item jackknifed "niter" times
    jack.pr2 <-
      parallel::mclapply(1:niter, mc.cores = ncores, function(i) {
        set.seed(seed+i)
        jackknifed.y <- lapply(1:q, function(k) {
          y.jack <- Y
          y.jack[,k] <- sample(y.jack[,k], size = n,
                               replace = F)
          y.jack
        })
        res <- lapply(1:q, function(k) {
          pr2.jack <- rep(NA, px + 1)
          if(k %in% y.inds) {
            GG <- gower(stats::dist(jackknifed.y[[k]], method = dtype))
            pr2.jack <- .pseudo.r2(GG, vh, vhs, x.inds, unique.xnames)
          }
          pr2.jack
        })
        res <- matrix(unlist(res), nrow = px+1, ncol = q)
        dimnames(res) <- list(c("(Omnibus)", unique.xnames),
                              paste0(ynames, ".jack"))
        res
      })

    # Subtract the jackknifed pseudo R-squares from the real pseudo R-squares
    # to get the delta statistics for each rep
    jack.pr2 <- parallel::mclapply(jack.pr2, mc.cores = ncores, function(jack) {
      apply(jack, 2, function(x) {pr2 - x})
    })

    # Compute Median of Delta
    delta.med <- matrix(0, nrow = px + 1, ncol = q)
    dimnames(delta.med) <-  list(c("(Omnibus)", unique.xnames),
                                 paste0(ynames, ".jack"))
    for(i in 1:nrow(delta.med)) {
      for(j in 1:ncol(delta.med)) {
        delta.med[i,j] <- stats::median(
          unlist(lapply(jack.pr2, function(x) {x[i,j]})))
      }
    }

    # Trim out results to only the requested X and Y variables
    delta.med <- delta.med[c(1, setdiff(x.inds, 0) + 1), y.inds, drop = F]
  }

  ##############################################################################
  ## Step 4: Computation if list of distance matrices is provided
  ##############################################################################
  if(method == "list") {
    # Manage Input
    X <- as.matrix(X)
    xnames <- colnames(data.frame(X))
    q <- length(G.list)

    ynames <- names(G.list)
    if(is.null(ynames)) {
      ynames <- paste0("Y", 1:q)
    }

    # Populate delta matrices using jackknife procedure
    # Get the "real" pseudo R-square
    pr2 <- .pseudo.r2(G, vh, vhs, x.inds, unique.xnames)

    # Get each permuted pseudo R-square

    # If no seed is specified by the user, generate a random one - mclapply
    # requires one, which is why "seed" is an argument rather than using the
    # standard approach of just setting a seed prior to running the function
    if(is.null(seed)) {
      seed <- round(stats::runif(1,0,1) * 1e5)
    }

    # Compute pseudo R-square with each item jackknifed "niter" times
    jack.pr2 <-
      matrix(unlist(
        parallel::mclapply(G.list, mc.cores = ncores, function(GG) {
          .pseudo.r2(GG, vh, vhs, x.inds, unique.xnames)
        })), nrow = px+1, ncol = q)

    # Subtract the jackknifed pseudo R-squares from the real pseudo R-squares
    # to get the delta statistics for each rep
    jack.pr2 <- apply(jack.pr2, 2, function(x) {pr2 - x})
    dimnames(jack.pr2) <- list(c("(Omnibus)", unique.xnames),
                               paste0(ynames, ".jack"))

    # In this case, there's only one rep, so the median is the single rep
    delta.med <- jack.pr2

    # Trim out results to only the requested X and Y variables
    delta.med <- delta.med[c(1, setdiff(x.inds, 0) + 1), , drop = F]
  }

  ##############################################################################
  ## Step 5: Plot
  ##############################################################################
  if(plot.res) {
    # Number of outcome items to display
    q <- length(y.inds)
    if(q == 0) {
      q <- length(G.list)
      y.inds <- 1:q
    }
    # colors
    red <- 1
    green <- 1
    blue <- 1
    if(grayscale) {
      red <- green <- blue <- 0
    }

    # Case 1: Univariate X
    if(px == 1) {
      if(q == 1) {
        warning(paste0("Plotting results is uninformative ",
                       "when X and Y are unidimensional."))
      }
      if(q > 1) {
        graphics::plot(NA, xlim = c(0.5, q+0.5), ylim = c(0.5,px+0.5),
                       xaxt = "n",
                       yaxt = "n",  xlab = "", ylab = "", bty = "n",
                       main = "MDMR Effect Sizes",
                       cex.axis = cex, cex.lab = cex, cex = cex, cex.main = cex)
        graphics::axis(1, at = 1:q, labels = c(ynames[y.inds]), las = y.las,
                       cex.axis = cex, cex.lab = cex)
        graphics::axis(2, at = (px):1, labels = c("(Omnibus)"), las = 1,
                       cex.axis = cex, cex.lab = cex)

        # --- Convert to z scores for shading --- #
        z.scores <- matrix(scale(c(delta.med[1,])), nrow = px, ncol = q)
        z.scores[delta.med[1,] < 0] <- -9999
        omni.cols <- stats::pnorm(z.scores[1,])

        for(j in 1:ncol(delta.med)) {
          x.low <- j-0.5
          y.low <- px-0.5
          x.up <- j+0.5
          y.up <- px+0.5

          # Y importances
          graphics::rect(x.low, y.low, x.up, y.up,
                         col = grDevices::rgb(0*red, 0*green, 1*blue,
                                              omni.cols[j]))

          # Effect Size text
          graphics::text(x = j, y = 1, col = "white",
                         labels = formatC(delta.med[1,j], format = "g",
                                          digits = 2), cex = cex * 0.8)
        }
      }
    }

    # Case 2: Multivariate X
    if(px > 1) {

      # Number of conditional effects to display
      px <- length(x.inds)
      if(all(x.inds == 0)) {
        px <- 0
        unique.xnames <- NULL
      }

      graphics::plot(NA, xlim = c(0.5, q+0.5), ylim = c(0.5,px+0.5+1),
                     xaxt = "n",
                     yaxt = "n",  xlab = "", ylab = "", bty = "n",
                     main = "MDMR Effect Sizes",
                     cex.axis = cex, cex.lab = cex, cex = cex, cex.main = cex)
      graphics::axis(1, at = 1:q, labels = c(ynames[y.inds]), las = y.las,
                     cex.axis = cex, cex.lab = cex)
      graphics::axis(2, at = (px+1):1, labels = c("(Omnibus)",
                                                  unique.xnames[x.inds]),
                     las = 1, cex.axis = cex, cex.lab = cex)

      # Convert to z scores for shading
      z.scores <- matrix(scale(c(delta.med)), nrow = px+1, ncol = q)
      z.scores[which(delta.med < 0, arr.ind = T)] <- -9999
      omni.cols <- stats::pnorm(z.scores[1,])
      pairwise.cols <- stats::pnorm(z.scores[-1,,drop=F])

      for(i in 1:nrow(delta.med)) {
        for(j in 1:ncol(delta.med)) {
          x.low <- j-0.5
          y.low <- px-i+1-0.5+1
          x.up <- j+0.5
          y.up <- px-i+1+0.5+1

          if(i == 1) {
            # Y importances
            graphics::rect(x.low, y.low, x.up, y.up,
                           col = grDevices::rgb(0*red, 0*green, 1*blue,
                                                omni.cols[j]))
          }
          if(i > 1) {
            # XY Importances
            if(px > 0) {
              graphics::rect(x.low, y.low, x.up, y.up,
                             col = grDevices::rgb(0*red, 0.75*green, 0*blue,
                                                  pairwise.cols[i-1,j]))
            }
          }

          # Effect Size text
          graphics::text(x = j, y = px - i + 1 + 1, col = "white",
                         labels =
                           formatC(delta.med[i,j], format = "g", digits = 2),
                         cex = 0.8*cex)
        }
      }
    }
  }

  return(delta.med)
}

#' Simulated predictor data to illustrate the MDMR package.
#'
#' See package vignette by calling \code{vignette("mdmr-vignette")}.
"X.mdmr"

#' Simulated outcome data to illustrate the MDMR package.
#'
#' See package vignette by calling \code{vignette("mdmr-vignette")}.
"Y.mdmr"

#' Simulated clustered predictor data to illustrate the Mixed-MDMR function
#'
#' See \code{\link{mixed.mdmr}}.
"X.clust"

#' Simulated clustered outcome data to illustrate the Mixed-MDMR function
#'
#' See \code{\link{mixed.mdmr}}.
"Y.clust"

#' Fit Mixed-MDMR models
#'
#' \code{mixed.mdmr} allows users to conduct multivariate distance matrix
#' regression (MDMR) in the context of a (hierarchically) clustered sample
#' without inflating Type-I error rates as a result of the violation of the
#' independence assumption. This is done by invoking a mixed-effects modeling
#' framework, in which clustering/grouping variables are specified as random
#' effects and the covariate effects of interest are fixed effects. The input
#' to \code{mixed.mdmr} largely reflects the input of the \code{\link{lmer}}
#' function from the package \code{\link{lme4}} insofar as the specification of
#' random and fixed effects are concerned (see Arguments for details). Note that
#' this function simply controls for the random effects in order to test the
#' fixed effects; it does not facilitate point estimation or inference on the
#' random effects.
#'
#' @param fmla A one-sided linear formula object describing both the fixed-effects and random-effects part of the model, beginning with an ~ operator, which is followed by the terms to include in the model, separated by + operators. Random-effects terms are distinguished by vertical bars (|) separating expressions for design matrices from grouping factors. Two vertical bars (||) can be used to specify multiple uncorrelated random effects for the same grouping variable.
#' @param data A mandatory data frame containing the variables named in formula.
#' @param D Distance matrix computed on the outcome data. Can be either a
#' matrix or an R \code{\link{dist}} object. Either \code{D} or \code{G}
#' must be passed to \code{mdmr()}.
#' @param G Gower's centered similarity matrix computed from \code{D}.
#' Either \code{D} or \code{G} must be passed to \code{mdmr}.
#' @param use.ssd The proportion of the total sum of squared distances (SSD)
#' that will be targeted in the modeling process. In the case of non-Euclidean
#' distances, specifying \code{use.ssd} to be slightly smaller than 1.00 (e.g.,
#' 0.99) can substantially lower the computational burden of \code{mixed.mdmr}
#' while maintaining well-controlled Type-I error rates and only sacrificing
#' a trivial amount of power. In the case of Euclidean distances the
#' computational burden of \code{mixed.mdmr} is small, so \code{use.ssd} should
#' be set to 1.00.
#' @param start.acc Starting accuracy of the Davies (1980) algorithm
#' implemented in the \code{\link{davies}} function in the \code{CompQuadForm}
#' package (Duchesne & De Micheaux, 2010) that \code{mixed.mdmr()} uses to
#' compute MDMR p-values.
#' MDMR p-values.
#' @param ncores Integer; if \code{ncores} > 1, the \code{\link{parallel}}
#' package is used to speed computation. Note: Windows users must set
#' \code{ncores = 1} because the \code{parallel} package relies on forking. See
#' \code{mc.cores} in the \code{\link{mclapply}} function in the
#' \code{parallel} package for more details.
#'
#' @return An object with six elements and a summary function. Calling
#' \code{summary(mixed.mdmr.res)} produces a data frame comprised of:
#' \item{Statistic}{Value of the corresponding MDMR test statistic}
#' \item{Numer DF}{Numerator degrees of freedom for the corresponding effect}
#' \item{p-value}{The p-value for each effect.}
#' In addition to the information in the three columns comprising
#' \code{summary(res)}, the \code{res} object also contains:
#'
#' \item{p.prec}{A data.frame reporting the precision of each p-value. If
#' analytic p-values were computed, these are the maximum error bound of the
#' p-values reported by the \code{davies} function in \code{CompQuadForm}. If
#' permutation p-values were computed, it is the standard error of each
#' permutation p-value.}
#'
#' Note that the printed output of \code{summary(res)} will truncate p-values
#' to the smallest trustworthy values, but the object returned by
#' \code{summary(res)} will contain the p-values as computed. The reason for
#' this truncation differs for analytic and permutation p-values. For an
#' analytic p-value, if the error bound of the Davies algorithm is larger than
#' the p-value, the only conclusion that can be drawn with certainty is that
#' the p-value is smaller than (or equal to) the error bound.
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Davies, R. B. (1980). The Distribution of a Linear Combination of
#'  chi-square Random Variables. Journal of the Royal Statistical Society.
#'  Series C (Applied Statistics), 29(3), 323-333.
#'
#'  Duchesne, P., & De Micheaux, P. L. (2010). Computing the distribution of
#'  quadratic forms: Further comparisons between the Liu-Tang-Zhang
#'  approximation and exact methods. Computational Statistics and Data
#'  Analysis, 54(4), 858-862.
#'
#'  McArtor, D. B. (2017). Extending a distance-based approach to multivariate
#'  multiple regression (Doctoral Dissertation).
#'
#' @examples
#' # Source data
#' data("clustmdmrdata")
#'
#' # Get distance matrix
#' D <- dist(Y.clust)
#'
#' # Regular MDMR without the grouping variable
#' mdmr.res <- mdmr(X = X.clust[,1:2], D = D, perm.p = FALSE)
#'
#' # Results look significant
#' summary(mdmr.res)
#'
#' # Account for grouping variable
#' mixed.res <- mixed.mdmr(~ x1 + x2 + (x1 + x2 | grp),
#'                         data = X.clust, D = D)
#'
#' # Significance was due to the grouping variable
#' summary(mixed.res)
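#'
#' # A hedged sketch of the use.ssd option described above: with a
#' # non-Euclidean distance, targeting slightly less than all of the SSD
#' # can substantially reduce computation time
#' D.man <- dist(Y.clust, method = "manhattan")
#' mixed.res2 <- mixed.mdmr(~ x1 + x2 + (x1 + x2 | grp),
#'                          data = X.clust, D = D.man, use.ssd = 0.99)
#' summary(mixed.res2)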
#'
#'
#' @importFrom CompQuadForm davies
#' @importFrom parallel mclapply
#' @importFrom lme4 lmer lmerControl
#' @importFrom car Anova
#' @importFrom utils tail
#' @import stats
#' @export
mixed.mdmr <- function(fmla, data,
                       D = NULL, G = NULL,
                       use.ssd = 1,
                       start.acc = 1e-20, ncores = 1) {

  ##############################################################################
  ## Step 1:  Handle input
  ##############################################################################

  # ============================================================================
  # Step 1a:  Manage the input formula
  # ============================================================================

  # Get RHS of formula
  fmla <- paste(fmla)
  if(length(fmla) > 1) {
    fmla <- tail(fmla, 1)
  }

  # Get omnibus reduced model formula (all fixed effects removed)
  fmla.redux <- strsplit(fmla, split = "\\(")[[1]]
  rand.terms <- grep(x = fmla.redux, pattern = "\\)")
  fixed.terms <- (1:length(fmla.redux))[-rand.terms]
  fmla.fixed <- paste(fmla.redux[fixed.terms])
  fmla.fixed <- unlist(strsplit(fmla.fixed, split = "\\+"))
  fmla.fixed <- paste0(fmla.fixed[fmla.fixed != " "], collapse = "+")
  fmla.redux <- fmla.redux[rand.terms]
  for(k in seq_along(fmla.redux)) {
    fmla.redux[k] <- paste0("(", fmla.redux[k])
  }
  if(length(fmla.redux) > 0) {
    fmla.redux <- paste(fmla.redux, collapse = "+")
  }
  if(length(fmla.redux) == 0) {
    fmla.redux <- "1"
  }
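
  # Illustration (hedged, using the formula from the examples): for
  # fmla = "x1 + x2 + (x1 + x2 | grp)", the parsing above yields
  # fmla.fixed ~ "x1 + x2" and fmla.redux = "(x1 + x2 | grp)"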

  # Get the fixed portion of the formula for handling the input data in Step 1b
  fmla.fixed <- as.formula(paste0("~ ", fmla.fixed))

  # ============================================================================
  # Step 1b:  Covariate management
  # ============================================================================
  # ----------------------------------------------------------------------------
  # Get the data
  # ----------------------------------------------------------------------------
  # Model matrix formulation of the predictors in the model
  X <- model.matrix(fmla.fixed, data = data)
  # Number of predictors that will be tested
  p <- length(unique(attr(X, "assign")))
  # Sample size
  n <- nrow(X)
  # Numerator DF for the omnibus test
  df.omni <- ncol(X) - 1
  # Numerator DF for each conditional test of a predictor
  df.x <- unlist(lapply(unique(attr(X, "assign")), FUN = function(k) {
    sum(attr(X, "assign") == k)
  }))
  # Combine degrees of freedom
  df.all <- c(df.omni, df.x)

  # ============================================================================
  # Step 1c:   Outcome management
  # ============================================================================
  if(class(D) != "dist") {
    if(class(D) != "matrix") {
      stop("Please pass a distance object or an n x n matrix to D")
    }
    if(class(D) == "matrix") {
      if(nrow(D) != ncol(D)) {
        stop("Please pass a distance object or an n x n matrix to D")
      }
    }
  }

  # Get eigenvalues and eigenvectors of G
  D <- as.matrix(D)
  if(nrow(D) != n) {
    stop("The dimensionality of D does not match nrow(data)")
  }
  G <- gower(D)
  eig <- eigen(G)
  U <- eig$vectors
  lambda <- eig$values
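  # Note: when D is non-Euclidean, G is not positive semi-definite, so some
  # elements of lambda may be negative; Step 2b makes use of this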

  ##############################################################################
  ## Step 2:   Reduce number of dimensions of D to aid computation time
  ##############################################################################

  # ============================================================================
  # Step 2a:  Record dimensionality; remove trivially small roots
  # ============================================================================

  # Original dimensionality of U
  qq.srt <- length(lambda)

  # Normalize lambda to sum to 1 so that we can eliminate the relatively
  # near-zero roots
  lambda <- lambda / sum(lambda)
  names(lambda) <- 1:qq.srt

  # Drop dimensions that explain less than 1e-10 of the total SSD
  keep.inds <- which(abs(lambda) > 1e-10)
  U <- U[, keep.inds, drop = FALSE]
  lambda <- lambda[keep.inds]
  qq <- length(lambda)
  keep.lambda <- rep(TRUE, qq)
  keep.ssd <- sum(lambda * keep.lambda)

  # ============================================================================
  # Step 2b:  Optionally, keep only the dimensions that explain "use.ssd" of
  #           the total SSD
  # ============================================================================

  if(use.ssd < 1) {

    # Start out keeping just the largest eigenvalue
    keep.lambda <- rep(FALSE, qq)
    keep.lambda[1] <- TRUE

    # Compute the total ssd based on only the retained dimensions
    keep.ssd <- sum(lambda * keep.lambda)

    # Until the retained SSD is within tol=0.001 of the target SSD %, keep more
    # dimensions
    while(abs(keep.ssd - use.ssd) > 0.001) {

      # If the retained SSD is too small, add the next dimension with a
      # positive eigenvalue (from the front of the sorted list)
      if(keep.ssd < use.ssd) {
        keep.lambda[which(!keep.lambda)[1]] <- TRUE
      }

      # If it's too large, add a dimension with a negative eigenvalue (from
      # the tail) to bring the retained SSD back down
      if(keep.ssd > use.ssd) {
        keep.lambda[tail(which(!keep.lambda), 1)] <- TRUE
      }

      # Recompute the total SSD based on the currently retained dimensions
      keep.ssd <- sum(lambda * keep.lambda)

      # Break if we've kept all dimensions, i.e. if the dimensionality can't
      # be reduced to satisfy "use.ssd" within the tolerance
      if(all(keep.lambda)) break
    }

    # Trim the dimensions
    U <- U[, keep.lambda, drop = FALSE]
    lambda <- lambda[keep.lambda]
    qq <- length(lambda)
  }
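  # Hypothetical illustration: with normalized lambda = c(0.6, 0.3, 0.2, -0.1)
  # and use.ssd = 0.9, the loop keeps dimension 1 (SSD = 0.6, too small), then
  # dimension 2 (SSD = 0.9, within tolerance), so dimensions 3 and 4 are
  # dropped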

  ##############################################################################
  ## Step 3:  Get LRT statistics assessing all fixed effects
  ##############################################################################

  # Iterate over all retained MDS axes
  chisqs <- parallel::mclapply(1:qq, mc.cores = ncores, FUN = function(k) {

    # Define the outcome variable for this axis
    u.hold <- U[,k]

    # --------------------------------------------------------------------------
    # Tests of individual predictors
    # --------------------------------------------------------------------------
    # Formula for the full model for this outcome
    fmla.full <- as.formula(paste0("u.hold ~ ", fmla))

    # Fit full model
    lmer.full <- lmer(fmla.full, data = data, REML = F,
                      lmerControl(calc.derivs = F))

    # Get test statistics for individual X using car::Anova()
    chisq.x.res <- Anova(lmer.full, type = "III", test.statistic = "Chisq")
    chisq.x <- chisq.x.res$Chisq
    names(chisq.x) <- rownames(chisq.x.res)
    rm(chisq.x.res)

    # --------------------------------------------------------------------------
    # Omnibus test
    # --------------------------------------------------------------------------
    # Fit model
    fmla.redux <- as.formula(paste0("u.hold ~ ", fmla.redux))
    lmer.redux <- lmer(fmla.redux, data = data, REML = F,
                       lmerControl(calc.derivs = F))

    # Get test statistic via the model comparison approach
    chisq.omni <- anova(lmer.redux, lmer.full)$Chisq[2]
    names(chisq.omni) <- "Omnibus"

    # --------------------------------------------------------------------------
    # Return results
    # --------------------------------------------------------------------------
    return(c(chisq.omni, chisq.x))
  })
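  # chisqs is a list with one named numeric vector per retained axis; the
  # names are "Omnibus" followed by the rownames of the type-III Anova table
  # (including the intercept)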
  # Get names of tests
  nn <- names(chisqs[[1]])

  ##############################################################################
  ## Step 4:  Compute overall test statistics and p-values
  ##############################################################################

  # ============================================================================
  # Step 4a:  Compute overall test statistics
  # ============================================================================
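  # Each overall statistic is the eigenvalue-weighted sum of that test's
  # per-axis chi-square statistics, i.e. tilde.l = sum over axes k of
  # lambda[k] * chisq[k] (cf. McArtor, 2017)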
  tilde.l <- unlist(parallel::mclapply(
    1:length(nn), mc.cores = ncores, FUN = function(k) {
      sum(unlist(lapply(chisqs, "[[", k)) * lambda)}))
  names(tilde.l) <- nn

  # ============================================================================
  # Step 4b:  p-values
  # ============================================================================
  pv <-
    matrix(unlist(parallel::mclapply(
      1:length(nn), mc.cores = ncores, FUN = function(k) {

        df <- df.all[k]

        # ----------------------------------------------------------------------
        # Compute p-value
        # ----------------------------------------------------------------------
        # Start from the requested accuracy and relax it if davies() fails
        acc <- start.acc
        pp <- CompQuadForm::davies(tilde.l[k], lambda = lambda,
                                   h = rep(df, qq), lim = 50000, acc = acc)

        # Error-check
        err <- any(pp$ifault != 0, pp$Qq > 1, pp$Qq < 0)
        while(err) {
          acc <- acc * 10
          if(acc > 0.01) {
            warning(paste0("Unable to compute p-value for ", nn[k]))
            return(c(NA, df, acc = acc))
          }
          pp <- CompQuadForm::davies(tilde.l[k], lambda = lambda,
                                     h = rep(df, qq), lim = 50000, acc = acc)
          err <- any(pp$ifault != 0, pp$Qq > 1, pp$Qq < 0)
        }
        return(c(pp$Qq, df, acc = acc))

      })), ncol = 3, byrow = TRUE)

  ##############################################################################
  ## Step 5: Output
  ##############################################################################
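  # Each row of pv holds the p-value, the numerator df, and the final Davies
  # accuracy for one test, in the order given by nn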

  df <- pv[,2]
  acc <- pv[,3]
  pv <- pv[,1]
  names(tilde.l) <- names(pv) <- names(acc) <- names(df) <- nn

  out <- list("stat" = tilde.l, "pv" = pv, "p.prec" = acc,
              "df" = df, "ssd.used" = keep.ssd)

  class(out) <- c("mixed.mdmr", class(out))

  return(out)
}


#' Print Mixed MDMR Object
#'
#' \code{print} method for class \code{mixed.mdmr}
#'
#' @param x Output from \code{\link{mixed.mdmr}}
#' @param ... Further arguments passed to or from other methods.
#'
#' @return
#' \item{p-value}{Analytic p-values for the omnibus test and each predictor}
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#'
#' @export
print.mixed.mdmr <- function(x, ...) {
  pv.name <- "p-value"
  # p-values are analytic, so we can only report that each is below the
  # Davies error bound
  out <- rep(NA, length(x$pv))
  for(i in 1:length(out)) {
    out[i] <- format.pval(x$pv[i], eps = x$p.prec[i])
  }
  out <- data.frame(out, row.names = names(x$pv))
  names(out) <- pv.name
  print(out)
}

#' Summarizing Mixed MDMR Results
#'
#' \code{summary} method for class \code{mixed.mdmr}
#'
#' @param object Output from \code{\link{mixed.mdmr}}
#' @param ... Further arguments passed to or from other methods.
#'
#' @return Calling
#' \code{summary(res)} on a \code{mixed.mdmr} object produces a data frame
#' comprised of:
#' \item{Statistic}{Value of the corresponding MDMR test statistic}
#' \item{Numer DF}{Numerator degrees of freedom for the corresponding test}
#' \item{p-value}{The p-value for each effect.}
#' In addition to the information in the three columns comprising
#' \code{summary(res)}, the \code{mixed.mdmr} object \code{res} also contains:
#'
#' \item{p.prec}{A data.frame reporting the precision of each p-value. These
#' are the maximum error bounds of the p-values reported by the \code{davies}
#' function in \code{CompQuadForm}.}
#'
#' Note that the printed output of \code{summary(res)} will truncate p-values
#' to the smallest trustworthy values, but the object returned by
#' \code{summary(res)} will contain the p-values as computed. Because these
#' p-values are analytic, if the error bound of the Davies algorithm is larger
#' than the p-value itself, the only conclusion that can be drawn with
#' certainty is that the p-value is smaller than (or equal to) the error
#' bound.
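#'
#' For example, \code{format.pval(1e-12, eps = 1e-8)} renders as
#' \code{"<1e-08"}, which is how such a p-value appears in the printed output.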
#'
#' @author Daniel B. McArtor (dmcartor@gmail.com) [aut, cre]
#'
#' @references Davies, R. B. (1980). The Distribution of a Linear Combination of
#'  chi-square Random Variables. Journal of the Royal Statistical Society.
#'  Series C (Applied Statistics), 29(3), 323-333.
#'
#'  Duchesne, P., & De Micheaux, P. L. (2010). Computing the distribution of
#'  quadratic forms: Further comparisons between the Liu-Tang-Zhang
#'  approximation and exact methods. Computational Statistics and Data
#'  Analysis, 54(4), 858-862.
#'
#'  McArtor, D. B. (2017). Extending a distance-based approach to multivariate
#'  multiple regression (Doctoral Dissertation).
#'
#' @examples
#' # Source data
#' data("clustmdmrdata")
#'
#' # Get distance matrix
#' D <- dist(Y.clust)
#'
#' # Regular MDMR without the grouping variable
#' mdmr.res <- mdmr(X = X.clust[,1:2], D = D, perm.p = FALSE)
#'
#' # Results look significant
#' summary(mdmr.res)
#'
#'
#' # Account for grouping variable
#' mixed.res <- mixed.mdmr(~ x1 + x2 + (x1 + x2 | grp),
#'                         data = X.clust, D = D)
#'
#' # Significance was due to the grouping variable
#' summary(mixed.res)
#'
#' @export
summary.mixed.mdmr <- function(object, ...) {
  pv.name <- "p-value"
  pv.print <- rep(NA, length(object$pv))
  for(i in 1:length(pv.print)) {
    pv.print[i] <- format.pval(object$pv[i], eps = object$p.prec[i])
  }
  print.res <- data.frame("Statistic" =
                            format(object$stat, digits = 3),
                          "Numer DF" = object$df,
                          "p-value" = pv.print)
  out.res <- data.frame("Statistic" = object$stat,
                        "Numer DF" = object$df,
                        "p-value" = object$pv)
  names(print.res) <- names(out.res) <- c("Statistic", "Numer DF", pv.name)

  # Add significance codes to p-values
  print.res <- data.frame(print.res, NA)
  for(k in 1:3) {
    print.res[,k] <- paste(print.res[,k])
  }
  names(print.res)[4] <- ""
  for(l in 1:nrow(print.res)) {
    if(object$pv[l] > 0.1) {
      print.res[l,4] <- "   "
    }
    if((object$pv[l] <= 0.1) & (object$pv[l] > 0.05)) {
      print.res[l,4] <- ".  "
    }
    if((object$pv[l] <= 0.05) & (object$pv[l] > 0.01)) {
      print.res[l,4] <- "*  "
    }
    if((object$pv[l] <= 0.01) & (object$pv[l] > 0.001)) {
      print.res[l,4] <- "** "
    }
    if(object$pv[l] <= 0.001) {
      print.res[l,4] <- "***"
    }
  }

  print(print.res)
  cat("---", fill = T)
  cat(paste0("Signif. codes:  0 \"***\" 0.001 \"**\" 0.01 \"*\" 0.05 ",
             "\".\" 0.1 \" \" 1"))
  invisible(out.res)
}