R/function.R

Defines functions Dsn_usw univ Dsn_VAGE Dsn_royston Dsn_MSMK Dsn_HZ Dsn_FA Dsn_SWM2MK Dsn_MK MK STD copulas MVNMIX IMMV SPH PSVII PSII power.usw power.mswV power.mswR power.msk power.mhz power.faTest power.mvnTest mhz faTest mvnTest

Documented in copulas faTest IMMV mhz MVNMIX mvnTest power.faTest power.mhz power.msk power.mswR power.mswV power.mvnTest power.usw PSII PSVII SPH

#' A Powerful Test for Multivariate Normality (Zhou-Shao's Test)
#'
#' @description A simple and powerful test for multivariate normality with a combination of multivariate
#' kurtosis (MK) and Shapiro-Wilk which was proposed by Zhou and Shao (2014). The \emph{p}-value of the test
#' statistic (\eqn{T_n}) is computed based on a simulated null distribution of \eqn{T_n}. Details see Zhou and Shao (2014).
#' @param X an \eqn{n*p} data matrix or data frame, where \eqn{n} is number of rows (observations) and \eqn{p} is number of columns (variables) and \eqn{n>p}.
#' @param B number of Monte Carlo simulations for null distribution, default is 1000 (increase B to increase the precision of \emph{p}-value).
#' @param pct percentiles of MK to get \eqn{c_1} and \eqn{c_2} described in the reference paper, default is (0.01, 0.99).
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{results of the Zhou-Shao test for multivariate normality, i.e., test statistic \eqn{T_n},
#' \emph{p}-value (under H0, i.e. multivariate normal, that \eqn{T_n} is at least as extreme as the observed value), and multivariate normality summary (YES, if \emph{p}-value>0.05).}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests. Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value, and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' }
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @import stats
#' @seealso \code{\link{power.mvnTest}}, \code{\link{msk}}, \code{\link{mardia}}, \code{\link{msw}}, \code{\link{faTest}}, \code{\link{mhz}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution ##
#' X = matrix(rgamma(50*4,shape =  2),50)
#' mvnTest(X, B=100)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' mvnTest(iris.df, B=100)
#'
mvnTest <- function(X, B=1000, pct=c(0.01,0.99)) {
  ## Zhou-Shao Tn test: combines an averaged Shapiro-Wilk component with a
  ## multivariate-kurtosis (MK) rejection indicator; the p-value comes from a
  ## Monte Carlo null distribution of Tn simulated under multivariate
  ## normality (Zhou & Shao, 2014).
  X <- as.matrix(X)
  X <- as.matrix(X[complete.cases(X), ])
  n <- NROW(X)
  p <- NCOL(X)

  ## Cutoffs c1/c2 at the `pct` percentiles of the simulated null
  ## distribution of MK (data-independent: depends only on n, p, B).
  cri_mk <- quantile(Dsn_MK(n, p, B), pct)

  ## Averaged Shapiro-Wilk component of Tn: mean SW statistic over the
  ## standardized columns, plus the mean of the d smallest SW statistics of
  ## the columns of Z %*% t(Z).
  sw_component <- function(M) {
    M <- as.matrix(M)
    d <- NCOL(M)
    Z <- STD(M)
    sw <- function(v) shapiro.test(v)$statistic
    marginal  <- mean(apply(Z, 2, sw))
    projected <- mean(sort(apply(Z %*% t(Z), 2, sw))[1:d])
    (marginal + projected) / 2
  }

  ## Tn statistic: 1 when MK falls outside (c1, c2), otherwise
  ## 1 - sw_component(M).
  tn_stat <- function(M, crit) {
    mk_val <- MK(M)
    flag <- as.numeric(mk_val > crit[2] | mk_val < crit[1])
    flag + (1 - flag) * (1 - sw_component(M))
  }

  ## Observed statistic.
  Tn <- tn_stat(X, cri_mk)

  ## Simulated null distribution of Tn (cri_mk is reused; it does not depend
  ## on the observed data).
  null_tn <- vapply(
    seq_len(B),
    function(i) tn_stat(matrix(rnorm(n * p), n), cri_mk),
    numeric(1)
  )

  ## p-value: chance, under H0, that Tn is at least as extreme as observed.
  p.value <- mean(null_tn > Tn)
  verdict <- if (p.value > 0.05) "YES" else "NO"

  mv.test <- noquote(c(round(Tn, 4), round(p.value, 4), verdict))
  names(mv.test) <- c("Tn", "p-value", "Result")

  list(mv.test = mv.test, uv.shapiro = univ(X))
}



#' Rotational Robust Shapiro-Wilk Type (SWT) Test for Multivariate Normality (FA Test of Fattorini)
#'
#' @description It computes FA Test proposed by Fattorini (1986). This test would be more rotationally robust than other
#' SWT tests such as Royston (1982) H test and the test proposed by Villasenor-Alva and Gonzalez-Estrada (2009).
#' The \emph{p}-value of the test statistic is computed based on a
#' simulated null distribution of the statistic.
#' @param X an \eqn{n*p} data matrix or data frame, where \eqn{n} is number of rows (observations) and \eqn{p} is number of columns (variables) and \eqn{n>p}.
#' @param B number of Monte Carlo simulations for null distribution, default is 1000 (increase B to increase the precision of \emph{p}-value).
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{results of the FA test for multivariate normality, i.e., test statistic,
#' \emph{p}-value, and multivariate normality summary (YES, if \emph{p}-value>0.05).}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests. Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value,and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' }
#' @references Fattorini, L. (1986). Remarks on the use of Shapiro-Wilk statistic for testing multivariate normality. \emph{Statistica}, 46(2), 209-217.
#' @references Lee, R., Qian, M., & Shao, Y. (2014). On rotational robustness of Shapiro-Wilk type tests for multivariate normality. \emph{Open Journal of Statistics}, 4(11), 964.
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @references Royston, J. P. (1982). An extension of Shapiro and Wilk's W test for normality to large samples. \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, 31(2), 115-124.
#' @references Villasenor Alva, J. A., & Estrada, E. G. (2009). A generalization of Shapiro–Wilk's test for multivariate normality. \emph{Communications in Statistics—Theory and Methods}, 38(11), 1870-1883.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @seealso \code{\link{power.faTest}}, \code{\link{mvnTest}}, \code{\link{msk}}, \code{\link{mardia}}, \code{\link{msw}}, \code{\link{mhz}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution ##
#' X = matrix(rgamma(50*4,shape =  2),50)
#' faTest(X, B=100)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' faTest(iris.df, B=100)
#'
faTest <- function(X, B=1000) {
  ## Fattorini's FA test: 1 minus the smallest Shapiro-Wilk statistic over
  ## the n projected samples; p-value from a Monte Carlo null distribution
  ## simulated under multivariate normality.
  X <- as.matrix(X)
  X <- as.matrix(X[complete.cases(X), ])
  n <- NROW(X)
  p <- NCOL(X)

  sw <- function(v) shapiro.test(v)$statistic

  ## FA statistic for one data matrix M.
  fa_stat <- function(M) {
    M <- as.matrix(M)
    nr <- NROW(M)
    centered <- sweep(M, 2, colMeans(M))        # subtract column means
    inv_nS <- solve((nr - 1) * cov(M))          # inverse of (n-1)*S
    Y <- M %*% t(centered %*% inv_nS)
    1 - min(apply(Y, 2, sw))
  }

  ## Observed statistic, then the simulated null distribution.
  fa <- fa_stat(X)
  null_fa <- vapply(
    seq_len(B),
    function(i) fa_stat(matrix(rnorm(n * p), n)),
    numeric(1)
  )

  p.value <- mean(null_fa > fa)
  verdict <- if (p.value > 0.05) "YES" else "NO"

  mv.test <- noquote(c(round(fa, 4), round(p.value, 4), verdict))
  names(mv.test) <- c("Statistic", "p-value", "Result")

  list(mv.test = mv.test, uv.shapiro = univ(X))
}



#' Shapiro-Wilk Type (SWT) Tests for Multivariate Normality
#'
#' @description The SWT-based tests for multivariate normality including Royston's H test and the test proposed
#' by Villasenor-Alva and Gonzalez-Estrada (2009).
#' @param X an \eqn{n*p} numeric matrix or data frame, where the sample size \eqn{n} must be between 3 and 5000 and \eqn{n>p}.
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{a result table of multivariate normality tests,
#' including the name of the test, test statistic,
#' \emph{p}-value, and multivariate normality summary (Yes, if \emph{p}-value>0.05). Note that the test results
#' of \code{Royston} will not be reported if \eqn{n > 2000} or \eqn{n < 3} and the test results
#' of Villasenor-Alva and Gonzalez-Estrada (\code{VAGE}) will not be reported if  \eqn{n > 5000} or \eqn{n < 12}.}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests.
#' Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value,and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' If the number of variable is \eqn{p=1}, only univariate Shapiro-wilk's test result will be produced.}
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @references Royston, J. P. (1982). An extension of Shapiro and Wilk's W test for normality to large samples. \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, 31(2), 115-124.
#' @references Villasenor Alva, J. A., & Estrada, E. G. (2009). A generalization of Shapiro–Wilk's test for multivariate normality. \emph{Communications in Statistics—Theory and Methods}, 38(11), 1870-1883.
#' @references Lee, R., Qian, M., & Shao, Y. (2014). On rotational robustness of Shapiro-Wilk type tests for multivariate normality. \emph{Open Journal of Statistics}, 4(11), 964.
#' @import stats nortest moments
#' @seealso \code{\link{power.mswR}}, \code{\link{power.mswV}}, \code{\link{mvnTest}}, \code{\link{faTest}}, \code{\link{msk}}, \code{\link{mardia}}, \code{\link{mhz}}, \code{\link{mvn}}, \code{\link{shapiro.test}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution
#' X = matrix(rgamma(50*4,shape =  2),50)
#' msw(X)
#'
#' ## Data from normal distribution
#' X = matrix(rnorm(50*4,mean = 2 , sd = 1),50)
#' msw(X)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' msw(iris.df)
#'
msw <- function (X) {
  ## Shapiro-Wilk type (SWT) tests for multivariate normality: Royston's H
  ## test (reported for 3 < n <= 2000) and the Villasenor-Alva &
  ## Gonzalez-Estrada (VAGE) generalized SW test (reported for
  ## 12 <= n <= 5000). If p == 1 only the univariate Shapiro-Wilk summary is
  ## returned.
  ##
  ## Bug fix: the nested royston() previously tested its sample-size ranges
  ## with `||` instead of `&&` (`n >= 4 || n <= 11` and
  ## `n >= 12 || n <= 2000`), so for 4 <= n <= 11 the small-sample z values
  ## were computed and then always overwritten by the large-sample formula.
  ## The branches are restructured below so each range uses its own
  ## normalizing coefficients (Royston, 1982).

  X <- as.matrix(X)
  X <- X[complete.cases(X), ]   # drop rows containing missing values
  X <- as.matrix(X)
  n <- nrow(X)
  p <- ncol(X)

  ## VAGE test: average the univariate SW statistics of the sphered data and
  ## refer log(1 - W*) to a normal approximation (Villasenor-Alva &
  ## Gonzalez-Estrada, 2009).
  VAGE <- function (X)
  {
    dname <- deparse(substitute(X))
    if (is.vector(X) == TRUE)
      X = cbind(X)
    stopifnot(is.matrix(X))
    n <- nrow(X)
    if (n < 12 || n > 5000)
      stop("Sample size must be between 12 and 5000.")
    p <- ncol(X)
    if (n <= p)
      stop("Sample size must be larger than vector dimension.")
    if (n > p) {
      x <- scale(X, scale = FALSE)   # centered data
      ## Inverse square root of the sample covariance via eigendecomposition.
      eigenv <- eigen(var(X), symmetric = TRUE)
      e.vec <- as.matrix(eigenv$vectors)
      sqrS <- e.vec %*% diag(1/sqrt(eigenv$values), ncol = p) %*%
        t(e.vec)
      z <- t(sqrS %*% t(x))          # sphered observations
      ## Univariate SW statistic for each transformed coordinate.
      w <- rep(NA, p)
      for (k in 1:p) {
        w[k] <- shapiro.test(z[, k])$statistic
      }
      wast <- mean(w)                # W*: the averaged SW statistic
      ## Normal approximation for log(1 - W*).
      y <- log(n)
      w1 <- log(1 - wast)
      m <- -1.5861 - 0.31082 * y - 0.083751 * y^2 + 0.0038915 *
        y^3
      s <- exp(-0.4803 - 0.082676 * y + 0.0030302 * y^2)
      s2 <- s^2
      sigma2 <- log((p - 1 + exp(s2))/p)
      mu1 <- m + s2/2 - sigma2/2
      p.value <- pnorm(w1, mean = mu1, sd = sqrt(sigma2), lower.tail = FALSE)
      results <- list(statistic = c(MVW = wast), p.value = p.value,
                      method = "Generalized Shapiro-Wilk test for Multivariate Normality by Villasenor-Alva and Gonzalez-Estrada",
                      data.name = dname)
      class(results) = "htest"
      return(results)
    }
  }

  ## Royston (1982) H test: each marginal SW (or Shapiro-Francia, for
  ## leptokurtic margins) statistic is transformed to an approximate standard
  ## normal z, and the z's are combined through an equivalent-degrees-of-
  ## freedom correction for correlated margins.
  royston <- function (a)
  {
    p = dim(a)[2]
    n = dim(a)[1]
    if (n <= 3) {
      stop("n must be greater than 3")
    }
    if (n > 2000) {
      stop("n must be less than 2000")
    }
    z <- as.data.frame(matrix(nrow = p, ncol = 1))
    ## Marginal statistic: Shapiro-Francia when the margin is leptokurtic
    ## (kurtosis > 3), otherwise Shapiro-Wilk.
    sw_or_sf <- function(x) {
      if (kurtosis(x) > 3) sf.test(x)$statistic else shapiro.test(x)$statistic
    }
    if (n <= 11) {
      ## Small-sample (4 <= n <= 11) normalizing coefficients.
      x = n
      g = -2.273 + 0.459 * x
      m = 0.544 - 0.39978 * x + 0.025054 * x^2 - 0.0006714 *
        x^3
      s = exp(1.3822 - 0.77857 * x + 0.062767 * x^2 - 0.0020322 *
                x^3)
      for (i in 1:p) {
        w = sw_or_sf(a[, i])
        z[i, 1] = (-log(g - (log(1 - w))) - m)/s
      }
    }
    else {
      ## Large-sample (12 <= n <= 2000) coefficients based on log(n).
      x = log(n)
      g = 0
      m = -1.5861 - 0.31082 * x - 0.083751 * x^2 + 0.0038915 *
        x^3
      s = exp(-0.4803 - 0.082676 * x + 0.0030302 * x^2)
      for (i in 1:p) {
        w = sw_or_sf(a[, i])
        z[i, 1] = ((log(1 - w)) + g - m)/s
      }
    }
    ## Equivalent degrees of freedom from the average correlation among
    ## the margins.
    u = 0.715
    v = 0.21364 + 0.015124 * (log(n))^2 - 0.0018034 * (log(n))^3
    l = 5
    C = cor(a)
    NC = (C^l) * (1 - (u * (1 - C)^u)/v)
    tot = sum(NC) - p
    mC = tot/(p^2 - p)
    edf = p/(1 + (p - 1) * mC)
    ## Transform each z and combine into the H statistic (computed once;
    ## the original loop recomputed the identical vector p times).
    Res = (qnorm((pnorm(-z[, ]))/2))^2
    RH = (edf * (sum(Res)))/p
    pv = pchisq(RH, edf, lower.tail = FALSE)
    if (pv < 0.05) {
      dist.check = ("Data analyzed have a non-normal distribution.")
    }
    else {
      dist.check = ("Data analyzed have a normal distribution.")
    }
    res = structure(list(H = RH, p.value = pv, distribution = dist.check))
    res
  }

  ## Dispatch on p and n: run whichever tests are valid for this sample size.
  if (p == 1){
    ## Univariate input: only the Shapiro-Wilk summary applies.
    final <- list(univ(X))
    names(final) <- c("uv.shapiro")
    return(final)
  } else if (p > 1){
    if (n <= p){
      stop("Sample size must be larger than vector dimension.")}
    if (n > p){
      if (n <= 3) {
        stop("n must be greater than 3")
      }else if(n > 3 & n < 12){
        ## Only Royston is valid here (VAGE needs n >= 12).
        rtest <-  royston(X)
        r.result <- ifelse(rtest$p.value > 0.05, "YES", "NO")
        r.output <- noquote(c("Royston",round(rtest$H,4),round(rtest$p.value,4),r.result))
        names(r.output) <- c("Test","Statistic","p-value","Result")

        final <- list(r.output,univ(X))
        names(final) <- c("mv.test","uv.shapiro")
        return(final)
      }else if(n >= 12 & n <= 2000){
        ## Both tests are valid; report them together.
        rtest <-  royston(X)
        r.result <- ifelse(rtest$p.value > 0.05, "YES", "NO")
        r.output <- noquote(c("Royston",round(rtest$H,4),round(rtest$p.value,4),r.result))
        names(r.output) <- c("Test","Statistic","p-value","Result")

        vtest <- VAGE(X)
        v.result <- ifelse(vtest$p.value > 0.05, "YES", "NO")
        v.output <- noquote(c("VA-GE",round(vtest$statistic,4),round(vtest$p.value,4),v.result))
        names(v.output) <- c("Test","Statistic","p-value","Result")

        combind <- noquote(rbind(r.output,v.output))
        rownames(combind)<-NULL
        final <- list(combind,univ(X))
        names(final) <- c("mv.test","uv.shapiro")
        return(final)
      }else if(n >2000 & n <=5000){
        ## Only VAGE is valid here (Royston needs n <= 2000).
        vtest <- VAGE(X)
        v.result <- ifelse(vtest$p.value > 0.05, "YES", "NO")
        v.output <- noquote(c("VA-GE",round(vtest$statistic,4),round(vtest$p.value,4),v.result))
        names(v.output) <- c("Test","Statistic","p-value","Result")

        final <- list(v.output,univ(X))
        names(final) <- c("mv.test","uv.shapiro")
        return(final)
      }
      if(n >5000)
        stop("Sample size must be between 3 and 5000.")
    }
  }
}



#' Mardia Test (Skewness and Kurtosis) for Multivariate Normality
#'
#' @description It computes Mardia (1970)'s multivariate skewness and kurtosis statistics and their corresponding
#'  p-value. Both p-values of skewness and kurtosis statistics should be greater than 0.05 to conclude
#'  multivariate normality. The skewness statistic will be adjusted for sample size \eqn{n < 20}.
#' @param X an \eqn{n*p} numeric matrix or data frame.
#' @param std if \code{TRUE}, the data matrix or data frame will be standardized via normalizing
#' the covariance matrix by \eqn{n}.
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{results of the Mardia test, i.e., test statistic, \emph{p}-value, and multivariate normality summary (YES, if both skewness and kurtosis \emph{p}-value>0.05).}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests. Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value,and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' }
#' @references Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. \emph{Biometrika}, 57(3), 519-530.
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @references Doornik, J. A., & Hansen, H. (2008). An omnibus test for univariate and multivariate normality. \emph{Oxford Bulletin of Economics and Statistics}, 70, 927-939.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @seealso \code{\link{mvnTest}}, \code{\link{faTest}}, \code{\link{msw}}, \code{\link{msk}}, \code{\link{mhz}}, \code{\link{mvn}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution
#' X = matrix(rgamma(50*4,shape =  2),50)
#' mardia(X)
#'
#' ## Data from normal distribution
#' X = matrix(rnorm(50*4,mean = 2 , sd = 1),50)
#' mardia(X)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' mardia(iris.df)
#'
mardia <- function (X, std = TRUE){
  ## Mardia's (1970) tests of multivariate skewness and kurtosis. The
  ## skewness chi-square statistic receives a small-sample correction when
  ## n < 20; when std = TRUE the covariance matrix is normalized by n.
  X <- as.data.frame(X)
  dat <- as.matrix(X[complete.cases(X), ])
  n <- nrow(dat)
  p <- ncol(dat)

  ## Center the data and form the (optionally n-normalized) covariance.
  dat <- scale(dat, scale = FALSE)
  S <- if (std) ((n - 1)/n) * cov(dat) else cov(dat)

  ## Matrix of Mahalanobis-type cross products.
  D <- dat %*% solve(S, tol = 1e-25) %*% t(dat)
  g1p <- sum(D^3)/n^2          # multivariate skewness measure
  g2p <- sum(diag((D^2)))/n    # multivariate kurtosis measure
  df <- p * (p + 1) * (p + 2)/6

  ## Skewness statistic (chi-square with df degrees of freedom); k is the
  ## small-sample correction applied for n < 20.
  k <- ((p + 1) * (n + 1) * (n + 3))/(n * ((n + 1) * (p + 1) - 6))
  skew <- if (n < 20) n * k * g1p/6 else n * g1p/6
  p.skew <- pchisq(skew, df, lower.tail = FALSE)

  ## Kurtosis z-statistic with a two-sided normal p-value.
  kurt <- (g2p - p * (p + 2)*(n-1)/(n+1)) * sqrt(n/(8 * p * (p + 2)))
  p.kurt <- 2 * (1 - pnorm(abs(kurt)))

  skewMVN <- if (p.skew > 0.05) "YES" else "NO"
  kurtoMVN <- if (p.kurt > 0.05) "YES" else "NO"
  MVN <- if (p.kurt > 0.05 && p.skew > 0.05) "YES" else "NO"

  ## Assemble the three-row summary table (skewness, kurtosis, overall).
  resultSkewness = cbind.data.frame(Test = "Skewness",
                                    Statistic = as.factor(round(skew,4)), `p-value` = as.factor(round(p.skew,4)),
                                    Result = skewMVN)
  resultKurtosis = cbind.data.frame(Test = "Kurtosis",
                                    Statistic = as.factor(round(kurt,4)), `p-value` = as.factor(round(p.kurt,4)),
                                    Result = kurtoMVN)
  MVNresult = cbind.data.frame(Test = "MV Normality", Statistic = NA,
                               `p-value` = NA, Result = MVN)
  mv.out <- rbind.data.frame(resultSkewness, resultKurtosis, MVNresult)

  list(mv.test = mv.out, uv.shapiro = univ(X))
}



#' Bowman and Shenton Test for Multivariate Normality
#'
#' @description It computes Bowman and Shenton (1975)'s test statistic (MSK) and its corresponding
#'  p-value for multivariate normality. The statistic is calculated based on a combination of
#'  multivariate skewness (MS) and kurtosis (MK) such that \eqn{MSK=MS+|MK|^2}. For formulas of MS and MK,
#'  please refer to Mardia (1970). The corresponding p-value of the statistic is computed based on a
#'  simulated null distribution of MSK. The skewness statistic (MS) will be adjusted for sample size \eqn{n < 20}.
#' @param X an \eqn{n*p} numeric matrix or data frame.
#' @param B number of Monte Carlo simulations for null distribution, default is 1000 (increase B to increase the precision of p-value).
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{results of the Bowman and Shenton test, i.e., test statistic, \emph{p}-value, and multivariate normality summary (YES, if \emph{p}-value>0.05).}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests. Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value,and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' }
#' @references Bowman, K. O., & Shenton, L. R. (1975). Omnibus test contours for departures from normality based on \eqn{\sqrt b_1} and \eqn{b_2}. \emph{Biometrika}, 62(2), 243-250.
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @references Mardia, K. V. (1970). Measures of multivariate skewness and kurtosis with applications. \emph{Biometrika}, 57(3), 519-530.
#' @references Doornik, J. A., & Hansen, H. (2008). An omnibus test for univariate and multivariate normality. \emph{Oxford Bulletin of Economics and Statistics}, 70, 927-939.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @seealso \code{\link{power.msk}}, \code{\link{mvnTest}}, \code{\link{faTest}}, \code{\link{msw}}, \code{\link{mardia}}, \code{\link{mhz}}, \code{\link{mvn}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution
#' X = matrix(rgamma(50*4,shape =  2),50)
#' msk(X, B=100)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' msk(iris.df, B=100)
#'
msk <- function (X, B=1000)
{
  ## Bowman-Shenton omnibus statistic MSK = MS + MK^2 (Mardia skewness plus
  ## squared Mardia kurtosis z-statistic), with its p-value computed from a
  ## Monte Carlo null distribution simulated under multivariate normality.
  X <- as.data.frame(X)
  cc <- as.matrix(X[complete.cases(X), ])
  n <- nrow(cc)
  p <- ncol(cc)

  ## MSK statistic for one sample; the skewness part uses the small-sample
  ## correction for n < 20.
  msk_stat <- function (M)
  {
    M <- as.data.frame(M)
    d <- as.matrix(M[complete.cases(M), ])
    nn <- nrow(d)
    pp <- ncol(d)
    d <- scale(d, scale = FALSE)
    S <- ((nn - 1)/nn) * cov(d)
    D <- d %*% solve(S, tol = 1e-25) %*% t(d)
    g1p <- sum(D^3)/nn^2
    g2p <- sum(diag((D^2)))/nn
    k <- ((pp + 1) * (nn + 1) * (nn + 3))/(nn * ((nn + 1) * (pp + 1) - 6))
    skew <- if (nn < 20) nn * k * g1p/6 else nn * g1p/6
    kurt <- (g2p - pp * (pp + 2)*(nn-1)/(nn+1)) * sqrt(nn/(8 * pp * (pp + 2)))
    skew + kurt^2
  }

  ## Observed statistic, then the simulated null distribution.
  stat <- msk_stat(X)
  null_stat <- vapply(
    seq_len(B),
    function(i) msk_stat(matrix(rnorm(n * p), n)),
    numeric(1)
  )

  p.value <- mean(null_stat > stat)
  verdict <- if (p.value > 0.05) "YES" else "NO"

  mv.test <- noquote(c(round(stat, 4), round(p.value, 4), verdict))
  names(mv.test) <- c("Statistic", "p-value", "Result")

  list(mv.test = mv.test, uv.shapiro = univ(X))
}



#' Henze-Zirkler Test for Multivariate Normality
#'
#' @description It computes a multiviariate normality test based on a non-negative functional distance which
#' was proposed by Henze and Zirkler (1990). Under the null hypothesis the test statistic is approximately
#' log-normally distributed.
#' @param X an \eqn{n*p} numeric matrix or data frame.
#' @return Returns a list with two objects:
#' \describe{
#' \item{\code{mv.test}}{results of the Henze-Zirkler test, i.e., test statistic, \emph{p}-value, and multivariate normality summary (YES, if \emph{p}-value>0.05).}
#' \item{\code{uv.shapiro}}{a dataframe with \eqn{p} rows detailing univariate Shapiro-Wilk tests. Columns in the dataframe contain test statistics \emph{W}, \emph{p}-value,and univariate normality summary (YES, if \emph{p}-value>0.05).}
#' }
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @seealso \code{\link{power.mhz}}, \code{\link{mvnTest}}, \code{\link{faTest}}, \code{\link{msw}}, \code{\link{msk}}, \code{\link{mardia}}, \code{\link{mvn}}
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Data from gamma distribution
#' X = matrix(rgamma(50*4,shape =  2),50)
#' mhz(X)
#'
#' ## Data from normal distribution
#' X = matrix(rnorm(50*4,mean = 2 , sd = 1),50)
#' mhz(X)
#'
#' ## load the ubiquitous multivariate iris data ##
#' ## (first 50 observations of columns 1:4) ##
#' iris.df = iris[1:50, 1:4]
#' mhz(iris.df)
mhz <- function(X){
  ## Henze-Zirkler (1990) test for multivariate normality, plus the
  ## univariate Shapiro-Wilk summary from univ(). Under H0 the HZ statistic
  ## is approximately log-normal, from which the p-value is obtained.

  X   <- as.matrix(X)
  X   <- X[complete.cases(X),]   # drop rows with missing values
  X   <- as.matrix(X)
  n   <- NROW(X)
  p   <- NCOL(X)

  HZ <- function(X) {
    ## Smoothing parameter b as a function of n and p (Henze & Zirkler 1990).
    HZoptb <- function(n, p)  ((2*p + 1)*n/4)^(1/(p + 4))/sqrt(2)

    X   <- as.matrix(X)
    n   <- NROW(X)
    p   <- NCOL(X)
    S   <- var(X)*(n-1)/n   # covariance normalized by n instead of n-1
    b   <- HZoptb(n,p)
    ## Degenerate cases: a single observation yields NA; a (near-)singular
    ## covariance (smallest eigenvalue < 1e-3) returns the large value 4*n.
    if(n == 1)  return(NA)
    if( min(eigen(S)$values) < 1e-3)  return(4*n)
    else {
      Y <- STD(X)   # standardized data via the package helper STD()
      ## Pairwise Euclidean distances between standardized observations.
      DY    <- as.matrix(dist(Y, diag = TRUE, upper = TRUE))
      ## The three summands of the HZ statistic.
      part1 <- sum(exp(- b^2 * DY^2/2))/n
      part2 <- (-2*(1+b^2)^{-p/2}*sum(exp(-b^2/(2*(1+b^2))*apply(Y^2,1,sum))))
      part3 <- (1+2*b^2)^{-p/2}*n

      statistic <- part1+part2+part3

      ## Null mean (mu) and variance (sigma.sq) of the statistic, then the
      ## matched log-normal parameters p.mu / p.sigma used for the p-value.
      mu <- 1 - ((1 + 2 * b^2)^(-p/2)) * (1 + (p * b^2)/(1 +
                                                           2 * b^2) + (p * (p + 2) * b^4)/(2 * (1 + 2 * b^2)^2))
      w.b <- (1 + b^2) * (1 + 3 * b^2)
      sigma.sq <- 2 * (1 + 4 * b^2)^(-p/2) + 2 * (1 + 2 * b^2)^(-p) *
        (1 + (2 * p * b^4)/(1 + (2 * b^2))^2 + (3 * p *
                                                  (p + 2) * b^8)/(4 * (1 + 2 * b^2)^4)) - 4 *
        (w.b^(-p/2)) * (1 + (3 * p * b^4)/(2 * w.b) +
                          (p * (p + 2) * b^8)/(2 * w.b^2))
      p.mu <- log(sqrt(mu^4/(sigma.sq + mu^2)))
      p.sigma <- sqrt(log((sigma.sq + mu^2)/mu^2))
      ## Upper-tail p-value under the log-normal approximation.
      p.value <- 1 - (plnorm(statistic, p.mu, p.sigma))

      result <- ifelse(p.value>0.05,"YES","NO")
      output <- noquote(c(round(statistic,4),round(p.value,4),result))
      names(output) <- c("Statistic","p-value","Result")
      return(output)
    }
  }

  hz <- HZ(X)

  ## Combine the HZ table with the univariate Shapiro-Wilk summary.
  final <- list(hz,univ(X))
  names(final) <- c("mv.test","uv.shapiro")
  return(final)
}



#' Power Calculation using the Zhou-Shao's Multivariate Normality Test Statistic (\eqn{T_n})
#'
#' @description Empirical power calculation using the Zhou-Shao's multivariate normality test Statistic \eqn{T_n}.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param pct percentiles of MK to get c1 and c2 described in the reference paper, default is (0.01, 0.99).
#' @param FUN self-defined function for generate multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 for Tn at one-sided alpha = 0.05 ##
#'
#' power.mvnTest(a = 0.05, n = 50, p = 2,  B = 100, pct = c(0.01, 0.99), FUN=IMMV, D1=runif)
#'
power.mvnTest <- function(a, n, p, B=1000, pct=c(0.01,0.99), FUN,...){
  ## Empirical power of the Zhou-Shao Tn test at level `a`: simulate B
  ## samples from the alternative generated by FUN(n, p, ...) and count how
  ## often Tn exceeds the (1 - a) quantile of its simulated null
  ## distribution.

  ## MK cutoffs c1/c2 from the simulated null distribution of MK.
  cri_mk <- quantile(Dsn_MK(n, p, B), pct)

  ## Averaged Shapiro-Wilk component of Tn.
  sw_component <- function(M){
    sw <- function(v) shapiro.test(v)$statistic
    M <- as.matrix(M)
    d <- NCOL(M)
    Z <- STD(M)
    (mean(apply(Z, 2, sw)) + mean(sort(apply(Z %*% t(Z), 2, sw))[1:d]))/2
  }

  ## Tn statistic: 1 when MK falls outside (c1, c2), else 1 - sw_component.
  tn_stat <- function(M, crit) {
    mk_val <- MK(M)
    flag <- as.numeric(mk_val > crit[2] | mk_val < crit[1])
    flag + (1 - flag) * (1 - sw_component(M))
  }

  ## Tn values under the alternative distribution.
  FUN <- match.fun(FUN)
  s <- vapply(
    seq_len(B),
    function(i) tn_stat(FUN(n, p, ...), cri_mk),
    numeric(1)
  )

  ## Critical value from the package-level null distribution of Tn.
  ## NOTE(review): Dsn_SWM2MK is called here as (n, p, B, pct), while the
  ## local version inside mvnTest takes (n, p, cri_mk, B) -- confirm the
  ## package-level signature matches this call.
  tn <- Dsn_SWM2MK(n, p, B, pct)
  cri <- quantile(tn, (1 - a))

  sum(s > cri)/B
}



#' Power Calculation using the Fattorini's FA Test Statistic
#'
#' @description Empirical power calculation using the Fattorini's FA Test Statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generate multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Fattorini, L. (1986). Remarks on the use of Shapiro-Wilk statistic for testing multivariate normality. \emph{Statistica}, 46(2), 209-217.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.faTest(a = 0.05, n = 50, p = 2,  B = 100, FUN=IMMV, D1=runif)
#'
power.faTest <- function(a, n, p, B=1000, FUN,...) {
  ## Empirical power of Fattorini's FA test at level `a` against the
  ## alternative generated by FUN(n, p, ...).

  sw <- function(v) shapiro.test(v)$statistic

  ## FA statistic: 1 minus the smallest SW statistic over the projected
  ## samples.
  fa_stat <- function(M) {
    M <- as.matrix(M)
    nr <- NROW(M)
    centered <- sweep(M, 2, colMeans(M))
    inv_nS <- solve((nr - 1) * cov(M))
    Y <- M %*% t(centered %*% inv_nS)
    1 - min(apply(Y, 2, sw))
  }

  ## FA values under the alternative distribution.
  FUN <- match.fun(FUN)
  s <- vapply(seq_len(B), function(i) fa_stat(FUN(n, p, ...)), numeric(1))

  ## Critical value from the simulated null distribution of FA.
  cri <- quantile(Dsn_FA(n, p, B), (1 - a))

  sum(s > cri)/B
}



#' Power Calculation using the Henze-Zirkler Test Statistic
#'
#' @description Empirical power calculation using the Henze-Zirkler Test Statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generate multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.mhz(a = 0.05, n = 50, p = 2,  B = 100, FUN=IMMV, D1=runif)
#'
power.mhz <- function(a, n, p, B=1000, FUN,...) {
  FUN <- match.fun(FUN)
  ## Henze-Zirkler statistic on B samples drawn from the alternative FUN
  s <- rep(0, B)
  for (i in seq_len(B)) {
    s[i] <- as.numeric(mhz(FUN(n, p, ...))$mv.test[1])
  }

  ## Upper-tail critical value from the simulated null distribution of HZ
  tn  <- Dsn_HZ(n, p, B)
  cri <- quantile(tn, 1 - a)

  ## Empirical power: rejection rate at level a
  sum(s > cri) / B
}



#' Power Calculation using the Bowman and Shenton Test Statistic
#'
#' @description Empirical power calculation using Bowman and Shenton Test Statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generating a multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Bowman, K. O., & Shenton, L. R. (1975). Omnibus test contours for departures from normality based on \eqn{\sqrt b_1} and \eqn{b_2}. \emph{Biometrika}, 62(2), 243-250.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.msk(a = 0.05, n = 50, p = 2,  B = 100, FUN=IMMV, D1=runif)
#'
power.msk <- function(a, n, p, B=1000, FUN,...){
  FUN <- match.fun(FUN)
  ## Bowman-Shenton (MSK) statistic on B samples drawn from the alternative FUN
  s <- rep(0, B)
  for (i in seq_len(B)) {
    X <- FUN(n, p, ...)
    s[i] <- MSMK(X)
  }

  ## Upper-tail critical value from the simulated null distribution
  tn  <- Dsn_MSMK(n, p, B)
  cri <- quantile(tn, 1 - a)

  ## Empirical power: rejection rate at level a
  sum(s > cri) / B
}



#' Power Calculation using the SWT-based Royston Test Statistic
#'
#' @description Empirical power calculation using Royston test statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generating a multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Royston, J. P. (1982). An extension of Shapiro and Wilk's W test for normality to large samples. \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, 31(2), 115-124.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.mswR(a = 0.05, n = 50, p = 2,  B = 100, FUN=IMMV, D1=runif)
#'
power.mswR <- function(a, n, p, B=1000, FUN,...){
  FUN <- match.fun(FUN)
  ## Royston statistic (row 1, col 2 of msw's mv.test table) on B samples
  ## drawn from the alternative FUN
  s <- rep(0, B)
  for (i in seq_len(B)) {
    X <- FUN(n, p, ...)
    s[i] <- as.numeric(msw(X)$mv.test[1, 2])
  }

  ## Upper-tail critical value from the simulated null distribution
  tn  <- Dsn_royston(n, p, B)
  cri <- quantile(tn, 1 - a)

  ## Empirical power: rejection rate at level a
  sum(s > cri) / B
}



#' Power Calculation using the SWT-based Villasenor-Alva and Gonzalez-Estrada (VAGE) Test Statistic
#'
#' @description Empirical power calculation using VAGE test statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p number of columns (variables), \eqn{n>p}.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generating a multivariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Villasenor Alva, J. A., & Estrada, E. G. (2009). A generalization of Shapiro–Wilk's test for multivariate normality. \emph{Communications in Statistics—Theory and Methods}, 38(11), 1870-1883.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against bivariate (p=2) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.mswV(a = 0.05, n = 50, p = 2,  B = 100, FUN=IMMV, D1=runif)
#'
power.mswV <- function(a, n, p, B=1000, FUN,...){
  FUN <- match.fun(FUN)
  ## VAGE statistic (row 2, col 2 of msw's mv.test table) on B samples
  ## drawn from the alternative FUN
  s <- rep(0, B)
  for (i in seq_len(B)) {
    X <- FUN(n, p, ...)
    s[i] <- as.numeric(msw(X)$mv.test[2, 2])
  }

  ## VAGE rejects for SMALL statistic values, so the critical value is the
  ## lower-tail a-quantile of the simulated null distribution
  tn  <- Dsn_VAGE(n, p, B)
  cri <- quantile(tn, a)

  ## Empirical power: rejection rate at level a
  sum(s < cri) / B
}
# power.mswV <- function(a, n, p, B, FUN,...){
#   FUN  <- match.fun(FUN)
#   s    <- rep(0,B)
#   for(i in 1:B){
#     X    <- FUN(n, p, ...)
#     s[i] <- as.numeric(mvShapiro.Test(X)$statistic)
#   }
#
#   tn  <- Dsn_VAGE(n, p, B)
#   cri <- quantile(tn, a)
#
#   return(sum(s < cri)/B)
# }


#' Power Calculation using the Univariate Shapiro-Wilk Test Statistic
#'
#' @description Empirical power calculation using univariate Shapiro-Wilk test statistic.
#' @param a significance level (\eqn{\alpha}).
#' @param n number of rows (observations).
#' @param p p=1 for univariate.
#' @param B number of Monte Carlo simulations, default is 1000 (can increase B to increase the precision).
#' @param FUN self-defined function for generating a univariate distribution. See example.
#' @param ... optional arguments passed to \code{FUN}.
#' @return Returns a numeric value of the estimated empirical power (value between 0 and 1).
#' @references Shapiro, S. S., & Wilk, M. B. (1965). An analysis of variance test for normality (complete samples). \emph{Biometrika}, 52(3/4), 591-611.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Power calculation against univariate (p=1) independent Beta(1, 1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' power.usw(a = 0.05, n = 50, p = 1,  B = 100, FUN=IMMV, D1=runif)
#'
power.usw <- function(a, n, p=1, B=1000, FUN,...){
  FUN <- match.fun(FUN)
  ## Univariate Shapiro-Wilk W (from msw's uv.shapiro table) on B samples
  ## drawn from the alternative FUN
  s <- rep(0, B)
  for (i in seq_len(B)) {
    X <- FUN(n, p, ...)
    s[i] <- as.numeric(msw(X)$uv.shapiro[1, 1])
  }

  ## Shapiro-Wilk rejects for SMALL W, so the critical value is the
  ## lower-tail a-quantile of the simulated null distribution
  tn  <- Dsn_usw(n, p, B)
  cri <- quantile(tn, a)

  ## Empirical power: rejection rate at level a
  sum(s < cri) / B
}



#' Random Generation for the Spherically Symmetric Pearson Type II Distribution
#'
#' @description Generate univariate or multivariate random sample for the spherically symmetric Pearson type II distribution.
#' @param n number of rows (observations).
#' @param p number of columns (variables).
#' @param s shape parameter, \eqn{s>-1}.
#' @return Returns univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample matrix.
#' @references Kotz, S. (1975). Multivariate distributions at a cross road. In A Modern Course on Statistical Distributions in Scientific Work (pp. 247-270). Springer, Dordrecht.
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from PSII(s=1) ##
#' PSII(n=5, p=2, s=1)
#'
#'
#' ## Power calculation against bivariate (p=2) PSII(s=1) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a = 0.05, n = 50, p = 2,  B = 100, FUN = PSII, s = 1)
#'
PSII <- function(n, p, s) {

  ## Uniform random directions: Gaussian rows normalized to the unit sphere
  V    <- matrix(rnorm(n*p), n)
  Vnew <- V/sqrt(apply(V^2, 1, sum))
  ## Radial part of the Pearson type II law: sqrt of a Beta(p/2, s + 1) draw
  ## (the unused reparameterization 1/(s + 1) from the original was removed)
  r <- sqrt(rbeta(n, shape1 = p/2, shape2 = s + 1))

  ## n x p sample; each row lies inside the unit ball
  return(r*Vnew)
}



#' Random Generation for the Spherically Symmetric Pearson Type VII Distribution
#'
#' @description Generate univariate or multivariate random sample for the spherically symmetric Pearson type VII distribution.
#' @param n number of rows (observations).
#' @param p number of columns (variables).
#' @param s shape parameter, \eqn{s > p/2}.
#' @return Returns univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample matrix.
#' @references Kotz, S. (1975). Multivariate distributions at a cross road. In A Modern Course on Statistical Distributions in Scientific Work (pp. 247-270). Springer, Dordrecht.
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from PSVII(s=3) ##
#' PSVII(n=5, p=2, s=3)
#'
#'
#' ## Power calculation against bivariate (p=2) PSVII(s=3) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a = 0.05, n = 50, p = 2,  B = 100, FUN = PSVII, s = 3)
#'
PSVII <- function(n, p, s) {

  ## Uniform random directions: Gaussian rows normalized to the unit sphere
  V    <- matrix(rnorm(n*p), n)
  Vnew <- V/sqrt(apply(V^2, 1, sum))
  ## Radial part of the Pearson type VII law via an inverse-Beta transform;
  ## requires s > p/2 so the Beta shape1 is positive
  ## (the unused reparameterization -1/(s - 1) from the original was removed)
  r <- sqrt(1/rbeta(n, shape1 = s - p/2, shape2 = p/2) - 1)

  return(r*Vnew)
}



#' Random Generation for General Spherically Symmetric Distributions
#'
#' @description Generate univariate or multivariate random sample for general spherically symmetric distributions.
#' @param n number of rows (observations).
#' @param p number of columns (variables).
#' @param D random generation functions for some distributions (e.g., \code{rgamma}, \code{rbeta}).
#' @param ... optional arguments passed to \code{D}.
#' @return Returns univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample matrix.
#' @references Chmielewski, M. A. (1981). Elliptically symmetric distributions: A review and bibliography. \emph{International Statistical Review/Revue Internationale de Statistique}, 67-74.
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from SPH(Beta(1,1)) ##
#' SPH(n=5, p=2, D=rbeta, shape1=1, shape2=1)
#'
#'
#' ## Power calculation against bivariate (p=2) SPH(Beta(1,1)) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a=0.05, n=50, p=2, B=100, FUN=SPH, D=rbeta, shape1=1, shape2=1)
#'
SPH <- function(n, p, D,...){

  ## Radial component drawn from the user-supplied generator D (n draws),
  ## with any extra arguments forwarded via ...
  gen    <- match.fun(D)
  radius <- gen(n, ...)

  ## Uniform directions on the unit sphere: normalized Gaussian rows
  G         <- matrix(rnorm(n * p), nrow = n)
  direction <- G / sqrt(rowSums(G^2))

  ## n x p spherically symmetric sample
  radius * direction
}



#' Random Generation for Distribution with Independent Marginals
#'
#' @description Generate univariate or multivariate random sample for distribution with independent marginals such that \eqn{D_1 \otimes D_2}.
#' \eqn{D_1 \otimes D_2} denotes the distribution having independent marginal distributions \eqn{D_1} and \eqn{D_2}. This function can generate
#' multivariate random samples only from distribution \eqn{D_1} or from both \eqn{D_1} and \eqn{D_2}.
#' @param n number of rows (observations).
#' @param p total number of columns (variables).
#' @param q number of columns from distribution \code{D1} if generate multivariate samples from independent marginal distribution \eqn{D_1} and \eqn{D_2}.
#' Default is \code{NULL}, i.e., generating samples only from one distribution.
#' @param D1 random generation function for 1st distribution (e.g., \code{rnorm}, \code{rbeta}).
#' @param D2 random generation function for 2nd distribution (e.g., \code{rnorm}, \code{rbeta}).
#' @param D1.args a list of optional arguments passed to \code{D1}.
#' @param D2.args a list of optional arguments passed to \code{D2}.
#' @return Returns univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample matrix.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @references Henze, N., & Zirkler, B. (1990). A class of invariant consistent tests for multivariate normality. \emph{Communications in statistics-Theory and Methods}, 19(10), 3595-3617.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from IMMV(N(0,1),Beta(1,2)) ##
#' IMMV(n=5, p=2, q=1, D1=rbeta, D1.args=list(shape1=1,shape2=2), D2=rnorm)
#'
#'
#' ## Power calculation against bivariate (p=2) IMMV(Gamma(5,1)) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a=0.05, n=50, p=2, B=100, FUN=IMMV, D1=rgamma, D1.args=list(shape=5, rate=1))
#'
#' ## Power calculation against bivariate (p=2) IMMV(N(0,1),Beta(1,2)) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a=0.05, n=50, p=2, B=100, FUN=IMMV, q=1, D1=rbeta, D1.args=list(shape1=1,shape2=2),
#' D2=rnorm)
#'
IMMV <- function(n,p,q=NULL,D1,D2=NULL,D1.args=list(),D2.args=list()){
  ## Draw `count` i.i.d. values from generator `gen` (with its argument list)
  ## and fold them into an n-row matrix, filling column-wise.
  draw <- function(gen, count, args) {
    matrix(do.call(match.fun(gen), c(count, args)), n)
  }

  if (is.null(q)) {
    ## Single marginal distribution D1 for all p columns
    return(draw(D1, n * p, D1.args))
  }

  ## Independent marginals: q columns from D1, the remaining p - q from D2
  part1 <- draw(D1, n * q, D1.args)
  part2 <- draw(D2, n * (p - q), D2.args)
  cbind(part1, part2)
}



#' Random Generation for the Normal Mixture Distribution
#'
#' @description Generate univariate or multivariate random sample for the normal mixture distribution with density
#' \eqn{\lambda N(0,\Sigma_1)+(1-\lambda)N(bl, \Sigma_2)}, where \eqn{l} is the column vector with all elements being 1,
#' \eqn{\Sigma_i=(1-\rho_i)I+\rho_i ll^T} for \eqn{i=1,2}. Each \eqn{\rho_i} has to satisfy \eqn{\rho_i > -1/(p-1)} in order to make the
#' covariance matrix meaningful.
#' @param n number of rows (observations).
#' @param p total number of columns (variables).
#' @param lambda weight parameter to allocate the proportions of the mixture, \eqn{0<\lambda<1}.
#' @param mu2 the common element \eqn{b} of the mean vector \eqn{bl} of \eqn{N(bl, \Sigma_2)}.
#' @param rho1 correlation parameter \eqn{\rho_1} in \eqn{\Sigma_1}.
#' @param rho2 correlation parameter \eqn{\rho_2} in \eqn{\Sigma_2}.
#' @return Returns univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample matrix.
#' @references Zhou, M., & Shao, Y. (2014). A powerful test for multivariate normality. \emph{Journal of applied statistics}, 41(2), 351-363.
#' @import stats
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from MVNMIX(0.5,4,0,0) ##
#' MVNMIX(n=5, p=2, lambda=0.5, mu2=4, rho1=0, rho2=0)
#'
#'
#' ## Power calculation against bivariate (p=2) MVNMIX(0.5,4,0,0) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a=0.05, n=50, p=2, B=100, FUN=MVNMIX, lambda=0.5, mu2=4, rho1=0, rho2=0)
#'
MVNMIX <- function(n, p, lambda, mu2, rho1 = 0, rho2 = 0) {
  ## Square roots of the two compound-symmetry covariance matrices:
  ## A_i %*% t(A_i) = (1 - rho_i) I + rho_i * 1 1^T
  A1 <- diag(sqrt(1 - rho1), p) +
    (sqrt(rho1*p + 1 - rho1) - sqrt(1 - rho1))/p * matrix(1, p, p)
  A2 <- diag(sqrt(1 - rho2), p) +
    (sqrt(rho2*p + 1 - rho2) - sqrt(1 - rho2))/p * matrix(1, p, p)

  ## Mixture-component indicator for each observation:
  ## u < lambda -> N(0, Sigma_1), otherwise N(mu2*1, Sigma_2)
  u <- runif(n)

  ## Preallocate the result instead of growing it with rbind in the loop
  ## (the original rbind-append was O(n^2) in copies)
  S <- matrix(0, n, p)
  for (i in seq_len(n)) {
    if (u[i] < lambda) {
      S[i, ] <- t(A1 %*% rnorm(p))
    } else {
      S[i, ] <- mu2 + t(A2 %*% rnorm(p))
    }
  }
  S
}



#' Random Generation for the Copula Generated Distributions
#'
#' @description Generate univariate or multivariate random sample for the Copula Generated Distributions.
#' @param n number of rows (observations).
#' @param p total number of columns (variables).
#' @param c name of an Archimedean copula, choosing from "\code{clayton}" (default), "\code{frank}", or "\code{gumbel}".
#' @param param number (numeric) specifying the copula parameter.
#' @param invF inverse function (quantile function, e.g. \code{qnorm}).
#' @param ... optional arguments passed to \code{invF}.
#' @return univariate (\eqn{p=1}) or multivariate (\eqn{p>1}) random sample.
#' @references Yan, J. (2007). Enjoy the joy of copulas: with a package copula. \emph{Journal of Statistical Software}, 21(4), 1-21.
#' @importFrom copula claytonCopula frankCopula gumbelCopula rCopula
#' @export
#' @examples
#' set.seed(12345)
#'
#' ## Generate 5X2 random sample matrix from Clayton(0.5, qnorm) ##
#' copulas(n=50, p=2, c="clayton", param=0.5, invF=qnorm)
#'
#'
#' ## Power calculation against bivariate (p=2) Clayton(0.5, qnorm) distribution ##
#' ## at sample size n=50 at one-sided alpha = 0.05 ##
#'
#' # Zhou-Shao's test #
#' power.mvnTest(a=0.05, n=50, p=2, B=100, FUN=copulas, c="clayton", param=0.5, invF=qnorm)
#'
copulas <- function(n, p, c="clayton", param, invF, ...) {
  ## Build the Archimedean copula object. The original if/else chain left
  ## `obj` undefined for an unrecognized name, producing an obscure
  ## "object 'obj' not found" error; fail with a clear message instead.
  obj <- switch(c,
    clayton = claytonCopula(param, dim = p),
    frank   = frankCopula(param, dim = p),
    gumbel  = gumbelCopula(param, dim = p),
    stop("'c' must be one of \"clayton\", \"frank\", or \"gumbel\"",
         call. = FALSE)
  )

  ## Draw dependent uniforms from the copula, then map each margin through
  ## the user-supplied quantile function invF (extra args forwarded via ...)
  u    <- rCopula(n = n, copula = obj)
  invF <- match.fun(invF)
  apply(u, 2, invF, ...)
}








##### some inner functions for above functions ########
## standardize data ##
## Standardize data to zero mean and identity population (1/n) covariance ##
STD <- function(X){
  n <- NROW(X)
  p <- NCOL(X)
  if (p == 1) {
    ## Univariate: center and rescale so the population (1/n) variance is 1
    X <- as.numeric(X)
    return(as.matrix((X - mean(X)) / sd(X) * sqrt(n / (n - 1))))
  }
  ## Multivariate: whiten with the inverse symmetric square root of the
  ## MLE covariance, via its eigendecomposition (byrow=T replaced by TRUE)
  Xc <- X - matrix(rep(colMeans(X), n), ncol = p, byrow = TRUE)
  EIGEN <- eigen(cov(X) * (n - 1) / n)
  Sinvsqrt <- EIGEN$vectors %*% diag(1 / sqrt(EIGEN$values)) %*% t(EIGEN$vectors)
  Xc %*% t(Sinvsqrt)
}


## MK Test Statistic ##
## Standardized multivariate (Mardia-type) kurtosis statistic MK ##
MK <- function(X) {
  n <- NROW(X)
  p <- NCOL(X)
  if (p == 1) {
    ## Univariate case: sample kurtosis b2 centered at its normal-theory
    ## mean 3(n-1)/(n+1) and scaled by sqrt(n/24)
    centered <- X - mean(X)
    b21 <- n * sum(centered^4) / sum(centered^2)^2
    sqrt(n / 24) * (b21 - 3 * (n - 1) / (n + 1))
  } else {
    ## Multivariate case: mean fourth power of the standardized squared
    ## distances, centered at p(p+2)(n-1)/(n+1) and scaled accordingly
    Z <- STD(X)
    sqdist <- rowSums(Z^2)
    sqrt(n / (8 * p * (p + 2))) * (mean(sqdist^2) - p * (p + 2) * (n - 1) / (n + 1))
  }
}


## Simulate Null Distribution of the MK Statistic ##
## Simulate the null distribution of MK from B samples of N(0, I_p) ##
Dsn_MK <- function(n, p, B) {
  vapply(seq_len(B), function(b) MK(matrix(rnorm(n * p), n)), numeric(1))
}


## Simulate Null Distribution of the Zhou-Shao's Test Statistic (Tn) ##
Dsn_SWM2MK <- function(n, p, B, pct) {
  ## Simulate the null distribution of the Zhou-Shao Tn statistic from B
  ## samples of size n drawn from N(0, I_p). pct gives the two percentiles
  ## of the MK null distribution used as the kurtosis gating cutoffs.
  mk = Dsn_MK(n, p, B)
  ## Obtain c1 and c2 (lower and upper cutoffs) from the null distribution of MK
  cri_mk = quantile(mk, pct)
  ## SW-based component of the Tn test statistic: average of (i) the mean
  ## Shapiro-Wilk W over the p standardized coordinates and (ii) the mean of
  ## the p smallest W values over the n projected directions StdX %*% t(StdX)
  SWM2 <- function(X){
    SWS <- function(x) shapiro.test(x)$statistic
    X <- as.matrix(X)
    p = NCOL(X)
    StdX = STD(X)
    return((mean(apply(StdX, 2, SWS)) + mean(sort(apply(StdX%*%t(StdX), 2, SWS))[1:p]))/2)
  }
  ## Tn statistic: when MK falls outside (c1, c2) the Index term forces
  ## Tn = 1 (reject outright); otherwise Tn is the SW component 1 - SWM2(X)
  SWM2MK <- function(X, cri_mk) {
    Mk      <- MK(X)
    Index   <- (Mk > cri_mk[2]|Mk < cri_mk[1])
    return( (1 - Index)*(1 - SWM2(X)) + Index)
  }
  ## Null draws of Tn from standard multivariate normal samples
  s <- rep(0,B)
  for(i in 1:B) {
    s[i] <- SWM2MK(matrix(rnorm(n*p), n), cri_mk)
  }
  return(s)
}


## Simulate Null Distribution of FA Test Statistic ##
## Simulate the null distribution of Fattorini's FA statistic ##
Dsn_FA <- function(n, p, B){

  ## Shapiro-Wilk W for a single numeric vector
  w_stat <- function(v) shapiro.test(v)$statistic

  ## FA statistic: one minus the smallest W across the n data-driven
  ## projection directions of the standardized sample
  fa_stat <- function(X) {
    X <- as.matrix(X)
    nn <- NROW(X)
    pp <- NCOL(X)
    center <- colMeans(X)
    inv_nS <- solve((nn - 1) * cov(X))
    proj <- X %*% t((X - matrix(rep(center, nn), ncol = pp, byrow = TRUE)) %*% inv_nS)
    1 - min(apply(proj, 2, w_stat))
  }

  ## B null draws, each from an n x p standard normal sample
  vapply(seq_len(B), function(b) fa_stat(matrix(rnorm(n * p), n)), numeric(1))
}


## Simulate Null Distribution of HZ Test Statistic ##
## Simulate the null distribution of the Henze-Zirkler statistic ##
Dsn_HZ <- function(n, p, B) {
  vapply(seq_len(B), function(b) {
    as.numeric(mhz(matrix(rnorm(n * p), n))$mv.test[1])
  }, numeric(1))
}


## MSK statistic function ##
## Bowman-Shenton style omnibus statistic: Mardia skewness + kurtosis^2 ##
## (unused locals dataframe, dname, data.org and df were removed)
MSMK <- function (X)
{
  ## Keep complete rows only, then center the data
  data <- as.matrix(X[complete.cases(X), ])
  n <- nrow(data)
  p <- ncol(data)
  data <- scale(data, scale = FALSE)

  ## MLE (1/n) covariance; D holds the Mahalanobis cross-products
  S <- ((n - 1)/n) * cov(data)
  D <- data %*% solve(S, tol = 1e-25) %*% t(data)

  ## Mardia's multivariate skewness (g1p) and kurtosis (g2p) measures
  g1p <- sum(D^3)/n^2
  g2p <- sum(diag((D^2)))/n

  ## Small-sample correction factor applied to the skewness term when n < 20
  k = ((p + 1) * (n + 1) * (n + 3))/(n * ((n + 1) * (p + 1) - 6))
  if (n < 20) {
    skew <- n * k * g1p/6
  }
  else {
    skew <- n * g1p/6
  }
  kurt <- (g2p - p * (p + 2)*(n-1)/(n+1)) * sqrt(n/(8 * p * (p + 2)))
  msk <- skew + kurt^2

  return(msk)
}


## Simulate Null Distribution of MSK Test Statistic ##
## Simulate the null distribution of the MSK statistic ##
Dsn_MSMK <- function(n, p, B) {
  vapply(seq_len(B), function(b) MSMK(matrix(rnorm(n * p), n)), numeric(1))
}


## Simulate Null Distribution of MSW-Royston Test Statistic ##
## Simulate the null distribution of the Royston statistic (msw row 1) ##
Dsn_royston <- function(n, p, B) {
  vapply(seq_len(B), function(b) {
    as.numeric(msw(matrix(rnorm(n * p), n))$mv.test[1, 2])
  }, numeric(1))
}


## Simulate Null Distribution of MSW-VAGE Test Statistic ##
## Simulate the null distribution of the VAGE statistic (msw row 2) ##
Dsn_VAGE <- function(n, p, B) {
  vapply(seq_len(B), function(b) {
    as.numeric(msw(matrix(rnorm(n * p), n))$mv.test[2, 2])
  }, numeric(1))
}
# Dsn_VAGE <- function(n, p, B) {
#   s <- rep(0,B)
#   for(i in 1:B) {
#     s[i] <- as.numeric(mvShapiro.Test(matrix(rnorm(n*p), n))$statistic)
#   }
#   return(s)
# }


## Univariate Shapiro-Wilk test ##
## Univariate Shapiro-Wilk test per column; returns a noquote character
## matrix with columns W, p-value, and a Yes/No normality verdict at 0.05 ##
univ <- function(X){
  tests <- lapply(as.data.frame(X), shapiro.test)
  W  <- vapply(tests, function(tt) unname(tt$statistic), numeric(1))
  pv <- vapply(tests, function(tt) tt$p.value, numeric(1))
  ## Verdict uses the raw p-value; displayed values are rounded to 4 digits
  verdict <- ifelse(pv > 0.05, "Yes", "No")
  out <- cbind(as.character(round(W, 4)), as.character(round(pv, 4)), verdict)
  rownames(out) <- names(tests)
  colnames(out) <- c("W", "p-value", "UV.Normality")
  noquote(out)
}

## Simulate Null Distribution of univariate SW Test Statistic ##
## Simulate the null distribution of the univariate Shapiro-Wilk W ##
Dsn_usw <- function(n, p, B) {
  vapply(seq_len(B), function(b) {
    as.numeric(msw(matrix(rnorm(n * p), n))$uv.shapiro[1])
  }, numeric(1))
}

Try the mvnormalTest package in your browser

Any scripts or data that you put into this service are public.

mvnormalTest documentation built on April 28, 2020, 5:06 p.m.