R/adtest.R
In robCompositions: Compositional Data Analysis

Documented in adtest

#' Anderson-Darling Normality Tests
#' 
#' This function provides three kinds of Anderson-Darling Normality Tests
#' (Anderson and Darling, 1952).
#' 
#' Three version of the test are implemented (univariate, angle and radius
#' test) and it depends on the data which test is chosen.
#' 
#' If the data is univariate the univariate Anderson-Darling test for normality
#' is applied.
#' 
#' If the data is bivariate the angle Anderson-Darling test for normality is
#' performed out.
#' 
#' If the data is multivariate the radius Anderson-Darling test for normality
#' is used.
#' 
#' If \sQuote{locscatt} is equal to \dQuote{robust} then within the procedure,
#' robust estimates of mean and covariance are provided using \sQuote{covMcd()}
#' from package robustbase.
#' 
#' To provide estimates for the corresponding p-values, i.e. to compute the
#' probability of obtaining a result at least as extreme as the one that was
#' actually observed under the null hypothesis, we use Monte Carlo techniques
#' where we check how often the statistic of the underlying data is more
#' extreme than statistics obtained from simulated normal distributed data with
#' the same (column-wise-) mean(s) and (co)variance.
#' 
#' @param x either a numeric vector, or a data.frame, or a matrix
#' @param R Number of Monte Carlo simulations to obtain p-values
#' @param locscatt standard for classical estimates of mean and (co)variance.
#' robust for robust estimates using \sQuote{covMcd()} from package robustbase
#' @return \item{statistic }{The result of the corresponding test statistic}
#' \item{method }{The chosen method (univariate, angle or radius)}
#' \item{p.value }{p-value}
#' @note These functions are use by \code{\link{adtestWrapper}}.
#' @author Karel Hron, Matthias Templ
#' @seealso \code{\link{adtestWrapper}}
#' @references Anderson, T.W. and Darling, D.A. (1952) Asymptotic theory of
#' certain goodness-of-fit criteria based on stochastic processes. \emph{Annals
#' of Mathematical Statistics}, \bold{23} 193-212.
#' @keywords htest
#' @export
#' @importFrom MASS mvrnorm
#' @importFrom robustbase covMcd
#' @examples
#' 
#' adtest(rnorm(100))
#' data(machineOperators)
#' x <- machineOperators
#' adtest(pivotCoord(x[,1:2]))
#' adtest(pivotCoord(x[,1:3]))
#' adtest(pivotCoord(x))
#' adtest(pivotCoord(x[,1:2]), locscatt="robust")
#' 
adtest=function(x, R=1000, locscatt="standard") {   
	#DNAME <- deparse(substitute(x))	
	if( R < 1 ) stop("choose a higher value for R")
	if( R < 50 ) warning("maybe, the estimation of the p-value(s) is not accurate; choose a higher value for R")
	cv <- function(x, type) {
		classical <- function(x){
			if( (length(dim(x)) < 1) | is.vector(x) ){
	 			list(mean=mean(as.numeric(x), na.rm=TRUE), varmat=var(x, na.rm=TRUE))
			} else {
				list(mean=colMeans(x, na.rm=TRUE), varmat=cov(x))  
			}
		}

		robust <- function(x){
			if( (length(dim(x)) < 1) | is.vector(x) ){
				list(mean=median(x), varmat=mad(x)^2)
			} else {
				v <- robustbase::covMcd(x)
				list(mean=v$center, varmat=v$cov)
			}
		}
		switch(type,
				standard = classical(x),
				robust = robust(x))
	}
	
	centre <- function(x, type) {
		switch(type,
				mean = mean(as.numeric(x)),
				median = median(as.numeric(x)),
				trimmed = mean(as.numeric(x), trim = .1))
	}
    if(locscatt=="standard") location <- "mean" else location <- "median"    

	### 1-dim:
	if((length(dim(x)) < 1) | is.vector(x) ){
	  x <- as.vector(x)
	  n <- length(x)
      if (n < 8 ) 
        stop("sample size must be greater than 7")
      stat=function(x, N=n, location="mean"){
        x <- sort(x[complete.cases(x)])
		estCv <- cv(x, locscatt)
        p <- pnorm((x - estCv$mean)/sqrt(estCv$varmat))
        h <- (2 * seq(1:N) - 1) * (log(p) + log(1 - rev(p)))
        A <- (25/N^2-4/N-1)*(centre(h, location)+N)
      }
      A=stat(x, location=location)
	  estCv <- cv(x, locscatt)
	  mv <- estCv$mean
	  varmat <- estCv$varmat
      n=length(c(x))
	  p <- sapply(X=1:R, FUN=function(X,...){ stat(rnorm(n, mv, sqrt(varmat)), location=location) })
      pvalue=mean(p>A)
      RVAL <- list(statistic = c(A = A), method = "A-D univariate normality test", 
        p.value=pvalue)
    } else if (dim(x)[2] == 2){
	### 2-dim:
	  n <- nrow(x)
	  stat=function(x, N=n, location="mean"){
		estCv <- cv(x, locscatt)
		varmat <- estCv$varmat
		mu <- estCv$mean
		u <- (1/sqrt(det(varmat)))*((x[,1]-mu[1])*sqrt(varmat[2,2])-(x[,2]-mu[2])*(varmat[1,2]/sqrt(varmat[2,2])))
		v <- (x[,2]-mu[2])/sqrt(varmat[2,2])
		teta <- atan(v/u)+(1-sign(u))*pi/2+(1+sign(u))*(1-sign(v))*pi/2
		z=teta/(2*pi)
		p=sort(z)
		h <- (2 * seq(1:N) - 1) * (log(p) + log(1 - rev(p)))
		A <- -N-centre(h, location)
	  }
	A=stat(x, location=location)  
	estCv <- cv(x, locscatt)
	varmat <- estCv$varmat
	mv <- estCv$mean	
	p <- sapply(X=1:R, FUN=function(X,...){ stat(mvrnorm(n, mv, sqrt(varmat)), location=location) })
	pvalue=mean(p>A)
	RVAL <- list(statistic = c(A = A), method = "A-D bivariate normality test", 
			 p.value=pvalue)
  } else { 
	### >= 3:
		n <- nrow(x)		
		stat=function(x, N=n, location="mean"){ 
			estCv <- cv(x, locscatt)
			#varmat <- var(x)
			#mu <- apply(x,2,mean)
			u <- mahalanobis(x, center=estCv$mean, cov=estCv$varmat)
			z <- pchisq(u,ncol(x))
			p=sort(z) 
			h <- (2 * seq(1:N) - 1) * (log(p) + log(1 - rev(p)))
			A <- -N-centre(h, location)
#par(mfrow=c(1,2)); plot(p) ; plot(h)
		} 
		A=stat(x, location=location)    
		#print(paste("A =", A))
		estCv <- cv(x, locscatt)		
		# estCv$mean=colMeans(x)  #weg
		 #estCv$varmat=var(x) #weg
		p=numeric(R)
		n=nrow(x)
		p <- sapply(X=1:R, FUN=function(X,...){ stat(mvrnorm(n, estCv$mean, estCv$varmat), location=location) })
		pvalue=mean(p>A)
		RVAL <- list(statistic = A, method = "A-D radius test", 
				p.value=pvalue)
  }		
		
	class(RVAL) <- "htest"
	return(RVAL)
	
}


######################################################################
#Cramer-vom Mises
#
#cvmtest=function (x) 
#{
#    DNAME <- deparse(substitute(x))
#    x <- sort(x[complete.cases(x)])
#    n <- length(x)
#    if (n < 8) 
#        stop("sample size must be greater than 7")
#    p <- pnorm((x - mean(x))/sd(x))
#    W <- (1/(12 * n) + sum((p - (2 * seq(1:n) - 1)/(2 * n))^2))*((2*n+1)/(2*n))
#    RVAL <- list(statistic = c(W = W), method = "Cramer-von Mises normality test", 
#        data.name = DNAME)
#    class(RVAL) <- "htest"
#    return(RVAL)
#}
#
#######################################################################
##Watson test
#
#wattest=function (x) 
#{
#    DNAME <- deparse(substitute(x))
#    x <- sort(x[complete.cases(x)])
#    n <- length(x)
#    if (n < 8) 
#        stop("sample size must be greater than 7")
#    p <- pnorm((x - mean(x))/sd(x))
#    W <- (1/(12 * n) + sum((p - (2 * seq(1:n) - 1)/(2 * n))^2))*((2*n+1)/(2*n))
#    WW <- W-((2*n+1)/2)*(mean(p)-1/2)^2
#    RVAL <- list(statistic = c(WW = WW), method = "Watson normality test", 
#        data.name = DNAME)
#    class(RVAL) <- "htest"
#    return(RVAL)
#}