R/Wilks.test.R
In rrcov: Scalable Robust Estimators with High Breakdown Point

Documented in Wilks.test Wilks.test.data.frame Wilks.test.default Wilks.test.formula Wilks.test.matrix

Wilks.test <- function(x, ...) UseMethod("Wilks.test")

#' @export
Wilks.test.formula <- function(formula, data, ..., subset, na.action)
{
    m <- match.call(expand.dots = FALSE)
    m$... <- NULL
    m[[1]] <- as.name("model.frame")
    m <- eval.parent(m)
    if (.check_vars_numeric(m))
        stop("Wilks test applies only to numerical variables")

    Terms <- attr(m, "terms")
    grouping <- model.response(m)
    x <- model.matrix(Terms, m)

    xint <- match("(Intercept)", colnames(x), nomatch=0)
    if(xint > 0) x <- x[, -xint, drop=FALSE]

    res <- Wilks.test.default(x, grouping, ...)
    res$terms <- Terms

    ## fix up call to refer to the generic, but leave arg name as `formula'
    cl <- match.call()
    cl[[1]] <- as.name("Wilks.test")
    res$call <- cl
    res$contrasts <- attr(x, "contrasts")
    res$xlevels <- .getXlevels(Terms, m)
    res$na.action <- attr(m, "na.action")
    res
}

#' @export
Wilks.test.data.frame <- function(x, ...)
{
    res <- Wilks.test(structure(data.matrix(x), class="matrix"), ...)
    cl <- match.call()
    cl[[1]] <- as.name("Wilks.test")
    res$call <- cl
    res
}

#' @export
Wilks.test.matrix <- function(x, grouping, ..., subset, na.action)
{
    if(!missing(subset)) {
        x <- x[subset, , drop = FALSE]
        grouping <- grouping[subset]
    }
    if(!missing(na.action)) {
        dfr <- na.action(structure(list(g = grouping, x = x),
                                   class = "data.frame"))
        grouping <- dfr$g
        x <- dfr$x
    }

    res <- Wilks.test.default(x, grouping, ...)
    cl <- match.call()
    cl[[1]] <- as.name("Wilks.test")
    res$call <- cl
    res
}

##
## Default S3 method for Wilks.test
##
##  x   - matrix or data frame
##  grp - grouping variable - factor specifying the class for each observation
##  approximation   -
##  method  - "c" for standard estimators of the mean and variance, "mcd" for robust estimates based on MCD.
##  xq  - used only when method="mcd". Multiplication factor for the approximate Chi2 distribution. If NULL
##          simulation will be performed to determine xq and xd (default is NULL)
##  xd  - used only when method="mcd". Degrees of freedom for the approximate Chi2 distribution. If NULL
##          simulation will be performed to determine xq and xd (default is NULL)
##  nrep - number of trials for the simulation for estimating xq and xd (default is 3000)
##  trace   -   whether to provide trace output (default is FALSE)
##

#' @export
Wilks.test.default <- function(x,
                            grouping,
                            method=c("c", "mcd", "rank"),
                            approximation=c("Bartlett", "Rao", "empirical"), 
                            xd=NULL, xq=NULL, xfn = NULL, xwl=NULL, 
                            nrep=3000,
                            trace=FALSE, ...){

    alpha <- 0.5
##    approximation <- "Bartlett"
##    approximation <- "empirical"

    cl <- match.call()
    method <- match.arg(method)
    approximation <- match.arg(approximation)
    dname <- deparse(substitute(x))

    if(is.data.frame(x))
        x <- data.matrix(x)
    else if (!is.matrix(x))
        x <- matrix(x, length(x), 1,
            dimnames = list(names(x), deparse(substitute(x))))

    ## drop all rows with missing values (!!) :
    na.x <- !is.finite(x %*% rep(1, ncol(x)))
    ok <- !na.x
    x <- x[ok, , drop = FALSE]
    dx <- dim(x)
    if(!length(dx))
        stop("All observations have missing values!")

    grouping <- grouping[ok]

    n <- nrow(x)
    p <- ncol(x)

    if(method == "rank")
        x <- apply(x, 2, rank)

    ww <- .wilks(x, grouping, method, alpha=alpha)
    nk <- ww$nk
    ng <- length(nk)
    wilks <- ww$wilks

    METHOD = NULL
    PARAMETER = NULL
    if(approximation == "Bartlett") {

        ## Bartlett's ChiSq approximation
        Y <- log(wilks)
        if(method == "mcd"){
            if(missing(xd) || is.null(xd) || missing(xq) || is.null(xq)){
                xqd <- simulateChi2(nrep=nrep, nk, p, trace=trace, alpha=alpha)
                xd <- xqd$xd
                xq <- xqd$xq
                xfn <- xqd$xfn
                xwl <- xqd$xwl
            }
            STATISTIC <- wilks
            chi2.value <- Y/xd
            df <- xq
            names(STATISTIC) <- "Wilks' Lambda"     # "Chi2-approximation (simulated)"
            METHOD <- "Robust One-way MANOVA (Bartlett Chi2)"
        }else{
            STATISTIC <- wilks
            chi2.value <-  -(n - 1 - (p+ng)/2)*Y
            df <- p*(ng-1)
            names(STATISTIC) <- "Wilks' Lambda"     # "Chi2-approximation"
            METHOD <- "One-way MANOVA (Bartlett Chi2)"
        }
        PVAL <- 1-pchisq(chi2.value, df)
        PARAMETER <- c(chi2.value, df)
        names(PARAMETER) <- c("Chi2-Value", "DF")
    }else if(approximation == "Rao"){
## Rao's F approximation
        t1 <- p*p*(ng-1)*(ng-1) - 4
        t2 <- p*p + (ng-1)*(ng-1) - 5
        t <- if(t1==0 | t2 == 0) 1 else sqrt(t1/t2)
##      t <- sqrt((p*p*(ng-1)*(ng-1) - 4)/(p*p + (ng-1)*(ng-1) - 5))

        df1 <- p*(ng-1)
        df2 <- t*((n-1) - (p + ng)/2) - (p*(ng-1) - 2)/2        #   instead of n - the total number of observations - use
                                                                #   the sum of weights ?
        Y <- wilks^(1/t)
        STATISTIC <- df2*(1-Y)/(df1*Y)
        PVAL <- 1-pf(STATISTIC, df1, df2)
        PARAMETER <- c(df1,df2)
        names(STATISTIC) <- "F-approximation"
        names(PARAMETER) <- c("DF1", "DF2")
    }else if(approximation == "empirical") {
        Y <- log(wilks)
        if(method == "mcd"){
            if(missing(xd) || is.null(xd) || missing(xq) || is.null(xq)){
                xqd <- simulateChi2(nrep=nrep, nk, p, trace=trace, alpha=alpha)
                xd <- xqd$xd
                xq <- xqd$xq
                xfn <- xqd$xfn
                xwl <- xqd$xwl
            }
            STATISTIC <- wilks
            METHOD <- "Robust One-way MANOVA (empirical distribution)"
            PVAL <- xfn(wilks)

        }else{
            STATISTIC <- wilks
            chi2.value <-  -(n - 1 - (p+ng)/2)*Y
            df <- p*(ng-1)
            names(STATISTIC) <- "Wilks' Lambda"     # "Chi2-approximation"
            METHOD <- "One-way MANOVA (Bartlett Chi2)"
            PVAL <- 1-pchisq(chi2.value, df)
            PARAMETER <- c(chi2.value, df)
            names(PARAMETER) <- c("Chi2-Value", "DF")
        }
    }

    ans <- list(statistic=STATISTIC,
                parameter=PARAMETER,
                p.value=PVAL,
                estimate=ww$group.means,
                method=METHOD,
                data.name=dname,
                W=ww$W,
                T=ww$T,
                wilks=wilks,
                xd=xd,
                xq=xq,
                xfn=xfn,
                xwl=xwl)

    cl[[1]] <- as.name("Wilks.test")
    ans$call <- cl
    class(ans) <- "htest"
    ans
}

covMWcd <-function(x, grouping, alpha=0.75){

    ## compute group means, pool the observations and compute the common
    ##  covariance matrix
    covMWcd.B <- function(){
        group.means <- matrix(0, ng, p)
        for(i in 1:ng){
            mcd <- CovMcd(x[which(grouping == lev[i]),], alpha=alpha)
            group.means[i,] <- getCenter(mcd)
        }

        mcd <- CovMcd(x - group.means[g,], alpha=alpha)

        ans <- list(center=group.means, wcov=getCov(mcd), mah=getDistance(mcd), wt=mcd@wt)

        class(ans) <- "mwcd"
        attr(ans, "call") <- sys.call()
        return(ans)
    }

    x <-as.matrix(x)
    p <- ncol(x)
    n <- nrow(x)

    g <- as.factor(grouping)
    lev <- levels(g)
    ng <- length(lev)

    covMWcd.B()
 }

##
##  Find by simulation the parameters 'd' and 'q' of the approximate Chi2 distribution
##  for the robust Wilks Lambda test based on MCD
##  for a given p, g and n=\sum ni
##
##  nrep    - number of trials (3000)
##  nk      - array of integers giving the group sizes, e.g. nk=c(20, 20)
##  p       - dimension
##
simulateChi2 <- function(nrep=3000, nk, p, trace=FALSE, alpha=0.75){
    if(missing(nk))
        stop("The number and size of groups must be provided!")
    if(missing (p))
        stop("The dimensipon 'p' must be provided")

    if(trace)
        cat("\nFind approximate distribution...\n")

    ptm <- proc.time()

    n <- sum(nk)
    ng <- length(nk)
    grp <- as.factor(rep(1:ng, nk))
    wl <- rwl <- vector(mode = "numeric", length = nrep)

    for(i in 1:nrep){
#        x <- as.matrix(mvrnorm(n=n, mu=rep(0,p), Sigma=diag(rep(1,p))))
        x <- matrix(rnorm(n*p), ncol=p)

        tt <- .wilks(x, grp, method="c", alpha=alpha)
        rtt <- .wilks(x, grp, method="mcd", alpha=alpha)

        wl[i] <- tt$wilks
        rwl[i] <- rtt$wilks
    }

    Y <- log(wl)
    RY <- log(rwl)

    xmean <- mean(Y)
    xvar <- var(Y)
    xq <- 2*xmean*xmean/xvar
    xd <- xmean/xq

    rxmean <- mean(RY)
    rxvar <- var(RY)
    rxq <- 2*rxmean*rxmean/rxvar
    rxd <- rxmean/rxq
    rxfn <- ecdf(rwl)

    if(trace){
        cat(sprintf('\n  nrep=%5.0f p=%4.0f ng=%2.0f n=%5.0f', nrep,p,ng,n))
        cat(sprintf('\n  alpha=%5.2f', alpha))
        cat(sprintf('\n         mean   var    d     q'))
        cat(sprintf('\n                      %5.3f %5.3f', -1/(n-1-(p*ng)/2), p*(ng-1)))
        cat(sprintf('\n  WILKS: %5.3f %5.3f %5.3f %5.3f', xmean, xvar, xd, xq))
        cat(sprintf('\nR WILKS: %5.3f %5.3f %5.3f %5.3f', rxmean, rxvar, rxd, rxq))
        cat("\n")
        cat(" Elapsed time: ", (proc.time() - ptm)[1],"\n")
    }
    invisible(list(xd=rxd, xq=rxq, xfn=rxfn, xwl=rwl))
}

.wilks <- function(x, grouping, method, alpha=0.75){
    n <- nrow(x)
    p <- ncol(x)
    if(!is.factor(g <- grouping))
        g <- as.factor(grouping)

    ## get rid of empty groups
    grouping <- g <- as.factor(as.character(g))

    lev <- levels(g)
    counts <- as.vector(table(g))
    if(any(counts == 0)) {
        stop(paste("group(s)", paste(lev[counts == 0], collapse=" "),"are empty"))
    }
    ng <- length(counts)

    if(method == "c" | method == "rank"){
        wts <- rep(1, n)
    }else if(method == "mcd"){
        mcd <- covMWcd(x, grouping=grouping, alpha=alpha)
        wts <- mcd$wt
    }else
        stop("Undefined method: ", method)

    group.means <- matrix(0,ng,p)
    for(i in 1:ng){
        group.means[i,] <- cov.wt(x[which(grouping == lev[i]),], wt=wts[which(grouping == lev[i])])$center
    }

    wcross <- cov.wt((x - group.means[g, ]), wt=wts)
    wcross <- (sum(wts)-1) * wcross$cov
    tcross <- cov.wt(x, wt=wts)
    tcross <- (sum(wts)-1) * tcross$cov

    wilks <- det(wcross)/det(tcross)
    names(wilks) <- "Wilks' Lambda"
    names(group.means) <- names(x)

    dimnames(group.means) <- list(lev, dimnames(x)[[2]])

    list(nk=counts,
         wilks=wilks,
         W=wcross,
         T=tcross,
         group.means=group.means)
}

.wilksx <- function(x, grouping, method, alpha=0.75){
    n <- nrow(x)
    p <- ncol(x)
    if(!is.factor(g <- grouping))
        g <- as.factor(grouping)

    ## get rid of empty groups
    g <- as.factor(as.character(g))
    lev <- levels(g)
    counts <- as.vector(table(g))
    if(any(counts == 0)) {
        stop(paste("group(s)", paste(lev[counts == 0], collapse=" "),"are empty"))
    }
    ng <- length(counts)

    if(method == "c" | method == "rank"){
        wts <- rep(1, n)
    }else if(method == "mcd"){
        mcd <- covMWcd(x, grouping=grouping, alpha=alpha)
        wts <- mcd$wt
    }else
        stop("Undefined method: ", method)

    group.means <- matrix(0,ng,p)
    for(i in 1:ng){
        group.means[i,] <- cov.wt(x[which(grouping == lev[i]),], wt=wts[which(grouping == lev[i])])$center
    }

    wcross <-cov.wt((x - group.means[g, ]), wt=wts)
    wcross <- (sum(wts)-1) * wcross$cov
    tcross <- cov.wt(x, wt=wts)
    tcross <- (sum(wts)-1) * tcross$cov

    wilks <- det(wcross)/det(tcross)
    names(wilks) <- "Wilks' Lambda"
    names(group.means) <- names(x)

    dimnames(group.means) <- list(lev, dimnames(x)[[2]])

    list(nk=counts,
         wilks=wilks,
         W=wcross,
         T=tcross,
         group.means=group.means)
}

Any scripts or data that you put into this service are public.

rrcov documentation built on June 8, 2025, 12:38 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rrcov
Scalable Robust Estimators with High Breakdown Point

R/Wilks.test.R
In rrcov: Scalable Robust Estimators with High Breakdown Point

Defines functions .wilksx .wilks simulateChi2 covMWcd Wilks.test.default Wilks.test.matrix Wilks.test.data.frame Wilks.test.formula Wilks.test

Documented in Wilks.test Wilks.test.data.frame Wilks.test.default Wilks.test.formula Wilks.test.matrix

Try the rrcov package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rrcov Scalable Robust Estimators with High Breakdown Point

R/Wilks.test.R In rrcov: Scalable Robust Estimators with High Breakdown Point

Defines functions .wilksx .wilks simulateChi2 covMWcd Wilks.test.default Wilks.test.matrix Wilks.test.data.frame Wilks.test.formula Wilks.test

Documented in Wilks.test Wilks.test.data.frame Wilks.test.default Wilks.test.formula Wilks.test.matrix

Try the rrcov package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rrcov
Scalable Robust Estimators with High Breakdown Point

R/Wilks.test.R
In rrcov: Scalable Robust Estimators with High Breakdown Point