R/dcov.R

Defines functions DCOR dcor dcov .dcov dcor.test dcov.test

Documented in dcor DCOR dcor.test dcov dcov.test

dcov.test <-
function(x, y, index=1.0, R=NULL) {
    ## check for valid number of replicates R
    method <- "Specify the number of replicates R (R > 0) for an independence test"
    if (! is.null(R)) {
      R <- floor(R)
      if (R < 1) R <- 0
      if (R > 0) 
        method <- "dCov independence test (permutation test)"
    } else {
      R <- 0
    }

    # distance covariance test for multivariate independence
    if (!inherits(x, "dist")) x <- dist(x)
    if (!inherits(y, "dist")) y <- dist(y)
    x <- as.matrix(x)
    y <- as.matrix(y)
    dst <- TRUE
    n <- nrow(x)
    m <- nrow(y)
    if (n != m) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    
    stat <- dcorr <- reps <- 0
    dcov <- rep(0, 4)
    if (R > 0) reps <- rep(0, R)
    pval <- 1
    dims <- c(n, ncol(x), ncol(y), dst, R)

    # dcov = [dCov,dCor,dVar(x),dVar(y)]
    a <- .C("dCOVtest",
            x = as.double(t(x)),
            y = as.double(t(y)),
            byrow = as.integer(TRUE),
            dims = as.integer(dims),
            index = as.double(index),
            reps = as.double(reps),
            DCOV = as.double(dcov),
            pval = as.double(pval),
            PACKAGE = "energy")
    # test statistic is n times the square of dCov statistic
    stat <- n * a$DCOV[1]^2
    dcorr <- a$DCOV
    V <- dcorr[[1]]
    names(stat) <- "nV^2"
    names(V) <- "dCov"
    dataname <- paste("index ", index, ", replicates ", R, sep="")
    pval <- ifelse (R < 1, NA, a$pval)
    e <- list(
        statistic = stat,
        method = method,
        estimate = V,
        estimates = dcorr,
        p.value = pval,
        replicates = n* a$reps^2,
        n = n,
        data.name = dataname)
    class(e) <- "htest"
    return(e)
}

dcor.test <-
  function(x, y, index=1.0, R) {
    # distance correlation test for multivariate independence
    # like dcov.test but using dcor as the test statistic
    if (missing(R)) R <- 0
    R <- ifelse(R > 0, floor(R), 0)
    RESULT <- dcov.test(x, y, index=1.0, R) 
    # this test statistic is n times the square of dCov statistic
    DCOVteststat <- RESULT$statistic
    DCOVreplicates <- RESULT$replicates
    
    # RESULT$estimates = [dCov,dCor,dVar(x),dVar(y)]
    # dVar are invariant under permutation of sample indices
    estimates = RESULT$estimates
    names(estimates) <- c("dCov", "dCor", "dVar(X)", "dVar(Y)")
    
    DCORteststat <- RESULT$estimates[2]
    dvarX <- RESULT$estimates[3]
    dvarY <- RESULT$estimates[4]
    n <- RESULT$n
    
    if (R > 0) {
      DCORreps <- sqrt(DCOVreplicates / n) / sqrt(dvarX * dvarY)
      p.value <- (1 + sum(DCORreps >= DCORteststat)) / (1 + R)
    } else {
      p.value <- NA
      DCORreps <- NA
    }
    
    names(DCORteststat) <- "dCor"
    dataname <- paste("index ", index, ", replicates ", R, sep="")
    method <- ifelse(R > 0, "dCor independence test (permutation test)", 
                     "Specify the number of replicates R>0 for an independence test")
    e <- list(
      method = method,
      statistic = DCORteststat,
      estimates = estimates,
      p.value = p.value,
      replicates = DCORreps,
      n = n,
      data.name = dataname)
    class(e) <- "htest"
    return(e)
  }


.dcov <-
function(x, y, index=1.0) {
    # distance covariance statistic for independence
    # dcov = [dCov,dCor,dVar(x),dVar(y)]   (vector)
    # this function provides the fast method for computing dCov
    # it is called by the dcov and dcor functions
    if (!inherits(x, "dist")) x <- dist(x)
    if (!inherits(y, "dist")) y <- dist(y)
    x <- as.matrix(x)
    y <- as.matrix(y)
    dst <- TRUE
    n <- nrow(x)
    m <- nrow(y)
    if (n != m) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    dims <- c(n, NCOL(x), NCOL(y), dst)
    idx <- 1:dims[1]
    DCOV <- numeric(4)
    a <- .C("dCOV",
            x = as.double(t(x)),
            y = as.double(t(y)),
            byrow = as.integer(TRUE),
            dims = as.integer(dims),
            index = as.double(index),
            idx = as.double(idx),
            DCOV = as.double(DCOV),
            PACKAGE = "energy")
    return(a$DCOV)
}

dcov <-
function(x, y, index=1.0) {
    # distance correlation statistic for independence
    return(.dcov(x, y, index)[1])
}

dcor <-
function(x, y, index=1.0) {
    # distance correlation statistic for independence
    return(.dcov(x, y, index)[2])
}



DCOR <-
function(x, y, index=1.0) {
    # distance covariance and correlation statistics
    # alternate method, implemented in R without .C call
    # this method is usually slower than the C version
  
    if (!inherits(x, "dist")) x <- dist(x)
    if (!inherits(y, "dist")) y <- dist(y)
    x <- as.matrix(x)
    y <- as.matrix(y)
    n <- nrow(x)
    m <- nrow(y)
    if (n != m) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    if (index < 0 || index > 2) {
        warning("index must be in [0,2), using default index=1")
        index=1.0}

    stat <- 0
    dims <- c(n, ncol(x), ncol(y))

    Akl <- function(x) {
        d <- as.matrix(x)^index
        m <- rowMeans(d)
        M <- mean(d)
        a <- sweep(d, 1, m)
        b <- sweep(a, 2, m)
        return(b + M)
    }

    A <- Akl(x)
    B <- Akl(y)
    dCov <- sqrt(mean(A * B))
    dVarX <- sqrt(mean(A * A))
    dVarY <- sqrt(mean(B * B))
    V <- sqrt(dVarX * dVarY)
    if (V > 0)
      dCor <- dCov / V else dCor <- 0
    return(list(dCov=dCov, dCor=dCor, dVarX=dVarX, dVarY=dVarY))
}

Try the energy package in your browser

Any scripts or data that you put into this service are public.

energy documentation built on Feb. 22, 2021, 5:08 p.m.