R/indepTests.R
In copula: Multivariate Dependence with Copulas

Documented in dependogram indepTest indepTestSim multIndepTest multSerialIndepTest serialIndepTest serialIndepTestSim

## Copyright (C) 2012 Marius Hofert, Ivan Kojadinovic, Martin Maechler, and Jun Yan
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

##' Computes the sum of binomial coefficients
binom.sum <- function(n,k) sum(choose(n, 0:k))

##' Multivariate independence test based on the empirical
##' copula process as proposed by Christian Genest and Bruno
##' Rémillard (2004), Test 13:2, pages 335-369.
##'
##' @title Simulate the distribution of TA for A of cardinality 2 to m under independence
##' @param n sample size
##' @param p dimension
##' @param m up to subsets of cardinality m
##' @param N number of simulations
##' @param verbose display progress bar if TRUE
##' @return an object of class 'indepTestDist'
##' @author Ivan Kojadinovic
indepTestSim <- function(n, p, m=p, N=1000, verbose = interactive())
{
    if (!is.numeric(n) || (n <- as.integer(n)) < 2)
        stop("n should be an integer greater than 2")
    if (!is.numeric(p) || (p <- as.integer(p)) < 2)
        stop("p should be an integer greater than 2")
    if (!is.numeric(m) || (m <- as.integer(m)) < 2 || m > p)
        stop(paste("m should be an integer greater than 2 and smaller than",p))
    if (!is.numeric(N) || (N != as.integer(N)) || N <= 5)
	stop("N should be an integer greater than 5")
    if(N < 100 && getOption("copula:warn.idTS", TRUE))
	warning("N should be at least 100")

    sb <- binom.sum(p,m)

    r. <- .C(simulate_empirical_copula,
             n = as.integer(n),
             N = as.integer(N),
             p = as.integer(p),
             m = as.integer(m),
             TA0 = double(N * (sb - p - 1)),
             G0 = double(N),
             subsets = integer(sb),
             subsets.char = character(sb),
             fisher0 = double(N),
             tippett0 = double(N),
             as.integer(verbose))

    structure(class = "indepTestDist",
              list(sample.size = n,
                   data.dimension = p,
                   max.card.subsets = m,
                   number.repetitions = N,
                   subsets = r.$subsets.char[(p+2):sb],
                   subsets.binary = r.$subsets[(p+2):sb],
                   dist.statistics.independence = matrix(r.$TA0,N,sb-p-1),
                   dist.global.statistic.independence = r.$G0,
                   dist.fisher.independence = r.$fisher0,
                   dist.tippett.independence = r.$tippett0))
}

##' Multivariate independence test based on the empirical
##' copula process as proposed by Christian Genest and Bruno
##' Rémillard (2004), Test 13:2, pages 335-369.
##'
##' @title Test in itself
##' @param x the data
##' @param d an object of class 'indepTestDist'
##' @param alpha
##' @return an object of class 'indepTest'
##' @author Ivan Kojadinovic
indepTest <- function(x, d, alpha=0.05)
{
    if (!is.numeric(x <- as.matrix(x)))
        stop("data should be numerical")
    if (anyNA(x))
        stop("data cannot contain missing values")
    if (nrow(x) < 2)
        stop("data should contain more than 2 rows")
    if (!inherits(d, "indepTestDist"))
	stop("d should be obtained by indepTestSim()")
    if (ncol(x) != d$data.dimension)
      stop("d was not obtained from simulations based on data whose dimension is equal to that of x")
    if (nrow(x) != d$sample.size)
      warning("d was not obtained from simulations based on data whose size is equal to that of x")
    if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1)
        stop("the significance level alpha is not properly set")

    m <- d$max.card.subsets
    p <- ncol(x)
    N <- as.integer(d$number.repetitions)

    ## transform data to ranks
    x <- apply(x,2,rank)

    sb <- binom.sum(p,m)

    ## perform test
    r. <- .C(empirical_copula_test,
             as.double(x),
             nrow(x),
             as.integer(p),
             as.integer(m),
             as.double(d$dist.statistics.independence),
             as.double(d$dist.global.statistic.independence),
             as.integer(N),
             as.integer(d$subsets.binary),
             TA = double(sb - p - 1),
             G = double(1),
             pval = double(sb - p - 1),
             fisher = double(1),
             tippett = double(1),
             globpval = double(1),
             as.double(d$dist.fisher.independence),
             as.double(d$dist.tippett.independence))

    ## compute critical values at the alpha level
    beta <- (1 - alpha)^(1 / (sb - p -1))
    critical <- numeric(0)
    from <- 1
    for (j in 2:m)
    {
        to <- from + choose(p,j) - 1
        crit <- sort(as.double(d$dist.statistics.independence[,from:to]))[round(beta * N * (to - from + 1))]
        critical <- c(critical,rep(crit,choose(p,j)))
        from <- to + 1
    }

    structure(class = "indepTest",
              list(subsets=d$subsets,statistics=r.$TA, critical.values=critical,
                   pvalues = r.$pval, fisher.pvalue=r.$fisher, tippett.pvalue=r.$tippett, alpha=alpha,
                   beta=beta, global.statistic=r.$G, global.statistic.pvalue=r.$globpval))
}


##' Serial independence test based on the empirical
##' copula process as proposed by Christian Genest and Bruno
##' Rémillard (2004), Test 13:2, pages 335-369.
##'
##' @title Simulate the distribution of the TAs for A of cardinality 2 to m
##' containing 1 under serial independence
##' @param n sample size
##' @param lag.max maximum lag
##' @param m subsets of cardinality up to m
##' @param N number of simulations
##' @param verbose display progress bar if TRUE
##' @return an object of class 'serialIndepTestDist'
##' @author Ivan Kojadinovic
serialIndepTestSim <- function(n, lag.max, m=lag.max+1, N=1000, verbose = interactive())
{
    if (!is.numeric(n) || (n <- as.integer(n)) < 2)
        stop("n should be an integer greater than 2")
    if (!is.numeric(lag.max) || (lag.max <- as.integer(lag.max)) < 1)
        stop("lag.max should be an integer greater than 1")

    p <- lag.max + 1 ## dimension of the virtual data

    if (n-p+1 < 2)
      stop("wrong number of lags with respect to the sample size")
    if (!is.numeric(m) || (m <- as.integer(m)) < 2 || m > p)
        stop(gettextf("m should be an integer greater than 2 and smaller than %d", p))
    if (!is.numeric(N) || (N != as.integer(N)) || N <= 10)
	stop("N should be an integer greater than 10")
    if(N < 100) warning("N should be at least 100")
    if (!is.numeric(lag.max) || (p <- as.integer(lag.max) + 1) <= 1 || n-p+1 < 2)
      stop("wrong number of lags")

    sb <- binom.sum(p-1,m-1)

    R <- .C(simulate_empirical_copula_serial,
             n = as.integer(n-p+1),
             N = as.integer(N),
             p = as.integer(p),
             m = as.integer(m),
             TA0 = double(N * (sb - 1)),
             G0 = double(N),
             subsets = integer(sb),
             subsets.char = character(sb),
             fisher0 = double(N),
             tippett0 = double(N),
             as.integer(verbose))

    structure(class = "serialIndepTestDist",
	      list(sample.size = n,
		   lag.max = lag.max,
		   max.card.subsets = m,
		   number.repetitions = N,
		   subsets = R $subsets.char[2:sb],
		   subsets.binary = R $subsets[2:sb],
		   dist.statistics.independence = matrix(R $TA0,N,sb-1),
		   dist.global.statistic.independence = R $G0,
		   dist.fisher.independence = R $fisher0,
		   dist.tippett.independence = R $tippett0))
} ## end{ serialIndepTestSim }


##' Serial independence test based on the empirical
##' copula process as proposed by Christian Genest and Bruno
##' Rémillard (2004), Test 13:2, pages 335-369.
##'
##' @title Test in itself
##' @param x the data
##' @param d an object of class 'serialIndepTestDist'
##' @param alpha the asymptotic level of the test
##' @return an object of class 'indepTest'
##' @author Ivan Kojadinovic
serialIndepTest <- function(x, d, alpha=0.05)
{
    if (!is.numeric(x))
      stop("data should be numerical")
    x <- as.numeric(x)
    if (anyNA(x))
      stop("data cannot contain missing values")
    if (length(x) < 2)
      stop("data should contain more than 2 observations")
    if (!inherits(d, "serialIndepTestDist"))
      stop("d should result from  serialIndepTestSim()")
    n <- length(x)
    if (n != as.integer(d$sample.size))
      warning("d was not obtained from simulations based on data whose size is equal to that of x")
    if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1)
      stop("the significance level alpha is not properly set")

    ## transform data to pseudo-obs
    x <- rank(x)/n

    m <- d$max.card.subsets
    p <- as.integer(d$lag.max) + 1
    n <- n - p + 1
    N <- as.integer(d$number.repetitions)

    sb <- binom.sum(p-1,m-1)

    ## perform test
    r. <- .C(empirical_copula_test_serial,
            as.double(x),
            as.integer(n),
            as.integer(p),
            as.integer(m),
            as.double(d$dist.statistics.independence),
            as.double(d$dist.global.statistic.independence),
            as.integer(N),
            as.integer(d$subsets.binary),
            TA = double(sb - 1),
            G = double(1),
            pval = double(sb - 1),
            fisher = double(1),
            tippett = double(1),
            globpval = double(1),
            as.double(d$dist.fisher.independence),
            as.double(d$dist.tippett.independence))

    ## compute critical values at the alpha level
    beta <- (1 - alpha)^(1 / (sb -1))
    critical <- numeric(0)
    from <- 1
    for (j in 1:(m-1))
    {
        to <- from + choose(p-1,j) - 1
        crit <- sort(as.double(d$dist.statistics.independence[,from:to]))[round(beta * N * (to - from + 1))]
        critical <- c(critical,rep(crit,choose(p-1,j)))
        from <- to + 1
    }

    structure(class = "indepTest",
	      list(subsets=d$subsets,statistics=r.$TA, critical.values=critical,
		   pvalues = r.$pval, fisher.pvalue=r.$fisher, tippett.pvalue=r.$tippett, alpha=alpha,
		   beta=beta, global.statistic=r.$G, global.statistic.pvalue=r.$globpval))
}

##' Independence test among random vectors based on the empirical
##' copula process
##'
##' @title
##' @param x data
##' @param d dimension of the data
##' @param m consider subsets up to cardinality m
##' @param N number of bootstrap replicates
##' @param alpha asymptotic nominal level
##' @param verbose display progress bar if TRUE
##' @return an object of class 'indepTest'
##' @author Ivan Kojadinovic
multIndepTest <- function(x, d, m=length(d), N=1000, alpha=0.05,
                          verbose = interactive())
{
    if (!is.numeric(x <- as.matrix(x)))
        stop("data should be numerical")
    if (anyNA(x))
        stop("data cannot contain missing values")
    if ((n <- nrow(x)) < 2)
        stop("data should contain more than 2 rows")
    if (!is.numeric(d) || any((d <- as.integer(d)) < 1) || sum(d) != ncol(x))
	stop("wrong vector 'd' of dimensions")

    p <- length(d)

    if (!is.numeric(m) || (m <- as.integer(m)) < 2 || m > p)
        stop(paste("m should be an integer greater than 2 and smaller than",p))
    if (!is.numeric(N) || (N != as.integer(N)) || N <= 10)
	stop("N should be an integer greater than 10")
    if(N < 100) warning("N should be at least 100")
    if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1)
        stop("the significance level alpha is not properly set")

    ## transform data to pseudo-observations
    x <- apply(x,2,rank)/n

    sb <- binom.sum(p,m)

    ## block indices
    b <- c(0,cumsum(d))

    ## bootstrap
    bootstrap <- .C(bootstrap_MA_I,
                    n = as.integer(n),
                    N = as.integer(N),
                    p = as.integer(p),
                    b = as.integer(b),
                    U = as.double(x),
                    m = as.integer(m),
                    MA0 = double(N * (sb - p - 1)),
                    I0 = double(N),
                    subsets = integer(sb),
                    subsets.char = character(sb),
                    as.integer(verbose))

    subsets <- bootstrap$subsets.char[(p+2):sb]
    subsets.binary <- bootstrap$subsets[(p+2):sb]
    dist.statistics.independence <- matrix(bootstrap$MA0,N,sb-p-1)

    ## perform test
    r. <- .C(empirical_copula_test_rv,
             as.double(x),
             as.integer(n),
             as.integer(p),
             as.integer(b),
             as.integer(m),
             as.double(bootstrap$MA0),
             as.double(bootstrap$I0),
             as.integer(N),
             as.integer(subsets.binary),
             MA = double(sb - p - 1),
             I = double(1),
             pval = double(sb - p - 1),
             fisher = double(1),
             tippett = double(1),
             Ipval = double(1)
	     )

    ## compute critical values at the alpha level
    beta <- (1 - alpha)^(1 / (sb - p - 1))
    critical <- numeric(sb - p - 1)

    for (k in 1:(sb - p - 1))
      critical[k] <- sort(dist.statistics.independence[,k])[round(beta * N)]

     structure(class = "indepTest",
	       list(subsets=subsets,statistics=r.$MA, critical.values=critical,
		    pvalues = r.$pval, fisher.pvalue=r.$fisher, tippett.pvalue=r.$tippett,
		    alpha=alpha, beta=beta,
		    global.statistic=r.$I, global.statistic.pvalue=r.$Ipval))
}

##' Multivariate serial independence test based on the empirical
##' copula process -- see AISM paper for more details
##'
##' @title Multivariate serial independence test based
##' @param x the data
##' @param lag.max the maximum lag
##' @param m consider subsets of cardinality up to m
##' @param N number of permutations
##' @param alpha asymptotic level of the test
##' @param verbose display progress bar if TRUE
##' @return an object of class 'indepTest'
##' @author Ivan Kojadinovic
multSerialIndepTest <- function(x, lag.max, m=lag.max+1, N=1000, alpha=0.05,
                                verbose = interactive())
{
    if (!is.numeric(x <- as.matrix(x)))
        stop("data should be numerical")
    if (anyNA(x))
        stop("data cannot contain missing values")
    if ((n <- nrow(x)) < 2)
        stop("data should contain more than 2 rows")
    if (!is.numeric(lag.max) || (p <- as.integer(lag.max) + 1) <= 1 || n-p+1 < 2)
      stop("wrong number of lags")
    if (!is.numeric(m) || ((m <- as.integer(m)) < 2) || m > p)
        stop(paste("m should be an integer greater than 2 and smaller than",p))
    if (!is.numeric(N) || (N != as.integer(N)) || N <= 10)
	stop("N should be an integer greater than 10")
    if(N < 100) warning("N should be at least 100")
    if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1)
        stop("the significance level alpha is not properly set")

    q <- ncol(x)

    ## transform data to ranks
    x <- apply(x,2,rank)/n

    n <- n-p+1 ## number of rows of p-dimensional virtual data

    sb <- binom.sum(p-1,m-1)

    ## bootstrap
    bootstrap <- .C(bootstrap_serial,
                    n = as.integer(n),
                    N = as.integer(N),
                    p = as.integer(p),
                    q = as.integer(q),
                    U = as.double(x),
                    m = as.integer(m),
                    MA0 = double(N * (sb - 1)),
                    I0 = double(N),
                    subsets = integer(sb),
                    subsets.char = character(sb),
                    as.integer(verbose))

    subsets <- bootstrap$subsets.char[2:sb]
    subsets.binary <- bootstrap$subsets[2:sb]
    dist.statistics.independence <- matrix(bootstrap$MA0,N,sb-1)

    ## perform test
    r. <- .C(empirical_copula_test_rv_serial,
             as.double(x),
             as.integer(n),
             as.integer(p),
             as.integer(q),
             as.integer(m),
             as.double(bootstrap$MA0),
             as.double(bootstrap$I0),
             as.integer(N),
             as.integer(subsets.binary),
             MA = double(sb - 1),
             I = double(1),
             pval = double(sb - 1),
             fisher = double(1),
             tippett = double(1),
             Ipval = double(1))

    ## compute critical values at the alpha level
    beta <- (1 - alpha)^(1 / (sb - 1))
    critical <- numeric(0)
    from <- 1
    for (j in 1:(m-1))
    {
        to <- from + choose(p-1,j) - 1
        crit <- sort(as.double(dist.statistics.independence[,from:to]))[round(beta * N * (to - from + 1))]
        critical <- c(critical,rep(crit,choose(p-1,j)))
        from <- to + 1
    }

    structure(class = "indepTest",
	      list(subsets=subsets,statistics=r.$MA, critical.values=critical,
		   pvalues = r.$pval, fisher.pvalue=r.$fisher, tippett.pvalue=r.$tippett, alpha=alpha,
		   beta=beta, global.statistic=r.$I, global.statistic.pvalue=r.$Ipval))
}

##' Displays a 'dependogram' from an object of class 'indepTest'
##'
##' @title Displays a 'dependogram'
##' @param test an object of class 'indepTest'
##' @param pvalues logical; if TRUE display p-values, otherwise statistics
##' @param print additional printed information
##' @return nothing
##' @author Ivan Kojadinovic
dependogram <- function(test, pvalues=FALSE, print=FALSE)
{
  if (!inherits(test, "indepTest"))
    stop("'test' should be obtained by means of the functions indepTest, multIndepTest, serialIndepTest, or multSerialIndepTest")
  stopifnot(is.logical(pvalues), is.logical(print))

  op <- par(las=3); on.exit(par(op))

  l <- length(test$statistics)
  if (pvalues)
    {
      plot(c(1,1:l),c(0,test$pvalues), type="h", ylab="p-value per subset", xlab="",
           axes=FALSE, main="Dependogram")
      axis(1, at=1:l, labels=test$subsets)
      axis(2)
      points(1:l,rep(1 - test$beta, l),pch=20)
    }
  else ## not pvalues
    {
      plot(c(1,1:l),c(0,test$statistics), type="h", ylab="statistic per subset", xlab="",
           axes=FALSE, main="Dependogram")
      axis(1, at=1:l, labels=test$subsets)
      axis(2)
      points(1:l,test$critical.values,pch=20)
    }
  if (print)
    {
      cat("The subset statistics, p-values and critical values are:\n")
      print(data.frame(subset=test$subsets, statistic=test$statistics,
                       pvalue=test$pvalues, critvalue=test$critical.values))
      cat("The critical values are such that the simultaneous acceptance region \n",
          "has probability 1 -", test$alpha, "under the null.\n")
      cat("The individual rejection probability for any statistic obtained from the Mobius \n",
          "decomposition is 1 -", test$beta, "under the null.\n\n")
    }
}


## summary and show methods for class 'indepTest'
print.indepTest <- function(x, ...)
{
  cat("\nGlobal Cramer-von Mises statistic:", x$global.statistic,
      "with p-value", x$global.statistic.pvalue, "\n")
  cat("Combined p-values from the Mobius decomposition:\n")
  cat("  ", x$fisher.pvalue, " from Fisher's rule,\n")
  cat("  ", x$tippett.pvalue, " from Tippett's rule.\n")
  invisible(x)
}
Any scripts or data that you put into this service are public.
copula documentation built on April 3, 2025, 8:50 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
copula
Multivariate Dependence with Copulas

R/indepTests.R
In copula: Multivariate Dependence with Copulas

Defines functions print.indepTest dependogram multSerialIndepTest multIndepTest serialIndepTest serialIndepTestSim indepTest indepTestSim binom.sum

Documented in dependogram indepTest indepTestSim multIndepTest multSerialIndepTest serialIndepTest serialIndepTestSim

Try the copula package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

copula Multivariate Dependence with Copulas

R/indepTests.R In copula: Multivariate Dependence with Copulas

Defines functions print.indepTest dependogram multSerialIndepTest multIndepTest serialIndepTest serialIndepTestSim indepTest indepTestSim binom.sum

Documented in dependogram indepTest indepTestSim multIndepTest multSerialIndepTest serialIndepTest serialIndepTestSim

Try the copula package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

copula
Multivariate Dependence with Copulas

R/indepTests.R
In copula: Multivariate Dependence with Copulas