Using the FDR controlling test procedure of Bodnar, Dickhaus (2014)"

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library("pbapply")
library("MHTcop")

This vignette demonstrates the usage of the FDR controlling procedure of T. Bodnar and T. Dickhaus (2014). The paper is available at https://projecteuclid.org/euclid.ejs/1414588192. All following references to equations and theorems refer to this paper.

Performing FDR controlling tests using ac_fdr.test

Consider, for example, p-values $P_i$ given by $$ \begin{aligned} P_i&=\begin{cases} U_i&\text{for }i=1,\cdots,m_0\ \Phi\left(\mu_i+\Phi^{-1}(U_i)\right)&\text{for }i=m_0+1,\cdots,m \end{cases} \end{aligned} $$ where $U=(U_1,\cdots,U_m)^T$ is a vector of marginally uniformly distributed random variables following a Clayton copula with parameter $\eta=1$. These can be generated as follows:

library(copula)
set.seed(1)
m <- 20
m0 <- 0.8*m
p_values <- rCopula(1,onacopulaL(copClayton,list(1,1:20)))
mu<-runif(m-m0, min=-1, max=-1/2)
p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values[(m0+1):m]),0,1)

The FDR controlled simultaneous test of the hypotheses $H_i:\mu_i=0$ versus the alternatives $K_i:\mu_i<0$ can then be performed in terms of these p-values:

ac_fdr.test(p_values,setTheta(copClayton,1),m0,0.05,1e4)$test

For a discussion of the methods than can be utilised for empirical copula calibration (i.e. estimating the copula parameter from observations) see section 5 of T. Bodnar and T. Dickhaus (2014).

Generating Figures 1 and 5

set.seed(0);
calc_F_zst <- function(m,theta,cop,z_star,fname) {
  fname <- paste('./sim_data', fname, sep="/") 
  cat("Calculating F_Z(z_star) for the",cop@name,"copula and m =",m,"\n")
  if(!file.exists(fname)) {
    res <- list(theta=theta,F_zst=pbsapply(theta,function(theta) {
        X <- cdf.Z(copula::setTheta(cop,theta),z_star(m,0.05,theta))
      }))
    saveRDS(res,fname)
    res
  } else {
    readRDS(fname)
  }
}

plot_F_zst <- function(data) {
  theta <- data$theta
  F_zst <- data$F_zst
  plot(theta,F_zst, main="",  xlab=(expression(eta)), ylab=(expression(F[Z](z~"*"))), axes=FALSE,frame=TRUE, type="n", lty=1, lwd = 2, col = "black")
  lines(theta,F_zst,lty=1, lwd = 2, col = "black")
  axis(side=1, at=seq(0, 20, 1), label=seq(0, 20, 1), lwd=1)
  axis(side=2, at=seq(0.0, 1.0, 0.1), label=seq(0.0, 1.0, 0.1), lwd=1)

  plot(theta[theta>=5],F_zst[theta>=5], main="",  xlab=(expression(eta)), ylab=(expression(F[Z](z~"*"))), axes=FALSE,frame=TRUE, type="n", lty=1, lwd = 2, col = "black")
  lines(theta[theta>=5],F_zst[theta>=5],lty=1, lwd = 2, col = "black")
  axis(side=1, at=seq(0, 20, 1), label=seq(0, 20, 1), lwd=1)
  e <- floor(log10(max(F_zst[theta>=5])))-1
  ticks <- do.call(seq,as.list(c(round(range(F_zst[theta>=5]),-e),10^e)))
  axis(side=2, at=ticks, label=ticks, lwd=1)
}

The distribution function at $z$ can be evaluated by

cop <- copula::copClayton
theta <- 2
z <- 1
cdf.Z(copula::setTheta(cop,theta),z)

where cop is a copula object (i.e. copula::copClayton). Thus for the simulation it is only necessary to specify the calculation of $z^*$. For the Clayton copula this is given by

z_star_Clayton <- function(m,q,eta) {
  log(m) / ((m/q)^eta - (1/q)^eta)
}

Note that, to be compatible with the copula-package, this example uses a generator that differs from the one used in the paper (cf. page 2218). The generator used for the Clayton copula is given by $\psi(x)=(1+x)^{-1/\eta}$ which leads to qualitatively similar results to those obtained in the paper.

For the Gumbel copula $z^*$ can be calculated by

z_star_Gumbel <- function(m,q,eta) {
  log(m) / ((-log(q/m))^eta-(-log(q))^eta)
}

Note: Since z_star_Gumbel is very small for larger theta (for example z_star_Gumbel(200,0.05,15) is of the same magnitude as $10^{-14}$) a Monte Carlo approximation to the distribution function is utilized for larger theta. In the paper this was done for all theta.

To see the full code for generating the figures execute the following code:

v <- vignette('fdr-test',package = 'MHTcop')
file.edit(paste(v$Dir,'doc',v$R,sep='/'))

Figure 1 (for the Clayton copula)

Figure 1 a)

theta <- seq(10^(-20),20+10^(-20),0.1)
cat("Generating Figure 1 a)\n")
plot_F_zst(calc_F_zst(20,theta,copula::copClayton,z_star_Clayton,'fdr_figure1a.rds'));

Figure 1 b)

cat("Generating Figure 1 b)\n")
plot_F_zst(calc_F_zst(200,theta,copula::copClayton,z_star_Clayton,'fdr_figure1b.rds'));

Figure 5 (for the Gumbel copula)

Figure 5 a)

theta<- seq(1+10^(-15),20+10^(-15),0.1)

cat("Generating Figure 5 a)\n")
plot_F_zst(calc_F_zst(20,theta,copula::copGumbel,z_star_Gumbel,'fdr_figure5a.rds'));

Figure 5 b)

cat("Generating Figure 5 b)\n")
plot_F_zst(calc_F_zst(200,theta,copula::copGumbel,z_star_Gumbel,'fdr_figure5b.rds'));

Performing the simulations for Figures 2a), 3a), 4a) and Figures 6a), 7a), 8a)

plotBounds <- function(boundsData,sim,figTitle) {
  bounds <- boundsData$lowerBound
  bounds[1] <- 0.8*min(boundsData$lowerBound)
  bounds[2] <- 1.2*boundsData$upperBound
  plot(theta,bounds, main="",  xlab=(expression(eta)), ylab="FDR", axes=FALSE,frame=TRUE, type="n", lty=1, lwd = 2, col = "black")
  title(main=paste("FDR, ",figTitle,"Copula"))
  lines(theta,rep.int(boundsData$upperBound,length(theta)),lty=2, lwd = 2, col = "red")
  lines(theta,boundsData$sharperUpperBound,lty=1, lwd = 2, col = "black")
  lines(theta,boundsData$lowerBound,lty=2, lwd = 2, col = "blue")
  lines(theta,sim$FDR, lwd = 2, col = "darkgreen")
  axis(side=1, at=seq(0, 20, 1), label=seq(0, 20, 1), lwd=1)
  axis(side=2, at=seq(0.0, 0.05, 0.01), label=seq(0.0, 0.05, 0.01), lwd=1)
  legend(10, 0.02, c("Upper Bound","Sharper Upper Bound","Lower Bound","Simulated Values"), lty=c(2,1,2,1), lwd = c(2,2,2,2), col = c("red","black","blue","darkgreen"), bg = "white")
}

plotPower <- function(sim,figTitle) {
  plot(c(theta,theta),c(sim$empiricalPowerImproved,sim$empiricalPower), main="",  xlab=(expression(eta)), ylab="Average Power", axes=FALSE,frame=TRUE, type="n", lty=1, lwd = 2, col = "black")
  title(main=paste("Average Power,",figTitle,"Copula"))
  lines(theta,sim$empiricalPowerImproved,lty=1, lwd = 2, col = "black")
  lines(theta,sim$empiricalPower,lty=1, lwd = 2, col = "red")
  axis(side=1, at=seq(0, 20, 1), label=seq(0, 20, 1), lwd=1)
  axis(side=2, at=seq(0.5, 1.0, 0.01), label=seq(0.5, 1.0, 0.01), lwd=1)
  legend("topright", c("improved LSU","traditional LSU"), lty=c(1,1), lwd = c(2,2), col = c("black","red"), bg = "white")
}

plotFDRsd <- function(boundsData,sim,figTitle) {
  FDR_ub_sd<-sqrt(abs(boundsData$sharperUpperBound.var))
  FDR_fig_b <- FDR_ub_sd
  FDR_fig_b[1] <- 0.95*min(FDR_ub_sd)
  FDR_fig_b[2] <- 1.02*max(sim$FDR.sd)

  plot(theta,FDR_fig_b, main="",  xlab=(expression(eta)), ylab="Standard Deviation", axes=FALSE,frame=TRUE, type="n", lty=1, lwd = 2, col = "black")
  title(main=paste("Standard Deviation,",figTitle,"Copula"))
  lines(theta,FDR_ub_sd,lty=1, lwd = 2, col = "black")
  lines(theta,sim$FDR.sd,lty=1, lwd = 2, col = "red")
  axis(side=1, at=seq(0, 20, 1), label=seq(0, 20, 1), lwd=1)
  axis(side=2, at=seq(0.0, 0.20, 0.02), label=seq(0.0, 0.20, 0.02), lwd=1)
  legend(10, 0.08, c("Sharper Upper Bound","Simulated Values"), lty=c(1,1), lwd = c(2,2), col = c("black","red"), bg = "white")
}

set.seed(0);
#Sample size for Monte-Carlo approximations
NZ <- 1e5

alpha <- 0.05

Calculating the upper bound for the FDR according to corollary 3.1

The upper bound is calculated according to the formula given in corollary 3.1. The adjustement factor $\delta$ is calculated using the function ac_fdr.calc_delta:

calcBounds <- function(cop) {
  upperBound <- (m0/m)*alpha
  cat("Calculating upper bound for the",cop@name,"copula ( m =",m,")\n")
  delta <- pbsapply(theta,function(theta)
   ac_fdr.calc_delta(copula::setTheta(cop,theta),m,m0,
   alpha=alpha,num.reps=NZ,calc.var=TRUE))
  sharperUpperBound <- upperBound * delta[1,]
  sharperUpperBound.var <- upperBound^2 * delta[2,]
  delta <- delta[1,]
  lowerBound <- sapply(theta,function(theta){alpha*(m0/m)*
   (1+pgamma(log(m)/(m^(theta)-1),shape=1/(theta),scale=1)-
      pgamma((log(m)*(m^(theta)))/(m^(theta)-1),shape=1/(theta),scale=1))})
  list(upperBound=upperBound,sharperUpperBound=sharperUpperBound,
       sharperUpperBound.var=sharperUpperBound.var,
       delta=delta,lowerBound=lowerBound)
}
  ```
### Simulation study to determine the FDR under model (16)

Samples from model (16) for use in a Monte Carlo study of the FDR using the delta calculated previously by `calc_bounds`:

```r
simFDR <- function(cop,delta) {
  cat("Performing simulation for the",cop@name,"copula ( m =",m,")\n")
  Sim <- do.call(cbind,pblapply(seq_along(theta),function(l) {
    p_values<-matrix(0,3,m)
    q_k<-(alpha/m)*seq(1,m,1)
    p_values_data <- copula::rCopula(NZ,copula::onacopulaL(cop,list(theta[l],1:m)))
    data_f<-function(s)
    {
      p_values[1,]<-p_values_data[s,]
      p_values[2,1:m]<-0
      mu<-runif(m-m0, min=-1, max=-1/2)
      p_values[1,(m0+1):m]<-pnorm(sqrt(m)*mu+qnorm(p_values_data[s,(m0+1):m]),0,1)
      p_values[2,(m0+1):m]<-1
      sp_values<-matrix(0,3,m)
      sp_values<-p_values[,order(p_values[1,])]
      sp_values[3,]<-seq(1,m,1)

      #Calculations for the FDR
      R_m<-0
      V_m<-0
      if (sum(sp_values[1,]<q_k)>0)
      {
        R_m<-sum(max(sp_values[3,sp_values[1,]<q_k]))
        V_m<-R_m-sum(sp_values[2,1:R_m])
      }

      #Calculations for the power
      S_m<-0
      if (sum(sp_values[1,]<q_k)>0)
      {
        S_m<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k]))])
      }

      #Calculations for the power of the improved procedure
      S_m.improved<-0
      if (sum(sp_values[1,]<q_k/delta[l])>0)
      {
        S_m.improved<-sum(sp_values[2,1:sum(max(sp_values[3,sp_values[1,]<q_k/delta[l]]))])
      }

      return(c(V_m/max(1,R_m),S_m/(m-m0),S_m.improved/(m-m0)))
    }
    data_in<-sapply(1:NZ,data_f)
    FDR.sd <- sd(data_in[1,])
    data_in<-rowMeans(data_in)
    data_in[4]<-FDR.sd
    return(data_in)
  }))
  list(FDR=Sim[1,],empiricalPower=Sim[2,],empiricalPowerImproved=Sim[3,],
       FDR.sd=Sim[4,])
}

The results

figure <-     c(c("2","3","4"),
                c("6","7","8"))
subFigure <-  c(c("a","a","a"),
                c("a","a","a"))
figTitle <-   c(rep("Clayton",3),
                rep("Gumbel-Hougaard",3))

thetaList <- list(seq(0.00001,20.00001,0.2),
                  seq(1.00001,20.00001,0.2))

i <- 0
j <- 0
for(cop in c(copula::copClayton,copula::copGumbel)) {
  j <- j + 1;
  theta <- thetaList[[j]]
  for(m in c(20)) {
    i <- i + 1;
    m0 <- 0.8*m
    fname = paste('./sim_data/fdr_figure',figure[i],subFigure[i],'.rds',sep='')
    if(!file.exists(fname)) {
      bounds <- calcBounds(cop);
      sim <- simFDR(cop,bounds$delta);
      res <- list(bounds=bounds,sim=sim)
      saveRDS(res,fname)
    } else {
      res <- readRDS(fname)
    }

    bounds <- res$bounds
    sim <- res$sim

    cat("Generating Figure",figure[i],subFigure[i],")\n")
    #pdf(file=paste0(paste("graphics/fdr_Figure",figure[i],subFigure[i],sep="_"),".pdf"), width=9, height=7);
    plotBounds(bounds,sim,figTitle[i])
    #ignore <- dev.off();

    i <- i + 1;
    cat("Generating Figure",figure[i],subFigure[i],")\n")
    #pdf(file=paste0(paste("graphics/fdr_Figure",figure[i],subFigure[i],sep="_"),".pdf"), width=9, height=7);
    plotPower(sim,figTitle[i])
    #ignore <- dev.off();

    i <- i + 1;
    cat("Generating Figure",figure[i],subFigure[i],")\n")
    #pdf(file=paste0(paste("graphics/fdr_Figure",figure[i],subFigure[i],sep="_"),".pdf"), width=9, height=7);
    plotFDRsd(bounds,sim,figTitle[i])
    #ignore <- dev.off();
  }
}


Try the MHTcop package in your browser

Any scripts or data that you put into this service are public.

MHTcop documentation built on May 2, 2019, 7:59 a.m.