This vignette walks throug the process of reproducing the simulation results presented in

Morrison, J., Witten, D., & Simon, N. (2016). Joint Adaptive Differential Estimation: A tool for comparative analysis of smooth genomic data types.

You will need the bsseq package and the methylKit package:

#BSmooth
source("https://bioconductor.org/biocLite.R")
biocLite("bsseq")

# methylKit dependencies
install.packages( c("data.table","devtools"))
source("http://bioconductor.org/biocLite.R")
biocLite(c("GenomicRanges","IRanges"))

# methylKit
library(devtools)
install_github("al2na/methylKit",build_vignettes=FALSE)

Binomial simulations with random effects.

These simulations are shown in secion 4.2. The steps outlined below require a specific directory structure. These commands should all be run in a directory that contains the subdirectories data/, f0/, path/, alt/, and cv/.

The mean curves used to produce the data are in the binom_profiles object in this package.

library(jadeSims)
library(ggplot2)
ggplot(data.frame(binom_profiles), aes(x=1:300)) +
  geom_line(aes(y=X1)) + 
  geom_line(aes(y=X2), color="blue") + 
  labs(x="Position", y="Profile")

Step 1: Produce the data. The function tile_binomial_data in this package produces data for a single group. The following produces one data set for both groups using a sample size of 10 in each group and no random effects.

#One data set:
sample.size <- c(10, 10)
full.counts <- matrix(nrow=300, ncol=0)
full.reads <- matrix(nrow=300, ncol=0)
for(i in 1:2){
  my.data <- replicate(n=sample.size[i], expr={
      z <-  tile_binomial_data(profile=binom_profiles[,i], mean.reads=10,
      reff.sd=0, mean.tile.length=30)
      cbind(z$y, z$reads)})
  my.counts <-  my.data[, 1, ]
  my.reads <-  my.data[, 2, ]
  full.counts <- cbind(full.counts, my.counts)
  full.reads <- cbind(full.reads, my.reads)
}
R <- list("Y"=full.counts, "READS"=full.reads, "sample.size"=sample.size)

To produce the data sets used in the paper use the wrapper generate_data_binomial which includes the correct seed. This function must be used in a dirctory containing a data/ subdirectory.

generate_data_binomial()
generate_data_binomial(seed=60282, start.rep=61, stop.rep=100)

The wrapper also calls a function alternatives_binomial which runs bsseq and methylKit and saves results to a file in the alt/ directory.

Step 2: Run JADE: This can take a while so it is better to do it in parallel. The function run_jade_binomial calculates the trendfiltering fits at gamma=0 and runs the full path of solutions for the complete data as well as the cross validation data sets.

file.prefix <- "binom_0_n1"
run_jade_binomial(file.prefix=file.prefix)

Step 3: Aggregate all the results into a useful data structure. Use the aggregate_sims function: This must be done for all 4 values of sigma_re:

for(re in c(0, 0.02, 0.05, 0.07)){
    file.prefix <- paste0("binom_", re )
    agg.fn <- paste0("agg/", file.prefix, "_tol_5e-3_agg.RData")
    agg <- aggregate_sims(file.prefix=file.prefix, which.reps=1:100, run.cv=TRUE, profiles=binom_profiles, save.file=agg.fn, use.cv=TRUE)
}

Step 4: Make Plots

Figure 5:

par(mfrow=c(2, 2))
for(re in c(0, 0.02, 0.05, 0.07)){
      file.prefix <- paste0("binom_", re)
      agg.fn <- paste0("agg/binom_", re, "_tol_5e-3_agg.RData")
      agg <- getobj(agg.fn)

      plot_roc_curves(agg.obj=agg,
          which.stats=c("jade", "mk.agg.qval", "bss.qval"),
          cols=c("black", "violetRed", "blue"),
          ltys=c(1, 3, 2), points.pch=15:17,
          direction="vertical",
          main=bquote(~sigma["re"] == .(re)))
  }
  dev.off()

Normal simulations with auto regressive errors and with random effects (Section 4.1)

These simulations are produced using the same steps as above but with different functions.

Step 1: Generate data. This process uses the normal_data function. There are wrappers producing the exact set of data used in the paper.

generate_data_ar()
generate_data_ar(seed=95870, start.rep=61, stop.rep=100)
generate_data_re()
generate_data_re(seed=46916, start.rep=61, stop.rep=100)

Step 3: Run JADE:

For Auto-regressive errors

for(arsd in c(0.5, 1, 2)){
    for(rho in c(0, 0.2, 0.4)){
      for(rep in 1:100){
      file.prefix <- paste0("ar_sd", arsd, "_rho_", rho, "_n", rep)
      run_jade_normal(file.prefix=file.prefix)
    }
}

For random effects:

perc <- c(5, 10, 15, 20)
for(l in 1:4){
  for(rep in 1:100){
    file.prefix <- paste0("re_", perc[l], "_n", rep)
    run_jade_normal(file.prefix=file.prefix)
  }
}

Step 3: Aggregate

For the Autoregrssive settings:

for(sig in c(0.5, 1, 2)) {
  for(rho in c(0, 0.2, 0.4)) {
    file.prefix <- paste0("ar_sd", sig, "_rho_", rho )
    agg.fn <- paste0("agg/", file.prefix, "_tol5e-3_agg.RData")
    agg <- aggregate_sims(file.prefix=file.prefix, which.reps=1:100, run.cv=TRUE,
                          profiles=normal_profiles, save.file=agg.fn, use.cv=TRUE)
  }
}

For random effects settings:

sds <- c(2.18,2.12,  2.06, 2)
res <- c(0.5, 0.707, .866, 1)
perc <- c(5, 10, 15, 20)
for(j in 1:4){
    file.prefix <- paste0("re_", perc[j])
    agg.fn <- paste0("agg/", file.prefix, "_tol5e-3_agg.RData")
    agg <- aggregate_sims(file.prefix=file.prefix, which.reps=1:100, run.cv=TRUE,
                          profiles=normal_profiles, save.file=agg.fn, use.cv=TRUE)

}

Step 4: Plot Figure 3:

for(arsd in c(0.5, 1, 2)) {
  for(rho in c(0, 0.2, 0.4)) {
    file.prefix <- paste0("ar_sd", arsd, "_rho_", rho)
    agg.fn <- paste0("agg/ar_sd", arsd, "_rho_", rho,  "_tol5e-3_agg.RData")
    agg <- getobj(agg.fn)

    plot_roc_curves(agg.obj=agg,
          which.stats=c("jade", "tt.slim", "spline.slim", "locfit.slim"),
          points.pch=15:18,
          cols=c("black", "violetred", "blue", "seagreen"),
          direction="vertical",  ltys=c(1, 3, 2, 4), point.cex=2,
          main=bquote(~sigma == .(arsd) ~rho == .(rho)) )
  }
}
dev.off()

Figure 4:

par(mfrow=c(2, 2))

sds <- c(2.18,2.12,  2.06, 2)
res <- c(0.5, 0.707, .866, 1)
perc <- c(5, 10, 15, 20)
for(j in 1:4){
  file.prefix <- paste0("re_", perc[j])
  agg.fn <- paste0("agg/re_", perc[j],  "_tol_5e-3_agg.RData")
  agg <- getobj(agg.fn)

  plot_roc_curves(agg.obj=agg,
      which.stats=c("jade", "tt.slim", "spline.slim", "locfit.slim"),
      cols=c("black", "violetRed", "blue", "seagreen"),
      points.pch=15:18, ltys=c(1, 3, 2, 4),
      direction="vertical",
      main=bquote(~sigma == .(sds[j]) ~sigma["re"] == .(res[j])))
}
dev.off()


jean997/jadeSims documentation built on May 18, 2019, 11:44 p.m.