knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(
                      warning=FALSE)

install DMRscaler

Download the most recent version of DMRscaler from github and place in working directory, if DMRscaler is downloaded to another directory, change the "path_to_DMRscaler" variable below to the path to the DMRscaler directory.

path_to_DMRscaler <- "../DMRscaler"

if(!require("devtools", quietly = TRUE )){
  install.packages("devtools")
  library("devtools")
}
if(!require("DMRscaler")){
  devtools::install(path_to_DMRscaler)
}

Here we add some artificial chromosomes with cpgs

simul_chrs <- data.frame(chr=c("chr1","chr2"),
                         seqlengths=c(100e6,50e6))
cpg_frequency <- 1e-4
set.seed(1)
simul_locs <- data.frame(chr=character(), pos=numeric())

## simulate some initial CpG positions
for(i in 1:nrow(simul_chrs)){
  simul_locs <- rbind(simul_locs,
                      data.frame(chr=simul_chrs$chr[i],
                                 pos=sort(sample(1:simul_chrs$seqlengths[i],
                                                 (simul_chrs$seqlengths[i])*cpg_frequency) )))
}

Next we add some additional cpg features to the artificial chromosomes to give a non-uniform distibution of cpgs across the genome

## add 100 random high CpG density areas to simulate CpG Islands with width between 1e2 and 1e4
num_features <- 100
min_width <- 1e2
max_width <- 1e4

for(i in sample(nrow(simul_locs), num_features)){
  temp_island_locs <- data.frame(chr=simul_locs[i,]$chr, pos=simul_locs[i,]$pos)
  temp_island_chr_seqlength <- simul_chrs$seqlengths[which(simul_chrs$chr==temp_island_locs$chr)]
  temp_island_locs <- data.frame(chr=simul_locs[i,]$chr,
                         pos=max(c(1,temp_island_locs$pos -
                                     floor(10^(runif(1,log10(min_width/2),log10(max_width/2))))  )):
                             min(c(temp_island_chr_seqlength,temp_island_locs$pos + 
                                     floor(10^(runif(1, log10(min_width/2), log10(max_width)))) )))

  min_density <- 1e-2
  max_density <- 1e-1
  temp_island_locs <- temp_island_locs[sample(1:nrow(temp_island_locs), 
                                              10^(runif(1,log10(nrow(temp_island_locs)*min_density),
                                                       log10(nrow(temp_island_locs)*max_density) ))),  ]

  simul_locs <- rbind(simul_locs, temp_island_locs)
}

simul_locs <- unique(simul_locs)
for(i in unique(simul_locs$chr)){
  which <- which(simul_locs$chr==i)
  simul_locs[which,]$pos <- sort(simul_locs[which,]$pos)
}

## simulate random noise  
simul_locs$pval <- 1
for(i in 1:nrow(simul_locs)){
  simul_locs$pval[i] <- t.test(rnorm(10),rnorm(10))$p.value
}
## add simulated DMRs
simul_dmrs <- data.frame(chr=c(rep("chr1", 5),rep("chr2", 5)),
                         start=c(c(10e6,40e6,50e6,55e6,80e6),c(10e6,20e6,25e6,30e6,35e6)),
                         width=c(c(1e3,1e3,1e6,1e4,1e5),c(1e3,1e3,1e4,1e4,1e6) ),
                         noise=c(rep(0.5,5), rep(0.25,5)))

for(i in 1:nrow(simul_dmrs)){

  min_density <- 1e1/simul_dmrs$width[i]
  max_density <- 5e1/simul_dmrs$width[i]
  temp_island_locs <- data.frame(chr=simul_dmrs$chr[i], pos=(simul_dmrs$start[i]):(simul_dmrs$start[i] + simul_dmrs$width[i]))
  temp_island_locs <- temp_island_locs[sample(1:nrow(temp_island_locs),
                                             10^(runif(1,log10(nrow(temp_island_locs)*min_density),
                                                       log10(nrow(temp_island_locs)*max_density)))),]
  for(j in 1:nrow(temp_island_locs)){
    temp_island_locs$pval[j] <- t.test(rnorm(10, mean = 0), rnorm(10, mean = 0))$p.value
  }

  which <- sample(1:nrow(temp_island_locs), floor(nrow(temp_island_locs)*(1-simul_dmrs$noise[i])))
  which <- sort(which)
  for(j in 1:length(which)){
    temp_island_locs$pval[which[j]] <- t.test(rnorm(10, mean = 0), rnorm(10, mean = 2))$p.value
  }
  simul_locs <- rbind(simul_locs, temp_island_locs)
}


for(i in unique(simul_locs$chr)){
  which <- which(simul_locs$chr == i)
  simul_locs[which,] <- simul_locs[which,][order(simul_locs[which,]$pos),]
  temp <- simul_locs[-which,]
  simul_locs<- simul_locs[which,][which(!duplicated(simul_locs[which,]$pos)), ]
  simul_locs <- rbind(simul_locs,temp)

}
for(i in unique(simul_locs$chr)){
  which <- which(simul_locs$chr==i)
  plot(simul_locs$pos[which],-log10(simul_locs$pval)[which] )
}
dmrscaler_output <- DMRscaler::dmrscaler(simul_locs,
                                         locs_pval_cutoff = 0.05,
                                         region_signif_cutoff = 0.05,
                                         window_sizes = c(2,4,8,16,32,64),
                                         region_signif_method = "bon",
                                         window_type = "k_nearest",
                                         output_type = "complete")
dmrscaler_output[[5]]


leroybondhus/dmrscaler documentation built on July 11, 2022, 4:56 a.m.