library(dmrseq) library(rtracklayer) library(ChIPpeakAnno) library(ezRun) library(patchwork) library(kableExtra) library(DSS) require(bsseq) library(qs2)
param <- readRDS('param.rds') n <- 100 gtf <- import(param$ezRef@refFeatureFile) gtf <- gtf[gtf$type == 'gene',] gtf <- gtf[gtf$gene_biotype == 'protein_coding',] names(gtf) <- gtf$gene_id param$genesAnnotFile <- file.path(dirname(param$ezRef@refFeatureFile),'genes_annotation_byGene.txt') genesAnnot <- ezRead.table(param$genesAnnotFile, row.names = NULL) genesAnnot <- genesAnnot[,c('gene_id', 'gene_name', 'description')] #Load result files: if(file.exists('bismarkBSseq_filtered.qs2')){ bismarkBSseq_filtered <- qs_read('bismarkBSseq_filtered.qs2') } else { bismarkBSseq_filtered <- readRDS("bismarkBSseq_filtered.rds") } if(file.exists('dmrseq_results.qs2')){ regions <- qs_read('dmrseq_results.qs2') } else { regions <- readRDS("dmrseq_results.rds") } if(file.exists('large_blocks.qs2')){ blocks <- qs_read('large_blocks.qs2') } else { blocks <- readRDS("large_blocks.rds") } if(file.exists('dss_results.qs2')){ dssResults <- qs_read('dss_results.qs2') } else { dssResults <- readRDS("dss_results.rds") }
settings = character() #settings["Reference:"] = param$refBuild settings["GroupingColumn:"] = param$grouping settings["sampleGroup:"] = param$sampleGroup settings["refGroup:"] = param$refGroup settings["qValue region threshold:"] = param$qVal settings["qValue per Site threshold:"] = param$qVal_perSite settings["minimum delta methylation difference for DSS:"] = param$minDelta settings["topRegions/Blocks:"] = n kable(settings, row.names=TRUE, col.names="Setting", format="html") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
r format(Sys.time(), '%Y-%m-%d %H:%M:%S')
##Candidates annotated regions/blocks # Exploring Significant Regions regions <- regions[!is.na(regions$beta),] sigRegions <- regions[regions$qval <= param$qVal, ] numSigRegions <- sum(regions$qval <= param$qVal) if(numSigRegions < 1){ cat('No significant regions detected. Instead top', min(length(regions),n),' regions reported \n') sigRegions <- regions[1:min(length(regions),n), ] } # Exploring Significant Blocks blocks <- blocks[!is.na(blocks$beta),] sigBlocks <- blocks[blocks$qval <= param$qVal, ] numSigBlocks <- sum(blocks$qval <= param$qVal) if(numSigBlocks < 1){ cat('No significant block detected. Instead top',min(length(blocks),n), 'blocks reported \n') sigBlocks <- blocks[1:min(length(blocks),n), ] } # Annotate significant regions/blocks asigRegions <- data.frame(annotatePeakInBatch(sigRegions,featureType = 'TSS', AnnotationData = gtf)) asigRegions <- merge(asigRegions, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) asigRegions <- asigRegions[order(asigRegions$pval),] cols <- colnames(asigRegions) cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] asigRegions <- asigRegions[,cols] asigBlocks <- data.frame(annotatePeakInBatch(sigBlocks,featureType = 'TSS', AnnotationData = gtf)) asigBlocks <- merge(asigBlocks, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) asigBlocks <- asigBlocks[order(asigBlocks$pval),] cols <- colnames(asigBlocks) cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] asigBlocks <- asigBlocks[,cols] # Export the significant regions to XLSX writexl::write_xlsx(asigRegions,"DMRSeq_topRegions.xlsx") writexl::write_xlsx(asigBlocks,"DMRSeq_topBlocks.xlsx")
dmls <- dssResults$dmls dmrs <- dssResults$dmrs if(!is.null(dmrs)){ sigRegionsDSS <- GRanges(dmrs) # Annotate significant regions/blocks asigRegionsDSS <- data.frame(annotatePeakInBatch(sigRegionsDSS,featureType = 'TSS', AnnotationData = gtf)) asigRegionsDSS <- merge(asigRegionsDSS, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) asigRegionsDSS <- asigRegionsDSS[order(asigRegionsDSS$diff.Methy),] cols <- colnames(asigRegionsDSS) cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] asigRegionsDSS <- asigRegionsDSS[,cols] writexl::write_xlsx(asigRegionsDSS,"DSS_topRegions.xlsx") # writexl::write_xlsx(dmrs, paste0('dmrs_',param$sampleGroup,'_vs_', param$refGroup, '.xlsx')) } if(!is.null(dmls)){ #sigSites <- GRanges(dmls) # Annotate significant regions/blocks #asigSites <- data.frame(annotatePeakInBatch(sigSites,featureType = 'TSS', AnnotationData = gtf)) #asigSites <- merge(asigSites, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) #asigSites <- asigSites[order(asigSites$diff.Methy),] #cols <- colnames(asigSites) #cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] #asigSites <- asigSites[,cols] writexl::write_xlsx(dmls,"DSS_topCpGs.xlsx") }
par(mar=c(10.1,4.1,4.1,2.1)) barplot(c(Regions_DMR = numSigRegions, Regions_DSS = nrow(dmrs), Blocks = numSigBlocks), col = 'royalblue', las = 2, main = 'Detected Candidates') par(mar=c(5.1,4.1,4.1,2.1))
myBoxplot <- function(vec, myTitle){ q <- quantile(vec, probs = c(0.05, 0.95)) vec <- vec[vec >= q[1] & vec <= q[2]] below_zero <- sum(vec < 0) above_zero <- sum(vec > 0) total <- length(vec) below_zero_percent <- below_zero / total * 100 above_zero_percent <- above_zero / total * 100 boxplot(vec, main = myTitle, horizontal = TRUE, col = "lightblue") range_x <- par("usr")[1:2] mid_y <- 1 text(range_x[1]+0.2 * diff(range_x), mid_y-0.5, labels = paste0("Hypomethylation: ", below_zero, " (", round(below_zero_percent, 1), "%)"), adj = c(0, 0.5), col = "red") text(range_x[2]-0.2 * diff(range_x), mid_y+0.5, labels = paste0("Hypermethylation: ", above_zero, " (", round(above_zero_percent, 1), "%)"), adj = c(1, 0.5), col = "blue") } par(mfrow = c(2, 2)) if(numSigRegions > 0) myBoxplot(sigRegions$stat,'Hypo- and Hypermethylation of candidate regions, DMR') if(nrow(dmrs) > 0) myBoxplot(sigRegionsDSS$diff.Methy, 'Hypo- and Hypermethylation of candidate regions, DSS') if(numSigBlocks > 0) myBoxplot(sigBlocks$stat, 'Hypo- and Hypermethylation of candidate blocks, DMR') if(nrow(dmls) > 0) myBoxplot(dmls$diff, 'Hypo- and Hypermethylation of candidate CpG Sites, DSS') par(mfrow = c(1, 1))
##global plots # Plot methylation distribution p1 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition") ggplot2::ggsave("custom_plot1.png", plot = p1, width = 10, height = 5, units = "in") knitr::include_graphics("custom_plot1.png")
# Plot coverage distribution p2 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition", type = "Cov", bySample = TRUE) ggplot2::ggsave("custom_plot2.png", plot = p2, width = 12, height = 5, units = "in") knitr::include_graphics("custom_plot2.png")
if(numSigRegions > 0){ par(mfrow = c(1, 2)) par(mar=c(10.1,4.1,4.1,2.1)) barplot(table(asigRegions$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation') par(mar=c(5.1,4.1,4.1,2.1)) barplot(table(asigRegions$feature_strand), las = 2, col = 'royalblue', main = 'Strand') par(mfrow = c(1, 1)) }
if(nrow(dmrs) > 0){ par(mfrow = c(1, 2)) par(mar=c(10.1,4.1,4.1,2.1)) barplot(table(asigRegionsDSS$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation') par(mar=c(5.1,4.1,4.1,2.1)) barplot(table(asigRegionsDSS$feature_strand), las = 2, col = 'royalblue', main = 'Strand') par(mfrow = c(1, 1)) }
format(Sys.time(), '%Y-%m-%d %H:%M:%S') ezSessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.