library(dmrseq) library(rtracklayer) library(ChIPpeakAnno) library(ezRun) library(patchwork) library(kableExtra)
param <- readRDS('param.rds') n <- 100 gtf <- import(param$ezRef@refFeatureFile) gtf <- gtf[gtf$type == 'gene',] gtf <- gtf[gtf$gene_biotype == 'protein_coding',] names(gtf) <- gtf$gene_id param$genesAnnotFile <- file.path(dirname(param$ezRef@refFeatureFile),'genes_annotation_byGene.txt') genesAnnot <- ezRead.table(param$genesAnnotFile, row.names = NULL) genesAnnot <- genesAnnot[,c('gene_id', 'gene_name','description')] #Explore results: bismarkBSseq_filtered <- readRDS("bismarkBSseq_filtered.rds") regions <- readRDS("dmrseq_results.rds") blocks <- readRDS("large_blocks.rds")
settings = character() #settings["Reference:"] = param$refBuild settings["GroupingColumn:"] = param$grouping settings["sampleGroup:"] = param$sampleGroup settings["refGroup:"] = param$refGroup settings["qValue threshold:"] = param$qval settings["topRegions/Blocks:"] = n kable(settings, row.names=TRUE, col.names="Setting", format="html") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
r format(Sys.time(), '%Y-%m-%d %H:%M:%S')
##Candidates annotated regions/blocks # Exploring Significant Regions regions <- regions[!is.na(regions$beta),] sigRegions <- regions[regions$qval <= param$qval, ] numSigRegions <- sum(regions$qval <= param$qval) if(numSigRegions < 1){ cat('No significant regions detected. Instead top', min(length(regions),n),' regions reported \n') sigRegions <- regions[1:min(length(regions),n), ] } # Exploring Significant Blocks blocks <- blocks[!is.na(blocks$beta),] sigBlocks <- blocks[blocks$qval <= param$qval, ] numSigBlocks <- sum(blocks$qval <= param$qval) if(numSigBlocks < 1){ cat('No significant block detected. Instead top',min(length(blocks),n), 'blocks reported \n') sigBlocks <- blocks[1:min(length(blocks),n), ] } # Annotate significant regions/blocks asigRegions <- data.frame(annotatePeakInBatch(sigRegions,featureType = 'TSS', AnnotationData = gtf)) asigRegions <- merge(asigRegions, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) asigRegions <- asigRegions[order(asigRegions$pval),] cols <- colnames(asigRegions) cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] asigRegions <- asigRegions[,cols] asigBlocks <- data.frame(annotatePeakInBatch(sigBlocks,featureType = 'TSS', AnnotationData = gtf)) asigBlocks <- merge(asigBlocks, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE) asigBlocks <- asigBlocks[order(asigBlocks$pval),] cols <- colnames(asigBlocks) cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))] asigBlocks <- asigBlocks[,cols] # Export the significant regions to XLSX writexl::write_xlsx(asigRegions,"topRegions.xlsx") writexl::write_xlsx(asigBlocks,"topBlocks.xlsx")
barplot(c(Regions = numSigRegions, Blocks = numSigBlocks), col = 'royalblue', las = 2, main = 'Detected Candidates')
myBoxplot <- function(vec, myTitle){ q <- quantile(vec, probs = c(0.05, 0.95)) vec <- vec[vec >= q[1] & vec <= q[2]] below_zero <- sum(vec < 0) above_zero <- sum(vec > 0) total <- length(vec) below_zero_percent <- below_zero / total * 100 above_zero_percent <- above_zero / total * 100 boxplot(vec, main = myTitle, horizontal = TRUE, col = "lightblue") range_x <- par("usr")[1:2] mid_y <- 1 text(range_x[1]+0.2 * diff(range_x), mid_y-0.5, labels = paste0("Hypomethylation: ", below_zero, " (", round(below_zero_percent, 1), "%)"), adj = c(0, 0.5), col = "red") text(range_x[2]-0.2 * diff(range_x), mid_y+0.5, labels = paste0("Hypermethylation: ", above_zero, " (", round(above_zero_percent, 1), "%)"), adj = c(1, 0.5), col = "blue") } par(mfrow = c(1, 2)) if(numSigRegions > 0) myBoxplot(sigRegions$stat,'Hypo- and Hypermethylation of candidate regions') if(numSigBlocks > 0) myBoxplot(sigBlocks$stat, 'Hypo- and Hypermethylation of candidate blocks') par(mfrow = c(1, 1))
##global plots # Plot methylation distribution p1 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition") ggplot2::ggsave("custom_plot1.png", plot = p1, width = 12, height = 5, units = "in") knitr::include_graphics("custom_plot1.png")
# Plot coverage distribution p2 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition", type = "Cov", bySample = TRUE) ggplot2::ggsave("custom_plot2.png", plot = p2, width = 12, height = 7, units = "in") knitr::include_graphics("custom_plot2.png")
if(numSigRegions > 0){ par(mar=c(10.1,4.1,4.1,2.1)) barplot(table(asigRegions$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation') par(mar=c(5.1,4.1,4.1,2.1)) barplot(table(asigRegions$feature_strand), las = 2, col = 'royalblue', main = 'Strand') }
format(Sys.time(), '%Y-%m-%d %H:%M:%S') ezSessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.