library(dmrseq)
library(rtracklayer)
library(ChIPpeakAnno)
library(ezRun)
library(patchwork)
library(kableExtra)
param <- readRDS('param.rds')
n <- 100
gtf <- import(param$ezRef@refFeatureFile)
gtf <- gtf[gtf$type == 'gene',]
gtf <- gtf[gtf$gene_biotype == 'protein_coding',]
names(gtf) <- gtf$gene_id
param$genesAnnotFile <- file.path(dirname(param$ezRef@refFeatureFile),'genes_annotation_byGene.txt')
genesAnnot <- ezRead.table(param$genesAnnotFile, row.names = NULL)
genesAnnot <- genesAnnot[,c('gene_id', 'gene_name','description')]

#Explore results:
bismarkBSseq_filtered <- readRDS("bismarkBSseq_filtered.rds")
regions <- readRDS("dmrseq_results.rds")
blocks <- readRDS("large_blocks.rds")

{.tabset}

Settings

settings = character()
#settings["Reference:"] = param$refBuild
settings["GroupingColumn:"] = param$grouping
settings["sampleGroup:"] = param$sampleGroup
settings["refGroup:"] = param$refGroup
settings["qValue threshold:"] = param$qval
settings["topRegions/Blocks:"] = n
kable(settings, row.names=TRUE, 
      col.names="Setting", format="html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "left")

r format(Sys.time(), '%Y-%m-%d %H:%M:%S')

Data Files

##Candidates annotated regions/blocks
# Exploring Significant Regions
regions <- regions[!is.na(regions$beta),]
sigRegions <- regions[regions$qval <= param$qval, ]
numSigRegions <- sum(regions$qval <= param$qval)
if(numSigRegions < 1){
    cat('No significant regions detected. Instead top', min(length(regions),n),' regions reported \n')
    sigRegions <- regions[1:min(length(regions),n), ]
}

# Exploring Significant Blocks
blocks <- blocks[!is.na(blocks$beta),]
sigBlocks <- blocks[blocks$qval <= param$qval, ]
numSigBlocks <- sum(blocks$qval <= param$qval)
if(numSigBlocks < 1){
    cat('No significant block detected. Instead top',min(length(blocks),n), 'blocks reported \n')
    sigBlocks <- blocks[1:min(length(blocks),n), ]
}
# Annotate significant regions/blocks
asigRegions <- data.frame(annotatePeakInBatch(sigRegions,featureType = 'TSS', AnnotationData = gtf))
asigRegions <- merge(asigRegions, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
asigRegions <- asigRegions[order(asigRegions$pval),]
cols <- colnames(asigRegions)
cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
asigRegions <- asigRegions[,cols]

asigBlocks <- data.frame(annotatePeakInBatch(sigBlocks,featureType = 'TSS', AnnotationData = gtf))
asigBlocks <- merge(asigBlocks, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
asigBlocks <- asigBlocks[order(asigBlocks$pval),]
cols <- colnames(asigBlocks)
cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
asigBlocks <- asigBlocks[,cols]


# Export the significant regions to XLSX
writexl::write_xlsx(asigRegions,"topRegions.xlsx")
writexl::write_xlsx(asigBlocks,"topBlocks.xlsx")

Candidate regions/blocks

topRegions.xlsx

topBlocks.xlsx

Full result objects for regions and blocks as RDS files

bismarkBSseq_filtered.rds

dmrseq_results.rds

large_blocks.rds

Differential Analysis Details

barplot(c(Regions = numSigRegions, Blocks = numSigBlocks), col = 'royalblue', las = 2, main = 'Detected Candidates')
myBoxplot <- function(vec, myTitle){
q <- quantile(vec, probs = c(0.05, 0.95))
vec <- vec[vec >= q[1] & vec <= q[2]]

below_zero <- sum(vec < 0)
above_zero <- sum(vec > 0)
total <- length(vec)
below_zero_percent <- below_zero / total * 100
above_zero_percent <- above_zero / total * 100

boxplot(vec, main = myTitle, horizontal = TRUE, col = "lightblue")
range_x <- par("usr")[1:2]
mid_y <- 1
text(range_x[1]+0.2 * diff(range_x), mid_y-0.5, 
     labels = paste0("Hypomethylation: ", below_zero, " (", round(below_zero_percent, 1), "%)"), 
     adj = c(0, 0.5), col = "red")
text(range_x[2]-0.2 * diff(range_x), mid_y+0.5, 
     labels = paste0("Hypermethylation: ", above_zero, " (", round(above_zero_percent, 1), "%)"), 
     adj = c(1, 0.5), col = "blue")
}

par(mfrow = c(1, 2))
if(numSigRegions > 0)
myBoxplot(sigRegions$stat,'Hypo- and Hypermethylation of candidate regions')
if(numSigBlocks > 0)
myBoxplot(sigBlocks$stat, 'Hypo- and Hypermethylation of candidate blocks')
par(mfrow = c(1, 1))

QC Plots

##global plots
# Plot methylation distribution
p1 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition")
ggplot2::ggsave("custom_plot1.png", plot = p1, width = 12, height = 5, units = "in")
knitr::include_graphics("custom_plot1.png")
# Plot coverage distribution
p2 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition", type = "Cov", bySample = TRUE)
ggplot2::ggsave("custom_plot2.png", plot = p2, width = 12, height = 7, units = "in")
knitr::include_graphics("custom_plot2.png")

Annotation Details

if(numSigRegions > 0){
    par(mar=c(10.1,4.1,4.1,2.1))
    barplot(table(asigRegions$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation')
    par(mar=c(5.1,4.1,4.1,2.1))
    barplot(table(asigRegions$feature_strand), las = 2, col = 'royalblue', main = 'Strand')
}

SessionInfo

format(Sys.time(), '%Y-%m-%d %H:%M:%S')
ezSessionInfo()


uzh/ezRun documentation built on Dec. 26, 2024, 9:53 a.m.