library(dmrseq)
library(rtracklayer)
library(ChIPpeakAnno)
library(ezRun)
library(patchwork)
library(kableExtra)
library(DSS)
require(bsseq)
library(qs2)
param <- readRDS('param.rds')
n <- 100
gtf <- import(param$ezRef@refFeatureFile)
gtf <- gtf[gtf$type == 'gene',]
gtf <- gtf[gtf$gene_biotype == 'protein_coding',]
names(gtf) <- gtf$gene_id
param$genesAnnotFile <- file.path(dirname(param$ezRef@refFeatureFile),'genes_annotation_byGene.txt')
genesAnnot <- ezRead.table(param$genesAnnotFile, row.names = NULL)
genesAnnot <- genesAnnot[,c('gene_id', 'gene_name', 'description')]

#Load result files:
if(file.exists('bismarkBSseq_filtered.qs2')){
    bismarkBSseq_filtered <- qs_read('bismarkBSseq_filtered.qs2')
} else {
    bismarkBSseq_filtered <- readRDS("bismarkBSseq_filtered.rds")
}
if(file.exists('dmrseq_results.qs2')){
    regions <- qs_read('dmrseq_results.qs2')
} else {
    regions <- readRDS("dmrseq_results.rds")
} 
if(file.exists('large_blocks.qs2')){
    blocks <- qs_read('large_blocks.qs2')
} else {
    blocks <- readRDS("large_blocks.rds")
}
if(file.exists('dss_results.qs2')){
    dssResults <- qs_read('dss_results.qs2')
} else {
    dssResults <- readRDS("dss_results.rds")
}

{.tabset}

Settings

settings = character()
#settings["Reference:"] = param$refBuild
settings["GroupingColumn:"] = param$grouping
settings["sampleGroup:"] = param$sampleGroup
settings["refGroup:"] = param$refGroup
settings["qValue region threshold:"] = param$qVal
settings["qValue per Site threshold:"] = param$qVal_perSite
settings["minimum delta methylation difference for DSS:"] = param$minDelta
settings["topRegions/Blocks:"] = n
kable(settings, row.names=TRUE, 
      col.names="Setting", format="html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "left")

r format(Sys.time(), '%Y-%m-%d %H:%M:%S')

Data Files

##Candidates annotated regions/blocks
# Exploring Significant Regions
regions <- regions[!is.na(regions$beta),]
sigRegions <- regions[regions$qval <= param$qVal, ]
numSigRegions <- sum(regions$qval <= param$qVal)
if(numSigRegions < 1){
    cat('No significant regions detected. Instead top', min(length(regions),n),' regions reported \n')
    sigRegions <- regions[1:min(length(regions),n), ]
}

# Exploring Significant Blocks
blocks <- blocks[!is.na(blocks$beta),]
sigBlocks <- blocks[blocks$qval <= param$qVal, ]
numSigBlocks <- sum(blocks$qval <= param$qVal)
if(numSigBlocks < 1){
    cat('No significant block detected. Instead top',min(length(blocks),n), 'blocks reported \n')
    sigBlocks <- blocks[1:min(length(blocks),n), ]
}
# Annotate significant regions/blocks
asigRegions <- data.frame(annotatePeakInBatch(sigRegions,featureType = 'TSS', AnnotationData = gtf))
asigRegions <- merge(asigRegions, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
asigRegions <- asigRegions[order(asigRegions$pval),]
cols <- colnames(asigRegions)
cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
asigRegions <- asigRegions[,cols]

asigBlocks <- data.frame(annotatePeakInBatch(sigBlocks,featureType = 'TSS', AnnotationData = gtf))
asigBlocks <- merge(asigBlocks, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
asigBlocks <- asigBlocks[order(asigBlocks$pval),]
cols <- colnames(asigBlocks)
cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
asigBlocks <- asigBlocks[,cols]


# Export the significant regions to XLSX
writexl::write_xlsx(asigRegions,"DMRSeq_topRegions.xlsx")
writexl::write_xlsx(asigBlocks,"DMRSeq_topBlocks.xlsx")
dmls <- dssResults$dmls
dmrs <- dssResults$dmrs

if(!is.null(dmrs)){
    sigRegionsDSS <- GRanges(dmrs)
    # Annotate significant regions/blocks
    asigRegionsDSS <- data.frame(annotatePeakInBatch(sigRegionsDSS,featureType = 'TSS', AnnotationData = gtf))
    asigRegionsDSS <- merge(asigRegionsDSS, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
    asigRegionsDSS <- asigRegionsDSS[order(asigRegionsDSS$diff.Methy),]
    cols <- colnames(asigRegionsDSS)
    cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
    asigRegionsDSS <- asigRegionsDSS[,cols]
    writexl::write_xlsx(asigRegionsDSS,"DSS_topRegions.xlsx")        
    #   writexl::write_xlsx(dmrs, paste0('dmrs_',param$sampleGroup,'_vs_', param$refGroup, '.xlsx'))
}

if(!is.null(dmls)){
    #sigSites <- GRanges(dmls)
    # Annotate significant regions/blocks
    #asigSites <- data.frame(annotatePeakInBatch(sigSites,featureType = 'TSS', AnnotationData = gtf))
    #asigSites <- merge(asigSites, genesAnnot, by.x = 'feature', by.y = 'gene_id', all.x = TRUE)
    #asigSites <- asigSites[order(asigSites$diff.Methy),]
    #cols <- colnames(asigSites)
    #cols <- cols[!(cols %in% c('index.start','index.end','index.width','peak'))]
    #asigSites <- asigSites[,cols]
    writexl::write_xlsx(dmls,"DSS_topCpGs.xlsx")        
}

Candidate sites/regions/blocks

DMRSeq_topRegions.xlsx

DSS_topRegions.xlsx

DMRSeq_topBlocks.xlsx

DSS_topCpGs.xlsx

Differential Analysis Details

par(mar=c(10.1,4.1,4.1,2.1))
barplot(c(Regions_DMR = numSigRegions, Regions_DSS = nrow(dmrs), Blocks = numSigBlocks), col = 'royalblue', las = 2, main = 'Detected Candidates')
par(mar=c(5.1,4.1,4.1,2.1))
myBoxplot <- function(vec, myTitle){
q <- quantile(vec, probs = c(0.05, 0.95))
vec <- vec[vec >= q[1] & vec <= q[2]]

below_zero <- sum(vec < 0)
above_zero <- sum(vec > 0)
total <- length(vec)
below_zero_percent <- below_zero / total * 100
above_zero_percent <- above_zero / total * 100

boxplot(vec, main = myTitle, horizontal = TRUE, col = "lightblue")
range_x <- par("usr")[1:2]
mid_y <- 1
text(range_x[1]+0.2 * diff(range_x), mid_y-0.5, 
     labels = paste0("Hypomethylation: ", below_zero, " (", round(below_zero_percent, 1), "%)"), 
     adj = c(0, 0.5), col = "red")
text(range_x[2]-0.2 * diff(range_x), mid_y+0.5, 
     labels = paste0("Hypermethylation: ", above_zero, " (", round(above_zero_percent, 1), "%)"), 
     adj = c(1, 0.5), col = "blue")
}

par(mfrow = c(2, 2))
if(numSigRegions > 0)
myBoxplot(sigRegions$stat,'Hypo- and Hypermethylation of candidate regions, DMR')
if(nrow(dmrs) > 0)
myBoxplot(sigRegionsDSS$diff.Methy, 'Hypo- and Hypermethylation of candidate regions, DSS')
if(numSigBlocks > 0)
myBoxplot(sigBlocks$stat, 'Hypo- and Hypermethylation of candidate blocks, DMR')
if(nrow(dmls) > 0)
myBoxplot(dmls$diff, 'Hypo- and Hypermethylation of candidate CpG Sites, DSS')

par(mfrow = c(1, 1))

QC Plots

##global plots
# Plot methylation distribution
p1 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition")
ggplot2::ggsave("custom_plot1.png", plot = p1, width = 10, height = 5, units = "in")
knitr::include_graphics("custom_plot1.png")
# Plot coverage distribution
p2 <- plotEmpiricalDistribution(bismarkBSseq_filtered, testCovariate = "Condition", type = "Cov", bySample = TRUE)
ggplot2::ggsave("custom_plot2.png", plot = p2, width = 12, height = 5, units = "in")
knitr::include_graphics("custom_plot2.png")

Annotation Details

DMR

if(numSigRegions > 0){
    par(mfrow = c(1, 2))
    par(mar=c(10.1,4.1,4.1,2.1))
    barplot(table(asigRegions$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation')
    par(mar=c(5.1,4.1,4.1,2.1))
    barplot(table(asigRegions$feature_strand), las = 2, col = 'royalblue', main = 'Strand')
    par(mfrow = c(1, 1))
}

DSS

if(nrow(dmrs) > 0){
    par(mfrow = c(1, 2))
    par(mar=c(10.1,4.1,4.1,2.1))
    barplot(table(asigRegionsDSS$insideFeature), las = 2, col = 'royalblue', main = 'RegionAnnotation')
    par(mar=c(5.1,4.1,4.1,2.1))
    barplot(table(asigRegionsDSS$feature_strand), las = 2, col = 'royalblue', main = 'Strand')
    par(mfrow = c(1, 1))
}

SessionInfo

format(Sys.time(), '%Y-%m-%d %H:%M:%S')
ezSessionInfo()


uzh/ezRun documentation built on June 14, 2025, 1:29 p.m.