################################
###### get and save SAMPLE_QC_summary ####
#config <- conf; result_format = 'rds';
#sample_basename <- "DA01"; sample_basename <- "small_FAKE01";
getSampleQCSummary <- function(sample_basename, config, result_format = c('bed','rds')){
result_format <- checkFormat(result_format, supported = c('bed','rds'))
config_tools <- read.csv(file.path(config$tools_path, config$tools_config), stringsAsFactors = FALSE)
sampleQC <- list()
sampleQC$Sample_ID <- sample_basename
### trimmomatic
trimmomatic_out_logfile <- file.path(config$results_path,"logs",paste0(sample_basename,"_",config_tools[config_tools$tool=="trimmomatic","logfile"]))
if(file.exists(trimmomatic_out_logfile)){
trim_log <- readLines(trimmomatic_out_logfile)
trim_log <- str_split(trim_log[startsWith(trim_log, "Input Read Pairs")], " ")
sampleQC$Input_read_pairs <- as.double(trim_log[[1]][4])
sampleQC$Read_Pairs_Surviving_trimming <- as.double(trim_log[[1]][7])
sampleQC$Prc_Read_Pairs_Surviving_trimming <- 100 * sampleQC$Read_Pairs_Surviving_trimming/sampleQC$Input_read_pairs
}else{
print(paste0("File: ", trimmomatic_out_logfile, " does not exist!"))
}
### top_rmdups
rmdups_result_file <- file.path(config$results_path, config_tools[config_tools$proces=="mark_duplicates_top", "temp_results_dirs"], sample_basename)
top_rmdups_result_file <- file.path(paste0(rmdups_result_file,".top.rmdups_metrics.txt"))
if(file.exists(top_rmdups_result_file)){
top_dup <- readLines(top_rmdups_result_file)
top_dup <- makeDataFrame(data = unlist(str_split(top_dup[startsWith(top_dup, "SAMPLE")], "\t")),
header = tolower(unlist(str_split(top_dup[startsWith(top_dup, "LIBRARY")], "\t"))))
sampleQC$Prc_duplicated_reads_top <- as.numeric(top_dup[,"percent_duplication"])*100
}else{
print(paste0("File: ", top_rmdups_result_file, " does not exist!"))
}
### bottom_rmdups
bottom_rmdups_result_file <- file.path(paste0(rmdups_result_file,".bottom.rmdups_metrics.txt"))
if(file.exists(bottom_rmdups_result_file)){
bottom_dup <- readLines(bottom_rmdups_result_file)
bottom_dup <- makeDataFrame(data = unlist(str_split(bottom_dup[startsWith(bottom_dup, "SAMPLE")], "\t")),
header = tolower(unlist(str_split(bottom_dup[startsWith(bottom_dup, "LIBRARY")], "\t"))))
sampleQC$Prc_duplicated_reads_bottom <- as.numeric(bottom_dup[,"percent_duplication"])*100
}else{
print(paste0("File: ", bottom_rmdups_result_file, " does not exist!"))
}
### basic_mapping
basic_mapping_metrics_result_file <- file.path(config$results_path, config_tools[config_tools$proces=="basic_mapping_metrics","temp_results_dirs"], sample_basename)
basic_mapping_metrics_result_file <- paste0(basic_mapping_metrics_result_file,"_basic_mapping_metrics.txt")
if(file.exists(basic_mapping_metrics_result_file)){
metrics <- readLines(basic_mapping_metrics_result_file)
headerStartLine <- which(startsWith(metrics,"CATEGORY"))
metrics_df <- makeDataFrame(data = unlist(str_split(metrics[headerStartLine + 1], "\t")),
header = tolower(unlist(str_split(metrics[headerStartLine], "\t"))))
metrics_df <- rbind(metrics_df, unlist(str_split(metrics[headerStartLine + 2], "\t")))
metrics_df <- rbind(metrics_df, unlist(str_split(metrics[headerStartLine + 3], "\t")))
sampleQC$Number_of_reads_after_removing_duplicates <- metrics_df[metrics_df$category == "PAIR", "pf_reads"]
}else{
print(paste0("File: ", basic_mapping_metrics_result_file, " does not exist!"))
}
### flagstat
flagstat_result_file <- file.path(file.path(config$results_path,"logs"), paste0(sample_basename,"_",config_tools[config_tools$process=="flagstat_filtered_bam","logfile"]))
if(file.exists(flagstat_result_file)){
metrics <- readLines(flagstat_result_file)
metrics <- unlist(str_split(metrics[1]," "))
sampleQC$Number_of_reads_after_filtering <- as.numeric(metrics[1])
sampleQC$Prc_passed_filtering_step <- 100 * as.numeric(sampleQC$Number_of_reads_after_filtering)/as.numeric(sampleQC$Number_of_reads_after_removing_duplicates)
}else{
print(paste0("File: ", flagstat_result_file, " does not exist!"))
}
### on_target_reads
on_target_result_file <- file.path(config$results_path, config_tools[config_tools$proces=="on_target_reads","temp_results_dirs"], sample_basename)
on_target_result_file <- paste0(on_target_result_file,"_on_target_reads.txt")
if(file.exists(on_target_result_file)){
sampleQC$Number_of_on_target_reads <- as.numeric(yaml::yaml.load_file(on_target_result_file)[['number_of_on_target_reads']])
sampleQC$Prc_of_on_target_reads <- 100 * sampleQC$Number_of_on_target_read/as.numeric(sampleQC$Number_of_reads_after_removing_duplicates)
}else{
print(paste0("File: ", on_target_result_file, " does not exist!"))
}
### depth_of_cov
depth_of_cov_result_file <- file.path(config$results_path, config_tools[config_tools$proces=="depth_of_coverage","temp_results_dirs"], sample_basename)
depth_of_cov_result_file <- paste0(depth_of_cov_result_file,"_gatk_target_coverage.sample_summary")
if(file.exists(depth_of_cov_result_file)){
depth_summary <- readLines(depth_of_cov_result_file)
depth_summary <- makeDataFrame(data = unlist(str_split(depth_summary[2], "\t")),
header = tolower(unlist(str_split(depth_summary[1], "\t"))))
sampleQC$Mean_coverage <- as.numeric(depth_summary[1, "mean"])
}else{
print(paste0("File: ", on_target_result_file, " does not exist!"))
}
### Calculate conversion efficiency
qc_added <- FALSE
for(i in 1:length(config$meth_tool)){
methyl_result_file <- file.path(config$results_path, config_tools[config_tools$proces==config$meth_tool[i],"temp_results_dirs"], sample_basename)
methyl_result_data <- as.data.frame(readMethResult(paste0(methyl_result_file,".methylation_results.", result_format), version=2))
#head(methyl_result_data)
if(!is.null(methyl_result_data)){
qc <- get_conversion_efficiency(methyl_result_data, config)
if(!qc_added){
sampleQC <- c(sampleQC, qc)
qc_added <- TRUE
}
names(qc) <- paste0(names(qc),"_",config$meth_tool[i])
sampleQC <- c(sampleQC, qc)
}else{
print(paste0("methyl_result_data is empty!"))
}
}
return(sampleQC)
}
####################################################
###### get_conversion_efficiency ####
get_conversion_efficiency <- function(methyl_data, config){
qc <- list()
#go over all control names defined in config$ref_control_sequence_name
for(i in 1:length(config$ref_control_sequence_name)){
control_name <- config$ref_control_sequence_name[i]
control <- methyl_data[methyl_data$chr %in% control_name,]
conversion_eff <- c(100 * (1-sum(control$numCs)/sum(control$numTs)))
qc[paste0('Number_of_Cs_in_control_',control_name)] <- nrow(control)
qc[paste0('Conversion_eff_',control_name)] <- conversion_eff
#qc$Number_of_Cs_in_control <- nrow(control)
#qc$Conversion_eff <- conversion_eff
}
#Add to QC Sample report
methyl_res_panel_df_no_control <- methyl_data[!methyl_data$chr %in% config$ref_control_sequence_name,]
qc$Number_of_Cs_in_panel <- nrow(methyl_res_panel_df_no_control)
methyl_res_panel_df_no_control_CpG <- methyl_res_panel_df_no_control[methyl_res_panel_df_no_control$context %in% c('CG'),]
qc$Number_of_Cs_in_panel_CpG <- nrow(methyl_res_panel_df_no_control_CpG)
methyl_res_panel_df_no_control_nonCpG <- methyl_res_panel_df_no_control[methyl_res_panel_df_no_control$context %in% c('CHG','CHH'),]
qc$Number_of_Cs_in_panel_non_CpG <- nrow(methyl_res_panel_df_no_control_nonCpG)
qc$Number_of_Cs_in_panel_CpG_cov_min10 <- nrow(filterMethResult(methyl_data, config$ref_control_sequence_name, context = c('CG'), min_coverage = 10))
qc$Number_of_Cs_in_panel_non_CpG_cov_min10 <- nrow(filterMethResult(methyl_data, config$ref_control_sequence_name, context = c('CHG','CHH'), min_coverage = 10))
#methyl_res_panel_df_no_control_CpG_max9 <- methyl_res_panel_df_no_control_CpG[methyl_res_panel_df_no_control_CpG$coverage<10,]
qc$Number_of_Cs_in_panel_CpG_cov_max9 <- sum(methyl_res_panel_df_no_control_CpG$coverage<10)
qc$Prc_of_Cs_in_panel_CpG_cov_min10 <- 100 * (qc$Number_of_Cs_in_panel_CpG_cov_min10/qc$Number_of_Cs_in_panel_CpG)
qc$Prc_of_Cs_in_panel_CpG_cov_max9 <- 100 * (qc$Number_of_Cs_in_panel_CpG_cov_max9/qc$Number_of_Cs_in_panel_CpG)
return(qc)
}
####################################################
###### Table of QC common for all present samples ####
createAllQCSummary <- function(config){
config_tools <- read.csv(file.path(config$tools_path, config$tools_config), stringsAsFactors = FALSE)
samples <- list.files(file.path(config$results_path,"logs"),full.names = F)
samples <- samples[endsWith(samples, config_tools[config_tools$tool=="trimmomatic","logfile"])]
samples <- str_replace_all(samples, pattern = paste0("_",config_tools[config_tools$tool=="trimmomatic","logfile"]),replace = "")
for(s in samples){
print(paste0("Processing sample: ", s))
sampleQC <- getSampleQCSummary(s, config)
}
return(T)
}
####################################################
###### Table of QC common for all present samples ####
readAllQCSummary <- function(config, save = T){
input_dir <- file.path(config$results_path, "QC_report")
input_files <- list.files(input_dir, pattern = "\\_QC_summary.yml$", full.names = T)
if(length(input_files) == 0)
stop(paste0("There is no sample result files to create the report. Directory", input_dir))
qc_summary <- lapply(input_files, function(x) as.data.frame(yaml::read_yaml(x), stringsAsFactors = F))
qc_summary <- data.frame(rbindlist(qc_summary))
no_numeric_cols <- c("Sample_ID", "processing_time", "methratio_time", "bssnper_time")
for(curcol in names(qc_summary)){
if(!curcol %in% no_numeric_cols){
qc_summary[,curcol] <- as.numeric(qc_summary[,curcol])
}
}
if(save){
fwrite(qc_summary, file = file.path(config$results_path,"QC_report/","SummaryQC.csv"))
}
return(qc_summary)
}
####################################################
#### PLOT: Number of sites in CpG context covered by more/less than 10
plotSitesCpG <- function(cov_summary_data, config, meth_tool = NA, min_coverage = 10, pal = brewer.pal(8, "Dark2"), share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
cov <- min_coverage
if(!cov %in% cov_summary_data$min_coverage){
warning(paste0('Coverage: ',cov, ' is not present in input cov_summary_data.'))
return(NULL)
}
cov_summary_data <- cov_summary_data[cov_summary_data$context == 'CpG' & (cov_summary_data$min_coverage == cov | cov_summary_data$max_coverage == cov),]
cov_summary_data$coverage <- paste0("cov>=",cov)
cov_summary_data$coverage[!is.na(cov_summary_data$max_coverage) & cov_summary_data$max_coverage == cov] <- paste0("cov<",cov)
cov_summary_data$coverage <- factor(cov_summary_data$coverage, levels = c(paste0("cov<",cov), paste0("cov>=",cov)))
gg <- ggplot(cov_summary_data, aes(fill=coverage, y=value, x=SampleID))
if(share){
gg <- gg + geom_bar(stat="identity", position="fill")
}else{
gg <- gg + geom_bar(stat="identity")
}
gg <- gg + theme_minimal() +
theme_light() + scale_fill_manual(values=pal[1:2]) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text = element_text(size = fontsize)) +
ylab("Number of sites") +
xlab("Sample ID") +
ggtitle(paste0("Number of sites in CpG context \ncovered by more/less than ", cov))
if(save){
ggsave(filename = file.path(config$results_path,"QC_report/",paste0(meth_tool,'_',"SummarySitesCovBy",cov,'.',config$plot_format)), plot = gg, device=config$plot_format)
}
return(gg)
}
####################################################
#### PLOT: Number of sites covered by minimum 10 reads \nseparately for sites in CpG and non-CpG context
plotSitesNonCpG <- function(cov_summary_data, config, meth_tool = NA, min_coverage = 10, pal = brewer.pal(8, "Dark2"), share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
cov <- min_coverage
if(!cov %in% cov_summary_data$min_coverage){
warning(paste0('Coverage: ',cov, ' is not present in input cov_summary_data.'))
return(NULL)
}
cov_summary_data <- cov_summary_data[cov_summary_data$min_coverage == cov,]
cov_summary_data$context <- factor(cov_summary_data$context, levels = c("CpG","non-CpG"))
gg <- ggplot(cov_summary_data, aes(fill=context, y=value, x=SampleID))
if(share){
gg <- gg + geom_bar(stat="identity", position="fill")
}else
gg <- gg + geom_bar(stat="identity")
gg <- gg + theme_minimal() +
theme_light() + scale_fill_manual(values=pal[3:4]) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text = element_text(size = fontsize)) +
ylab("Number of sites") +
xlab("Sample ID") +
ggtitle(paste0("Number of cytosines covered by at least ",cov," reads \nin the CpG and non-CpG context"))
if(save){
ggsave(filename=file.path(config$results_path,"QC_report/",paste0(meth_tool,'_',"SummarySitesCovBy",cov,"CpGnonCpG.",config$plot_format)), plot=gg, device=config$plot_format)
}
return(gg)
}
####################################################
###### Plot Basic MethylKit stats for one sample #######
plotSingleMethStats <- function(single_meth_data){
par(mfrow=c(1,2))
getMethylationStats(single_meth_data, plot=T, both.strands=F, labels=F)
grid()
getCoverageStats(single_meth_data, plot=T, both.strands=F, labels=F)
grid()
par(mfrow=c(1,1))
return(1)
}
####################################################
###### Generate and save Basic MethylKit stats for all samples #######
plotMethStats <- function(meth_data, config, meth_tool = NA, pal = brewer.pal(8, "Dark2"), save = T){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
for(i in 1:length(meth_data)){
openPlotFile(file.path(config$results_path, 'QC_report', paste0(meth_tool,"_",meth_data[[i]]@sample.id,"_histCpGStats.", config$plot_format)))
plotSingleMethStats(meth_data[[i]])
dev.off()
}
return(length(meth_data))
}
####################################################
###### PLOT: Generate and save Coverage boxplot for all samples (by default log10 coverage)
plotMethStatsSummary <- function(meth_data, config, meth_tool = NA, log = TRUE, pal = brewer.pal(8, "Dark2"), save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
sampleCov <- lapply(meth_data, function(x){
data.frame(coverage = x$coverage, sample_id = x@sample.id)
})
sampleCov <- rbindlist(sampleCov)
sampleCov$sample_id <- factor(sampleCov$sample_id)
xlab <- "Read coverage per base"
outfile_sufix <- ""
if(log){
sampleCov$coverage = log10(sampleCov$coverage)
xlab <- "Read coverage per base [log10]"
outfile_sufix <- "Log10"
}
gg <- ggplot(sampleCov, aes(x=sample_id, y=coverage)) +
geom_boxplot(outlier.colour="darkred", outlier.shape=1, outlier.size=0.5, fill = pal[1], color="black") +
theme_minimal() +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text=element_text(size = fontsize)) +
#theme(plot.title = element_text(hjust = 0.5, size = 14)) +
ggtitle("CpG coverage for the experiment") +
scale_y_continuous(name = xlab) +
scale_x_discrete(name = "Sample ID")
if(save){
plotfile <- file.path(config$results_path,'QC_report',paste0(meth_tool,'_','SummaryCpGCoverage',outfile_sufix,".",config$plot_format))
ggsave(plotfile, gg)
}
return(gg)
}
####################################################
### PLOT: Beta Values Boxplot
#config <- conf; meth_data <- methData; sample_size = 100000; pal = brewer.pal(8, "Dark2"); save = T; fontsize = 10;
plotBetaValuesSummary <- function(meth_data, config, meth_tool = NA, sample_size = 100000, pal = brewer.pal(8, "Dark2"), save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
data_context <- attr(meth_data, 'context')
plot_title <- paste0("Beta values of ", data_context, " sites across samples")
head(meth_data)
percentage_meth <- lapply(meth_data, function(x){x$numCs/x$coverage})
methyl_levels <- list()
for (i in 1:length(meth_data)){
methyl_levels[[i]] <- data.frame(sample.id = meth_data[[i]]@sample.id,
beta_values = as.numeric(percentage_meth[[i]]),
stringsAsFactors = F)
if(sample_size > 0 & !is.na(sample_size) & sample_size < nrow(methyl_levels[[i]])){
methyl_levels[[i]] <- (methyl_levels[[i]])[sample(nrow(methyl_levels[[i]]), sample_size), ]
}
}
methyl_levels <- rbindlist(methyl_levels)
methyl_levels <- methyl_levels[order(methyl_levels$sample.id),]
methyl_levels <- data.frame(methyl_levels)
methyl_levels$sample.id <- factor(methyl_levels$sample.id, levels = sort(unique(methyl_levels$sample.id)))
gg <- ggplot(methyl_levels, aes(x=sample.id, y=beta_values)) +
geom_boxplot(outlier.colour="red", outlier.shape=42, outlier.size=3, notch=FALSE, fill=pal[1], color="black") +
theme_minimal() +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.position="none") +
ggtitle(plot_title) +
xlab("Sample ID") +
ylab("Beta value")
if(save){
plotfile <- file.path(config$results_path,'QC_report', paste0(meth_tool,'_','BetaValuesSummary', str_replace(data_context,"-",""), '.', config$plot_format))
ggsave(plotfile, gg)
}
return(gg)
}
####################################################
### PLOT: Methylation Levels
plotMethLevels <- function(meth_data, config, meth_tool = NA, breaks = c(0,0.1,0.2,0.4,0.6,0.8,0.9,1), pal = brewer.pal(8, "Dark2"), share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
percentage_meth <- lapply(meth_data, function(x){x$numCs/x$coverage } )
methyl_levels <- list()
for (i in 1:length(meth_data)){
cnt <- table(cut(percentage_meth[[i]], breaks = breaks, right = F, include.lowest = T))
methyl_levels[[i]] <- data.frame(sample.id = meth_data[[i]]@sample.id,
frequency = as.numeric(cnt),
methyl_level = paste0(gsub("]",'',gsub(")",'',gsub(",",'-',gsub("\\[",'',names(cnt))))),"%"),
stringsAsFactors = F)
}
methyl_levels <- rbindlist(methyl_levels)
methyl_levels <- methyl_levels[order(methyl_levels$sample.id),]
methyl_levels <- data.frame(methyl_levels)
class(methyl_levels$sample.id)
methyl_levels$sample.id <- factor(methyl_levels$sample.id, levels = sort(unique(methyl_levels$sample.id)))
methyl_levels$methyl_level <- factor(methyl_levels$methyl_level, levels = rev(sort(unique(methyl_levels$methyl_level))))
gg <- ggplot(methyl_levels, aes(fill=methyl_level, y=frequency, x=sample.id))
if(share){
gg <- gg + geom_bar(stat="identity", position="fill")
}else
gg <- gg + geom_bar(stat="identity")
gg <- gg + theme_minimal() +
scale_fill_manual(values = pal) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text=element_text(size = fontsize)) +
ggtitle("Frequency of CpGs according to \nspecific methylation level ranges") +
xlab("Sample ID") +
ylab("Frequency")
if(save){
plotfile <- file.path(config$results_path,'QC_report',paste0(meth_tool,"_",'SummaryMethylationLevel.',config$plot_format))
ggsave(plotfile, gg)
}
return(gg)
}
####################################################
###### PLOT: Hyper/Hypo CpG genom & islands annotation for each sample.
#meth_data <- methData
#config <- conf
plotCpGAnnotation <- function(meth_data, config, meth_tool = NA, hypo_hyper_def = c(0.2,0.8), pal = brewer.pal(8, "Dark2"), share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
cpg.gene <- NULL
cpg.island <- NULL
if(str_trim(config$ref_data_CpGGenomAnnotation) != ""){
cpg.gene <- readTranscriptFeatures(file.path(config$ref_data_path, config$ref_data_CpGGenomAnnotation))
}
if(str_trim(config$ref_data_CpgIslandAnnotation) != ""){
cpg.island <- readFeatureFlank(file.path(config$ref_data_path, config$ref_data_CpgIslandAnnotation), feature.flank.name=c("CpGi","shores"))
}
percentage_meth <- lapply(meth_data, function(x){ x$numCs/x$coverage } )
meth_data_hypo <- list()
meth_data_hyper <- list()
for (i in 1:(length(meth_data))){
meth_data_hypo[[meth_data[[i]]@sample.id]] <- meth_data[[i]][percentage_meth[[i]] < hypo_hyper_def[1]]
meth_data_hyper[[meth_data[[i]]@sample.id]] <- meth_data[[i]][percentage_meth[[i]] >= hypo_hyper_def[2]]
}
plotList <- list()
if(!is.null(cpg.gene)){
plotList[[length(plotList)+1]] <- plotCpGGenomAnnotation(meth_data_hyper, config, meth_tool, cpg.gene, pal, subtitle = "Hypermethylated", share, save, fontsize)
plotList[[length(plotList)+1]] <- plotCpGGenomAnnotation(meth_data_hypo, config, meth_tool, cpg.gene, pal, subtitle = "Hypomethylated", share, save, fontsize)
}
if(!is.null(cpg.island)){
plotList[[length(plotList)+1]] <- plotCpGIslandsAnnotation(meth_data_hyper, config, meth_tool, cpg.island, pal, subtitle = "Hypermethylated", share, save, fontsize)
plotList[[length(plotList)+1]] <- plotCpGIslandsAnnotation(meth_data_hypo, config, meth_tool, cpg.island, pal, subtitle = "Hypomethylated", share, save, fontsize)
}
return(plotList)
}
####################################################
###### PLOT: Hyper/Hypo CpG genom annotation for each sample.
plotCpGGenomAnnotation <- function(meth_data, config, meth_tool = NA, gene_annot_data, pal = brewer.pal(8, "Dark2"), subtitle = "", share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
annot_summary <- lapply(meth_data, function(x){
annot <- annotateWithGeneParts(as(x,"GRanges"), gene_annot_data)
d <- data.frame(reshape2::melt(t(getTargetAnnotationStats(annot, percentage=TRUE,precedence=F))))
d$Var1 <- x@sample.id
names(d) <- c("Sample_Id", "Gene_Part", "Frequency")
return(d)
})
annot_summary <- rbindlist(annot_summary)
annot_summary <- annot_summary[order(annot_summary$Sample_Id),]
annot_summary$Sample_Id <- factor(annot_summary$Sample_Id, levels = sort(unique(annot_summary$Sample_Id)))
gg <- ggplot(annot_summary, aes(fill=Gene_Part, y=Frequency, x=Sample_Id))
if(share){
gg <- gg + geom_bar(stat="identity", position="fill")
}else{
gg <- gg + geom_bar(stat="identity")
}
gg <- gg + theme_minimal() +
scale_fill_manual(values = pal) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text=element_text(size = fontsize)) +
ggtitle(paste(subtitle,"CpGs annotated to \ngenomic regions")) +
xlab("Sample ID") +
ylab("Frequency")
if(save){
ggsave(filename=file.path(config$results_path,"QC_report/",paste0(meth_tool,"_","Summary",subtitle,"CpGGenomAnnotation.",config$plot_format)), plot=gg, device=config$plot_format)
}
return(gg)
}
####################################################
###### PLOT: Hyper/Hypo CpG islands annotation for each sample.
plotCpGIslandsAnnotation <- function(meth_data, config, meth_tool = NA, cpg_annot_data, pal = brewer.pal(8, "Dark2"), subtitle = "", share = F, save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
annot_summary <- lapply(meth_data, function(x){
annot <- annotateWithFeatureFlank(as(x,"GRanges"), cpg_annot_data$CpGi, cpg_annot_data$shores, feature.name="CpGi", flank.name="shores")
d <- data.frame(reshape2::melt(t(getTargetAnnotationStats(annot, percentage=TRUE,precedence=F))))
d$Var1 <- x@sample.id
names(d) <- c("Sample_Id", "Region", "Frequency")
return(d)
})
annot_summary <- rbindlist(annot_summary)
annot_summary <- annot_summary[order(annot_summary$Sample_Id),]
annot_summary$Sample_Id <- factor(annot_summary$Sample_Id, levels = sort(unique(annot_summary$Sample_Id)))
gg <- ggplot(annot_summary, aes(fill=Region, y=Frequency, x=Sample_Id))
if(share){
gg <- gg + geom_bar(stat="identity", position="fill")
}else{
gg <- gg + geom_bar(stat="identity")
}
gg <- gg + theme_minimal() +
scale_fill_manual(values = pal) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text=element_text(size = fontsize)) +
ggtitle(paste(subtitle,"CpGs annotated to \nCpG-islands")) +
xlab("Sample ID") +
ylab("Frequency")
if(save){
ggsave(filename=file.path(config$results_path,"QC_report/",paste0(meth_tool,"_","Summary",subtitle,"CpGIslandsAnnotation.",config$plot_format)), plot=gg, device=config$plot_format)
}
return(gg)
}
###################################################
##### Number of Common CpG Shared Between Samples
plotCntCommonCpG <- function(meth_data, config, meth_tool = NA, pal = brewer.pal(8, "Dark2"), save = T, fontsize = 10){
if(is.na(meth_tool))
meth_tool <- config$meth_tool[1]
commonCpg <- lapply(meth_data, function(x){
data.frame(cpg = paste0(x$chr, "_", x$start))
})
commonCpg <- as.data.frame(table(rbindlist(commonCpg)))
gg <- ggplot(commonCpg, aes(x=Freq)) +
geom_bar(fill = pal[1]) +
theme_minimal() +
scale_x_continuous(breaks = round(seq(min(1), max(length(meth_data), by = 1),1))) +
theme(text = element_text(size = fontsize)) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, size = fontsize)) +
theme(axis.text.y = element_text(hjust = 1, size = fontsize)) +
theme(legend.text=element_text(size = fontsize)) +
ggtitle("Common CpG shared between samples") +
xlab("Number of samples") +
ylab("Number of common CpG")
if(save){
ggsave(filename=file.path(config$results_path,"QC_report/",paste0(meth_tool,"_","SummaryCntCommonCpG.",config$plot_format)), plot=gg, device=config$plot_format)
}
return(gg)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.