#' Plot bwa mapping stats
#'
#' Plot bwa mapping rate summrized by samtools
#'
#' Mapping rate of each sample is generated by \code{samtools stats sorted.bam};
#'
#' Out stats is extracted using grep and sed command:
#'
#' \code{grep ^SN | cut -f 2,3 | sed -n '3p;7p;8p;9p;10p;11p;13p;15p;16p;19p;20p;23p;24p;25p;31p;32p;38p;40p'};
#'
#' Stats of all samples in analysis were merged into a single table.
#'
#' @param mapping_stats merged mapping stats file
#' @param sample_limit sample number limit to show detail inform of each sample
#' @param out_prefix output file prefix, default is NULL, don't output file
#' @examples
#' mapping_stats <- system.file("extdata", "all_sample.mapping.xls", package = "omplotr")
#' # show mapping summary
#' om_bwa_mapping_plot(mapping_stats, 5)
#' # show detail sample information
#' om_bwa_mapping_plot(mapping_stats, 10)
om_bwa_mapping_plot <- function(mapping_stats, sample_limit, out_prefix=NULL) {
mapping_plot_order <- c('reads mapped', 'reads mapped and paired',
'reads properly paired', 'reads unmapped')
mapping_df <- read.delim(mapping_stats)
colnames(mapping_df)[1] <- 'Item'
sample_number <- dim(mapping_df)[2] - 1
mapping_df$Item <- stringr::str_replace(mapping_df$Item, ":", "")
mapping_per_df <- mapping_df[, -1]
mapped_reads <- as.numeric(mapping_per_df[1, ])
mapping_per_df <- as.data.frame(t(as.matrix(t(mapping_per_df)) / mapped_reads))
mapping_per_df$Item <- mapping_df$Item
mapping_per_df <- dplyr::filter(mapping_per_df, Item %in% mapping_plot_order)
mapping_per_df$Item <- factor(mapping_per_df$Item,
levels = mapping_plot_order)
m_mapping_per_df <- reshape2::melt(mapping_per_df, id.vars = c('Item'))
if (sample_number <= 9) {
sample_cols <- RColorBrewer::brewer.pal(sample_number, 'Set1')
} else {
sample_cols <- colorRampPalette(RColorBrewer::brewer.pal(9, 'Set1'))(sample_number)
}
if (sample_number < sample_limit) {
map_plot <- ggplot(m_mapping_per_df, aes(Item, value, fill=variable)) +
geom_bar(stat = 'identity', position = position_dodge(),
color='white') +
scale_fill_manual(values = sample_cols)
} else {
map_plot <- ggplot(m_mapping_per_df, aes(Item, value, color=Item)) +
geom_boxplot() +
geom_jitter(alpha=0.8) +
scale_color_manual(values = sample_cols)
}
map_plot <- map_plot +
theme_onmath() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank()) +
xlab('') + ylab('Percentage of Reads') +
guides(color=guide_legend(title = 'SampleID')) +
scale_y_continuous(labels = scales::percent)
if (! is.null(out_prefix)) {
out_dir <- dirname(out_prefix)
path_name <- save_mkdir(out_dir)
save_ggplot(ggplot_out = map_plot,
output = out_prefix)
}
return(map_plot)
}
#' Plot Reads coverage distribution
#'
#' Plot bwa mapping coverage summrized by samtools
#'
#' Mapping coverage of each sample is generated by \code{samtools stats sorted.bam};
#'
#' Then extracted using \code{grep}
#'
#' \code{grep ^COV| cut -f 3,4}
#'
#' @param coverage_table coverage stats table of all samples
#' @param sample_limit sample number limit to show detail inform of each sample
#' @param max_depth Sequencing depth limit to show in plot
#' @param out_prefix output file prefix, default is NULL, don't output file
#'
#' @examples
#'
#' cov_stats <- system.file("extdata", "all_sample.genome.cov.xls", package = "omplotr")
#'
#' # show coverage summary
#' om_reads_cov_plot(cov_stats, 5, 100)
#' # show detail sample information
#' om_reads_cov_plot(cov_stats, 10, 100)
om_reads_cov_plot <- function(coverage_table, sample_limit, max_depth, out_prefix=NULL) {
cov.data <- read.table(coverage_table,header = T, check.names = F)
colnames(cov.data)[1] <- "Depth"
cov_sum <- apply(cov.data[,c(-1)],2,sum)
sample_num <- dim(cov.data)[2] -1
cov_cum <- 1 - sweep(cumsum(cov.data[,c(-1)]),2,cov_sum,`/`)
plot_breaks <- c(seq(0, max_depth/2, max_depth/10), max_depth)
cum.data <- cbind(cov.data$Depth,cov_cum)
colnames(cum.data)[1] <- "Depth"
cum_melt <- reshape2::melt(cum.data,id.vars = 1)
colnames(cum_melt) <- c("Depth","SampleID","value")
sample_col <- palette_colors(pal_name = 'Set1',
sample_num = sample_num)
# sample order by max depth cov desc order
order_df <- dplyr::filter(cum_melt, Depth == max_depth)
order_df <- dplyr::arrange(order_df, dplyr::desc(value))
sample_order <- order_df$SampleID
cum_melt$SampleID <- factor(cum_melt$SampleID, levels = sample_order)
if (sample_num < sample_limit) {
cov_fig <- ggplot(cum_melt, aes(Depth, value, colour = SampleID)) +
geom_line(size = 1) +
scale_x_continuous(breaks = plot_breaks,
limits = c(0, max_depth)) +
scale_color_manual(values = sample_col)
} else {
plot_depth <- seq(max(max_depth/20, 5), max_depth/2, max(max_depth/20, 5))
sel_cum_melt <- dplyr::filter(cum_melt, Depth %in% plot_depth)
cov_fig <- ggplot(sel_cum_melt, aes(Depth, value, group=Depth, color=Depth)) +
geom_boxplot() +
geom_jitter(alpha=0.8, position=position_jitter(0.8)) +
scale_fill_gradient(guide = F)
}
cov_fig <- cov_fig +
scale_y_continuous(labels = scales::percent) +
ylab("Fraction of Reads \u2265 Depth") +
theme_onmath()
if (! is.null(out_prefix)) {
out_dir <- dirname(out_prefix)
path_name <- save_mkdir(out_dir)
save_ggplot(ggplot_out = cov_fig,
output = out_prefix)
}
return(cov_fig)
}
#' Transform variants summary to plot data for ggplot
#'
#' @param stats_name type of summary file
#' @param stats_dir directory of summary file
#' @param impact_map_file snpeff effect impact file
#' @param stats_prefix prefix of summary file
#' @param varRegion_order plot order of variant region
#'
#' @examples
#'
#' stats_names <- om_const_reseq_variant[['var_file_labs']][1:2]
#' stats_dir <- system.file("extdata", "variant_stats", package = "omplotr")
#' impact_map_file <- system.file("extdata", "variant_stats", "snpeff_varEffects.csv", package = "omplotr")
#' test_pie_stats <- lapply(stats_names, om_var_pie_stats, stats_dir=stats_dir, impact_map_file=impact_map_file)
#' head(test_pie_stats[[1]], 4)
om_var_pie_stats <- function(stats_name, stats_dir,
impact_map_file,
varRegion_order=NULL) {
stats_file <- file.path(stats_dir,
paste(stats_name,
'count.summary.txt', sep='.'))
if(! file.exists(stats_file)) {
warning(paste(stats_file, 'Not exists!'))
return(NULL)
}
impact_map <- read.csv(impact_map_file)
var_df <- read.delim(stats_file, row.names = 1)
var_df <- var_df[rowSums(var_df) > 0, ]
var_sum <- apply(var_df,2,sum)
p_var_df <- data.frame(t(t(var_df) / var_sum),
check.names = F)
p_var_df$Type <- rownames(p_var_df)
m_p_var_df <- reshape2::melt(p_var_df, id.vars = 'Type')
m_p_var_df$Percentage <- str_percent(m_p_var_df$value)
m_p_var_df$fig <- stats_name
m_p_var_df$Type <- stringr::str_replace(m_p_var_df$Type, ' ', '')
if (stats_name == 'varRegion') {
m_p_var_df <- dplyr::filter(m_p_var_df, ! Type %in% c('GENE', 'TRANSCRIPT'))
m_p_var_df$Type <- factor(m_p_var_df$Type,
levels = varRegion_order)
} else if (stats_name == 'varEffects') {
m_p_var_col <- colnames(m_p_var_df)
m_p_var_df <- merge(m_p_var_df, impact_map,
by.x = 'Type', by.y = 'effect')
m_p_var_df$fig <- paste(m_p_var_df$fig,
m_p_var_df$impact,
sep = '-')
m_p_var_df <- m_p_var_df[, m_p_var_col]
}
return(m_p_var_df)
}
#' Pie Plot of variant stats
#'
#' Using ggplot2 to generate pie plot of variant summary.
#' Variant summary data is generated using \code{om_var_pie_stats}.
#'
#' @param sample_var_stats variant stats of each sample
#' @param color_pal color palette name, default is "Set2"
#' @param out_prefix output prefix, default is NULL, no output
#'
#' @examples
#'
#' # pie plot data format
#' a_om_test_var_stats <- dplyr::filter(om_test_var_stats[[1]], variable == 'A')
#' head(a_om_test_var_stats)
#' # plot
#' om_var_pie_plot(a_om_test_var_stats)
om_var_pie_plot <- function(sample_var_stats, color_pal='Set2', out_prefix=NULL) {
type_num <- length(sample_var_stats$Type)
sample_name <- unique(sample_var_stats$variable)
plot_type <- unique(sample_var_stats$fig)
type_color <- palette_colors(pal_name = color_pal,
sample_num = type_num)
names(type_color) <- sample_var_stats$Type
lg_label <- paste(sample_var_stats$Type,
' (',
sample_var_stats$Percentage,
')', sep = '')
names(lg_label) <- sample_var_stats$Type
pie <- ggplot(sample_var_stats, aes(x = "", y=value, fill = Type)) +
geom_bar(width = 1, stat = "identity", color='white') +
coord_polar("y", start = 0) +
blank_theme() + theme(axis.text.x=element_blank()) +
scale_fill_manual(values = type_color,
labels = lg_label) +
guides(fill=guide_legend(title = '')) +
ggtitle(paste(sample_name, plot_type, 'Plot'))
if (! is.null(out_prefix)) {
out_dir <- dirname(out_prefix)
path_name <- save_mkdir(out_dir)
save_ggplot(ggplot_out = pie,
output = out_prefix)
}
return(pie)
}
#' Variant summary plot
#'
#' Variant portion distribution summary plot using ggplot2.
#' Variant summary data is generated using \code{om_var_pie_stats}.
#'
#' @param var_stats_df variant stats dataframe
#' @param out_prefix output prefix, default is NULL, no output
#'
#' @examples
#'
#' om_test_var_stats_df <- plyr::ldply(om_test_var_stats, data.frame)
#' om_var_summary_plot(om_test_var_stats_df)
om_var_summary_plot <- function(var_stats_df,
out_prefix=NULL,
width = 30,
height = 12) {
type_num <- length(unique(var_stats_df$Type))
box_col <- palette_colors(pal_name = 'Set1',
sample_num = type_num)
stats_summary <- ggplot(var_stats_df, aes(Type, value, color=Type)) +
geom_jitter(alpha=0.5) +
geom_boxplot() +
coord_flip() +
facet_wrap(~fig, scales = 'free', ncol = 5) +
guides(color=F) +
scale_y_continuous(labels = scales::percent) +
scale_color_manual(values = box_col) +
theme_onmath() +
xlab('') + ylab('')
if (! is.null(out_prefix)) {
out_dir <- dirname(out_prefix)
path_name <- save_mkdir(out_dir)
save_ggplot(ggplot_out = stats_summary,
output = out_prefix,
width = width,
height = height)
}
return(stats_summary)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.