This document contains all the analyses done on RNA data that was generated for Workman et al paper. Knit this R markdown file after you have successfully run drake::r_make()
.
knitr::opts_chunk$set(echo = TRUE) save_figures <- FALSE
Load the required libraries first:
pacman::p_load(pander, drake, knitr, ggpubr, here, tidyverse)
# functions for plotting double single and double-sided violins "%||%" <- function(a, b) { if (!is.null(a)) a else b } geom_one_sided_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "dodge", trim = TRUE, scale = "area", show.legend = NA, inherit.aes = TRUE, ...) { layer(data = data, mapping = mapping, stat = stat,geom = GeomFlatViolinOneSided, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, ...)) } #' @rdname ggplot2-ggproto #' @format NULL #' @usage NULL #' @export GeomFlatViolinOneSided <- ggproto("GeomFlatViolin", Geom, setup_data = function(data, params) { data$width <- data$width %||% params$width %||% (resolution(data$x, FALSE) * 0.9) # ymin, ymax, xmin, and xmax define the bounding rectangle for each group data %>% group_by(group) %>% mutate(ymin = min(y), ymax = max(y), xmin = x, xmax = x - width / 2) }, draw_group = function(data, panel_scales, coord) { # Find the points for the line to go all the way around data <- transform(data, xminv = x, xmaxv = x + violinwidth * (xmax - x)) # Make sure it's sorted properly to draw the outline newdata <- rbind(plyr::arrange(transform(data, x = xminv), y), plyr::arrange(transform(data, x = xmaxv), -y)) # Close the polygon: set first and last point the same # Needed for coord_polar and such newdata <- rbind(newdata, newdata[1,]) ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord)) }, draw_key = draw_key_polygon, default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5, alpha = NA, linetype = "solid"), required_aes = c("x", "y") ) geom_two_sided_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "dodge", trim = TRUE, scale = "area", show.legend = NA, inherit.aes = TRUE, ...) { layer(data = data, mapping = mapping, stat = stat, geom = GeomFlatViolinTwoSided, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, ...)) } #' @rdname ggplot2-ggproto #' @format NULL #' @usage NULL #' @export GeomFlatViolinTwoSided <- ggproto("GeomFlatViolin", Geom, setup_data = function(data, params) { data$width <- data$width %||% params$width %||% (resolution(data$x, FALSE) * 0.9) # ymin, ymax, xmin, and xmax define the bounding rectangle for each group data %>% group_by(group) %>% mutate(ymin = min(y), ymax = max(y), xmin = x, xmax = x - width / 2) %>% mutate(xmax = ifelse((group %% 2) == 0, x + width / 2, xmax) ) }, draw_group = function(data, panel_scales, coord) { # Find the points for the line to go all the way around data <- transform(data, xminv = x, xmaxv = x + violinwidth * (xmax - x)) # Make sure it's sorted properly to draw the outline newdata <- rbind(plyr::arrange(transform(data, x = xminv), y), plyr::arrange(transform(data, x = xmaxv), -y)) # Close the polygon: set first and last point the same # Needed for coord_polar and such newdata <- rbind(newdata, newdata[1,]) ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord)) }, draw_key = draw_key_polygon, default_aes = aes(weight = 1, colour = "grey20", fill = "white", size = 0.5, alpha = NA, linetype = "solid"), required_aes = c("x", "y") ) panderOptions('table.alignment.default', 'left') panderOptions('table.alignment.rownames', 'left')
Now load the data:
loadd(rna_wo_data)
Here is a description of columns of dna_kr_data
:
col_names_df <- data.frame(Columns = c("dataset", "read_id", "tail_start_tf", "tail_end_tf", "samples_per_nt_tf", "tail_length_tf", "tail_start_np", "tail_end_np" , "read_rate_np", "tail_length_np", "qc_tag_np", "samples_per_nt_np", "barcode"), Description = c("Specifies the name of the conditions in Workman et al. data (N.B.: 60x and 60xb were combined into a single condition called 60x)", "Read ID", "tailfindr estimate of tail start", "tailfindr estimate of tail end", "tailfindr estimation of read_specific transloation rate in units of samples per nucleotide", "tailfindr tail length estimate", "Nanopolish tail start estimate", "Nanopolish tail end estimate (originally it is transcript_start in Nanopolish output)", "Nanopolish read rate", "Nanopolish estimation of tail length", "Nanopolish QC Tag", "Nanopolish estimation of read-specific translocation rate in units of samples per nucleotide calculated using the formula: 3012/read_rate", "The expected tail length"))
pander(col_names_df)
# re-arrange the factors in the data for proper display rna_wo_data$dataset <- fct_relevel(rna_wo_data$dataset, "100x", after = 6) rna_wo_data$barcode <- fct_relevel(rna_wo_data$barcode, "100", after = 6)
# define the function for computing standard error std_err <- function(x) sd(x, na.rm = TRUE)/sqrt(length(x)) # summarize the data and display a table summary_data <- rna_wo_data %>% group_by(dataset) %>% summarise(read_count = n(), mean_tf = mean(tail_length_tf, na.rm = TRUE), mean_np = mean(tail_length_np, na.rm = TRUE), median_tf = median(tail_length_tf, na.rm = TRUE), median_np = median(tail_length_np, na.rm = TRUE), std_dev_tf = sd(tail_length_tf, na.rm = TRUE), std_dev_np = sd(tail_length_np, na.rm = TRUE), std_err_tf = std_err(tail_length_tf), std_err_np = std_err(tail_length_np) ) summary_data %<>% mutate(cof_var_tf = std_dev_tf/mean_tf, cof_var_np = std_dev_np/mean_np) pander(summary_data)
# make long data long_data_tf_np_tail_length <- rna_wo_data %>% select(dataset, barcode, tail_length_tf, tail_length_np) %>% gather(key = 'tool', value = 'tail_length', tail_length_tf, tail_length_np) %>% mutate(tail_length = ifelse(tail_length >= 300, 300, tail_length)) %>% mutate(tool = fct_relevel(tool, c('tail_length_tf', 'tail_length_np'))) %>% mutate(dataset = fct_relevel(dataset, c('10x', '15x', '30x', '60x', '60xN', '80x', '100x'))) p <- ggplot(long_data_tf_np_tail_length, aes(x = dataset, y = tail_length, color = tool, fill = tool)) + geom_two_sided_flat_violin(position = position_nudge(x = 0, y = 0), alpha = .5) + facet_grid(~dataset, scales = 'free') + geom_hline(aes(yintercept = as.numeric(as.character(barcode))))
lengend_name <- 'Tool' legend_labels <- c('tailfindr', 'Nanopolish') p <- p + theme(panel.background = element_blank(), panel.grid.minor = ggplot2::element_line(colour="grey90", size=0.5), panel.grid.major = ggplot2::element_line(colour="grey90", size=0.5), panel.grid.major.x = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.direction = "horizontal", legend.position = "top") + scale_color_brewer(palette = "Paired", name = lengend_name, labels = legend_labels) + scale_fill_brewer(palette = "Paired", name = lengend_name, labels = legend_labels) + ggplot2::scale_y_continuous(minor_breaks = seq(0, 300, 25)) + ylab('Tail length (nt)') + labs(fill = 'Tool') p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.