Nothing
#' Protein coverage distribution
#'
#' Plots the distribution of protein coverages in a histogram.
#'
#' @param data a data frame that contains at least the input variables.
#' @param protein_identifier a character column in the \code{data} data frame that contains protein
#' identifiers.
#' @param coverage a numeric column in the \code{data} data frame that contains protein coverage
#' in percent. This information can be obtained using the \code{\link{sequence_coverage}} function.
#' @param sample optional, a character column in the \code{data} data frame that contains sample names.
#' Please only provide this argument if you want to facet the distribution plot by sample
#' otherwise do not provide this argument. Leaving it out and passing \code{NULL} explicitly are
#' treated the same way: no faceting is applied.
#' @param interactive a logical value that specifies whether the plot should be interactive
#' (default is FALSE).
#'
#' @return A protein coverage histogram with 5 percent binwidth. The vertical dashed line
#' indicates the median coverage, computed over all distinct rows that remain after removing
#' missing coverage values (when the plot is faceted by sample, the same overall median is drawn
#' in every facet). If \code{interactive = TRUE} the plot is converted with
#' \code{plotly::ggplotly()} before it is returned.
#' @import dplyr
#' @import ggplot2
#' @importFrom tidyr drop_na
#' @importFrom magrittr %>%
#' @importFrom plotly ggplotly
#' @importFrom stringr str_sort
#' @export
#'
#' @examples
#' set.seed(123) # Makes example reproducible
#'
#' # Create example data
#' data <- create_synthetic_data(
#'   n_proteins = 100,
#'   frac_change = 0.05,
#'   n_replicates = 3,
#'   n_conditions = 2,
#'   method = "effect_random"
#' )
#'
#' # Plot sequence coverage
#' qc_sequence_coverage(
#'   data = data,
#'   protein_identifier = protein,
#'   coverage = coverage
#' )
#' @seealso \code{\link{sequence_coverage}}
qc_sequence_coverage <- function(data,
                                 protein_identifier,
                                 coverage,
                                 sample = NULL,
                                 interactive = FALSE) {
  # Capture `sample` once. Testing the captured expression for NULL (instead of
  # using `missing()`) makes an explicit `sample = NULL` behave exactly like an
  # omitted argument rather than failing in the mutate/facet steps below.
  sample_quo <- rlang::enquo(sample)
  facet_by_sample <- !rlang::quo_is_null(sample_quo)

  # One row per protein / coverage (/ sample) combination, without missing
  # coverage values.
  result <- data %>%
    dplyr::distinct({{ protein_identifier }}, {{ coverage }}, {{ sample }}) %>%
    tidyr::drop_na({{ coverage }})

  if (facet_by_sample) {
    # Natural ("numeric") sort of the sample names so that facets appear as
    # e.g. sample_2 before sample_10.
    result <- result %>%
      dplyr::mutate({{ sample }} := factor({{ sample }},
        levels = unique(stringr::str_sort({{ sample }}, numeric = TRUE))
      ))
  }

  # Overall median across every retained row; it is not computed per sample.
  coverage_median <- stats::median(
    dplyr::pull(result, {{ coverage }}),
    na.rm = TRUE
  )

  # Adding NULL to a ggplot is a no-op, so the facet layer can be optional.
  facet_layer <- if (facet_by_sample) {
    ggplot2::facet_wrap(
      rlang::new_formula(NULL, sample_quo),
      scales = "free",
      ncol = 4
    )
  } else {
    NULL
  }

  plot <- ggplot2::ggplot(result, ggplot2::aes(x = {{ coverage }})) +
    ggplot2::geom_histogram(
      binwidth = 5,
      col = "black",
      fill = "#5680C1",
      boundary = 0,
      # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line widths in
      # favour of `linewidth`; kept as `size` here because the minimum
      # supported ggplot2 version is not visible from this file - confirm and
      # switch when possible.
      size = 1
    ) +
    ggplot2::geom_vline(xintercept = coverage_median, linetype = "dashed") +
    ggplot2::labs(
      title = "Protein coverage distribution",
      x = "Coverage [%]",
      y = "Number of proteins"
    ) +
    ggplot2::scale_x_continuous(breaks = seq(from = 0, to = 100, by = 10)) +
    facet_layer +
    ggplot2::theme_bw() +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 20),
      axis.title.x = ggplot2::element_text(size = 15),
      axis.text.y = ggplot2::element_text(size = 15),
      axis.text.x = ggplot2::element_text(size = 12),
      axis.title.y = ggplot2::element_text(size = 15),
      strip.text = ggplot2::element_text(size = 15),
      panel.border = ggplot2::element_rect(fill = NA),
      strip.background = ggplot2::element_rect(fill = "white")
    )

  if (!interactive) {
    return(plot)
  }

  plotly::ggplotly(plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.