Nothing
#' Check treatment enrichment
#'
#' `r lifecycle::badge('deprecated')`
#' This function was deprecated due to its name changing to `calculate_treatment_enrichment()`.
#'
#' @return A bar plot displaying the percentage of all detect proteins and all significant proteins
#' that bind to the treatment. A Fisher's exact test is performed to calculate the significance of
#' the enrichment in significant proteins compared to all proteins. The result is reported as a
#' p-value. If \code{plot = FALSE} a contingency table in long format is returned.
#' @keywords internal
#' @export
treatment_enrichment <- function(...) {
# This function has been renamed and is therefore deprecated.
lifecycle::deprecate_warn("0.2.0",
"treatment_enrichment()",
"calculate_treatment_enrichment()",
details = "This function has been renamed."
)
calculate_treatment_enrichment(...)
}
#' Check treatment enrichment
#'
#' Check for an enrichment of proteins interacting with the treatment in significantly changing
#' proteins as compared to all proteins.
#'
#' @param data a data frame contains at least the input variables.
#' @param protein_id a character column in the \code{data} data frame that contains the protein
#' accession numbers.
#' @param is_significant a logical column in the \code{data} data frame that indicates if the
#' corresponding protein has a significantly changing peptide. The input data frame may contain
#' peptide level information with significance information. The function is able to extract protein
#' level information from this.
#' @param binds_treatment a logical column in the \code{data} data frame that indicates if the
#' corresponding protein binds to the treatment. This information can be obtained from different
#' databases, e.g. UniProt.
#' @param group optional, character column in the \code{data} data frame that contains information by
#' which the analysis should be grouped. The analysis will be performed separately for each of the
#' groups. This is most likely a column that labels separate comparisons of different conditions.
#' In protti the `assign_missingness()` function creates such a column automatically.
#' @param treatment_name a character value that indicates the treatment name. It will be included
#' in the plot title.
#' @param plot a logical value indicating whether the result should be plotted or returned as a
#' table.
#' @param fill_colours a character vector that specifies the fill colours of the plot.
#' @param fill_by_group a logical value that specifies if the bars in the plot should be filled by group
#' if the group argument is provided. Default is `FALSE`.
#' @param facet_n_col a numeric value that specifies the number of columns the facet plot should have if
#' a `group` column was provided.
#'
#' @return A bar plot displaying the percentage of all detected proteins and all significant proteins
#' that bind to the treatment. A Fisher's exact test is performed to calculate the significance of
#' the enrichment in significant proteins compared to all proteins. The result is reported as a
#' p-value. If \code{plot = FALSE} a contingency table in long format is returned.
#'
#' @import dplyr
#' @import ggplot2
#' @importFrom tidyr pivot_wider pivot_longer complete
#' @importFrom rlang .data as_name enquo ensym !!
#' @importFrom tibble column_to_rownames
#' @importFrom magrittr %>%
#' @importFrom purrr map2_dfr
#' @export
#'
#' @examples
#' # Create example data
#' data <- data.frame(
#' protein_id = c(paste0("protein", 1:50)),
#' significant = c(
#' rep(TRUE, 20),
#' rep(FALSE, 30)
#' ),
#' binds_treatment = c(
#' rep(TRUE, 10),
#' rep(FALSE, 10),
#' rep(TRUE, 5),
#' rep(FALSE, 25)
#' ),
#' group = c(
#' rep("A", 5),
#' rep("B", 15),
#' rep("A", 15),
#' rep("B", 15)
#' )
#' )
#'
#' # Plot treatment enrichment
#' calculate_treatment_enrichment(
#' data,
#' protein_id = protein_id,
#' is_significant = significant,
#' binds_treatment = binds_treatment,
#' treatment_name = "Rapamycin",
#' plot = TRUE
#' )
#'
#' # Plot treatment enrichment by group
#' calculate_treatment_enrichment(
#' data,
#' protein_id = protein_id,
#' group = group,
#' is_significant = significant,
#' binds_treatment = binds_treatment,
#' treatment_name = "Rapamycin",
#' plot = TRUE,
#' fill_by_group = TRUE
#' )
#'
#' # Calculate treatment enrichment
#' enrichment <- calculate_treatment_enrichment(
#' data,
#' protein_id = protein_id,
#' is_significant = significant,
#' binds_treatment = binds_treatment,
#' plot = FALSE
#' )
#'
#' enrichment
calculate_treatment_enrichment <- function(data,
protein_id,
is_significant,
binds_treatment,
group = NULL,
treatment_name,
plot = TRUE,
fill_colours = protti::protti_colours,
fill_by_group = FALSE,
facet_n_col = 2) {
# to avoid note about no global variable binding.
. <- NULL
group_missing <- missing(group)
# group by the "group" argument if provided
if (!group_missing) {
data <- data %>%
dplyr::ungroup() %>%
dplyr::distinct({{ protein_id }}, {{ is_significant }}, {{ binds_treatment }}, {{ group }}) %>%
dplyr::group_by({{ protein_id }}, {{ group }}) %>%
dplyr::mutate({{ is_significant }} := ifelse(sum({{ is_significant }}, na.rm = TRUE) > 0,
TRUE,
FALSE
)) %>%
dplyr::ungroup() %>%
dplyr::distinct()
# Create contingency table
cont_table <- data %>%
dplyr::group_by({{ binds_treatment }}, {{ is_significant }}, {{ group }}) %>%
dplyr::summarize(n = dplyr::n_distinct(!!rlang::ensym(protein_id)), .groups = "drop") %>%
dplyr::group_by({{ group }}) %>%
tidyr::complete({{ binds_treatment }}, {{ is_significant }}, fill = list(n = 0)) %>%
dplyr::ungroup()
fisher_test <- cont_table %>%
split(dplyr::pull(., {{ group }})) %>%
purrr::map2_dfr(
.y = names(.),
.f = ~ {
ftest <- .x %>%
dplyr::select(-{{ group }}) %>%
tidyr::pivot_wider(names_from = {{ is_significant }}, values_from = .data$n) %>%
tibble::column_to_rownames(var = rlang::as_name(rlang::enquo(binds_treatment))) %>%
as.matrix() %>%
stats::fisher.test()
data.frame(pval = ftest$p.value) %>%
dplyr::mutate({{ group }} := .y)
}
)
cont_table <- cont_table %>%
dplyr::left_join(fisher_test, by = rlang::as_name(rlang::enquo(group))) %>%
dplyr::arrange({{ group }})
} else {
data <- data %>%
dplyr::ungroup() %>%
dplyr::distinct({{ protein_id }}, {{ is_significant }}, {{ binds_treatment }}) %>%
dplyr::group_by({{ protein_id }}) %>%
dplyr::mutate({{ is_significant }} := ifelse(sum({{ is_significant }}, na.rm = TRUE) > 0,
TRUE,
FALSE
)) %>%
dplyr::ungroup() %>%
dplyr::distinct()
# Create contingency table
cont_table <- data %>%
dplyr::group_by({{ binds_treatment }}, {{ is_significant }}) %>%
dplyr::summarize(n = dplyr::n_distinct(!!rlang::ensym(protein_id)), .groups = "drop") %>%
tidyr::complete({{ binds_treatment }}, {{ is_significant }}, fill = list(n = 0))
fisher_test <- cont_table %>%
tidyr::pivot_wider(names_from = {{ is_significant }}, values_from = .data$n) %>%
tibble::column_to_rownames(var = rlang::as_name(rlang::enquo(binds_treatment))) %>%
as.matrix() %>%
stats::fisher.test()
cont_table <- cont_table %>%
dplyr::mutate(pval = fisher_test$p.value)
}
if (plot == FALSE) {
return(cont_table)
}
# Add p-value to group name for plot
# also group to correctly calculate the total number of proteins in the next step
if (!group_missing) {
cont_table <- cont_table %>%
dplyr::mutate(group_pval = paste0(
{{ group }}, " (p-value: ",
ifelse(.data$pval < 0.01,
formatC(.data$pval,
format = "e", digits = 1
),
round(.data$pval, digits = 2)
),
")"
)) %>%
dplyr::group_by({{ group }})
}
enrichment_plot <- cont_table %>%
dplyr::mutate(total = sum(.data$n)) %>%
dplyr::mutate(
name = dplyr::case_when(
{{ binds_treatment }} == FALSE &
{{ is_significant }} == FALSE ~ "non_sig_non_interactor",
{{ binds_treatment }} == FALSE &
{{ is_significant }} == TRUE ~ "sig_non_interactor",
{{ binds_treatment }} == TRUE &
{{ is_significant }} == FALSE ~ "non_sig_interactor",
{{ binds_treatment }} == TRUE &
{{ is_significant }} == TRUE ~ "sig_interactor"
)
) %>%
dplyr::select(-c({{ binds_treatment }}, {{ is_significant }})) %>%
tidyr::pivot_wider(names_from = .data$name, values_from = .data$n) %>%
dplyr::mutate(total_interactor = .data$non_sig_interactor + .data$sig_interactor) %>%
dplyr::mutate(total_sig = .data$sig_non_interactor + .data$sig_interactor) %>%
dplyr::mutate(
`All detected proteins` = .data$total_interactor / .data$total * 100,
`Significant proteins` = .data$sig_interactor / .data$total_sig * 100
) %>%
tidyr::pivot_longer(
cols = c(.data$`All detected proteins`, .data$`Significant proteins`),
names_to = "name",
values_to = "value"
) %>%
dplyr::mutate(count = ifelse(.data$name == "All detected proteins",
.data$total_interactor,
.data$sig_interactor
)) %>%
{
if (fill_by_group & !group_missing) {
ggplot2::ggplot(., ggplot2::aes(.data$name, .data$value, fill = .data$group)) +
ggplot2::geom_col(col = "black", size = 1.2) +
scale_fill_manual(values = fill_colours)
} else {
ggplot2::ggplot(., ggplot2::aes(.data$name, .data$value)) +
ggplot2::geom_col(fill = fill_colours[1], col = "black", size = 1.2)
}
} +
{
if (!group_missing) {
ggplot2::facet_wrap(~ .data$group_pval, ncol = facet_n_col)
}
} +
{
if (!group_missing) {
ggplot2::labs(
title = paste0(
"Proteins interacting with ",
treatment_name
),
x = "",
y = paste("Interact with", treatment_name, "[%]")
)
} else {
ggplot2::labs(
title = paste0(
"Proteins interacting with ",
treatment_name,
" (p-value: ",
ifelse(cont_table$pval < 0.01,
formatC(cont_table$pval,
format = "e", digits = 1
),
round(cont_table$pval, digits = 2)
),
")"
),
x = "",
y = paste("Interact with", treatment_name, "[%]")
)
}
} +
ggplot2::geom_text(aes(label = paste("n =", count)),
position = position_stack(vjust = 0.5),
size = 8
) +
ggplot2::theme_bw() +
ggplot2::theme(
plot.title = ggplot2::element_text(size = 20),
axis.text.x = ggplot2::element_text(size = 15),
axis.text.y = ggplot2::element_text(size = 15),
axis.title.y = ggplot2::element_text(size = 15),
strip.text = ggplot2::element_text(size = 15),
legend.title = ggplot2::element_text(size = 15),
legend.text = ggplot2::element_text(size = 13),
strip.background = element_blank()
)
if (plot == TRUE) {
return(enrichment_plot)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.