#############################
# These functions currently generate a number of deprecation warnings related
# to changes in the usage of ".data" usage in data masking and tidy selection
# starting in tidyselect 1.2.0.
# These deprecation warnings will be addressed in future versions
#############################
#' Correlate cell line response with gene mutations
#'
#' For each of the 19.537 genes present in the
#' \code{\link{data_mutations}} data set, perform a Mann-Whitney test
#' \code{\link[stats]{wilcox.test}} to determine whether cell lines containing a mutant version of the gene respond
#' differently than cell lines with the wild-type version of the gene. The Mann-Whitney
#' test is chosen, rather than the Student's t-test, because it does not
#' assume that the response values in the two groups are normally distributed.
#'
#' @param data A tibble.
#' @param response Column containing response data
#' @param ids Column containing DepMap IDs of cell lines
#' @param fdr False discovery rate. Number between 0 and 1 representing the
#' likelihood that a gene predicted to be significant is actually a false-positive.
#'
#' @return A tibble with 19,537 rows and 4 columns. Each row contains the
#' correlation values for a single gene.
#' \describe{
#' \item{gene}{Hugo gene symbol}
#' \item{effect}{The relative increase or decrease in cell line response
#' associated with mutation of the gene. Calculated as log2( mutant response / wildtype response )}
#' \item{p.value}{Probability that the null hypothesis is true (there is
#' no relationship between gene mutation and cell line response)}
#' \item{significant}{Whether the correlation is deemed significant after
#' multiple hypothesis correction with the given false discovery rate}
#' }
#'
#' @export
#'
#' @examples
#' # Setup example data set
#' df <- tibble::tibble(
#' CellLine = c("LS513", "253-J", "NIH:OVCAR-3"),
#' DepMapID = c("ACH-000007", "ACH-000011", "ACH-000001"),
#' logIC50 = c(-2.8, -4.04, -6.23)
#' )
#'
#' cor_mutations(
#' data = df,
#' response = "logIC50",
#' ids = "DepMapID",
#' fdr = 0.01
#' )
cor_mutations <- function(data, response, ids = "depmap_id", fdr = 0.05) {
data %>%
# Join with mutation data set
dplyr::inner_join(
cellpanelr::data_mutations(),
# Note order needs to be flipped in setNames (yvar, xvar)
by = stats::setNames("depmap_id", ids),
suffix = c("", ".depmap")
) %>%
dplyr::group_by(.data$gene) %>%
dplyr::summarise(
# effect is log2(mutant/wildtype)
effect = log2(mean(.data[[response]][.data$mutant == TRUE]) /
mean(.data[[response]][.data$mutant == FALSE])),
p.value = ifelse(
# For genes with no mutant or wild-type in data set, let p.value = NA
dplyr::n_distinct(.data$mutant) == 2,
# Use Mann-Whitney (Wilcoxon) U test b/c it doesn't
# assume normal distribution of data
stats::wilcox.test(.data[[response]] ~ .data$mutant)[["p.value"]],
NA
),
) %>%
dplyr::arrange(.data$p.value) %>%
# Adjust p-values using Benjamini-Hochberg method (https://www2.rockefeller.edu/qbio_sc/resources/l04.pdf)
dplyr::mutate(adj.p = stats::p.adjust(.data$p.value, method = "BH")) %>%
dplyr::mutate(significant = .data$adj.p <= fdr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.