earlycross: Extensions to Seurat

Documented in ComputeClustersByTrt

#' Given a Seurat object and treatment variable, compare
#' clusters between levels of the treatment
#'
#' @param obj Seurat object
#' @param trt_var Treatment variable
#' @param rep_var Replicate/Individual ID variable
#' @param ci Whether to plot binomial confidence intervals (Jeffreys)
#' @param group_by Name of grouping variable (default is Idents)
#'
#' @importFrom magrittr '%>%'
#' @examples
#' pbmc_small$trt <- sample(c('drug', 'control'), ncol(pbmc_small), replace=TRUE)
#' pbmc_small$genotype <- sample(c('1', '2', '3'), ncol(pbmc_small), replace=TRUE)
#' head(ComputeClustersByTrt(pbmc_small, trt, genotype))
#'
#' @export
ComputeClustersByTrt <- function(obj, trt_var, rep_var, group_by = NULL,
                                 ci = TRUE, ci_alpha = 0.05) {
    # For curly curly {{ syntax see
    # https://www.tidyverse.org/blog/2019/06/rlang-0-4-0/
    meta <- obj@meta.data %>%
        as.data.frame() %>%
        tibble::rownames_to_column("cell")
    if (is.null(group_by)) {
        meta$ident <- Idents(obj)
    } else {
        meta$ident <- meta[[group_by]]
    }

    grp_dat <- dplyr::group_by(meta, {
        {
            trt_var
        }
    }, {
        {
            rep_var
        }
    }) %>%
        dplyr::mutate(N_tot = dplyr::n()) %>%
        dplyr::ungroup() %>%
        dplyr::group_by({
            {
                trt_var
            }
        }, {
            {
                rep_var
            }
        }, ident, N_tot)
    grp_dat <- grp_dat %>%
        dplyr::summarize(N = dplyr::n(), .groups = 'keep') %>%
        dplyr::mutate(frac = N/N_tot)

    # Put binomial confidence intervals around the cell abundance estimates Use
    # Jeffreys interval -- posterior dist'n is Beta(x + 1/2, n – x + 1/2)
    grp_stats <- dplyr::mutate(grp_dat, cells_per_thousand = frac * 1000) %>%
        dplyr::mutate(trt_var = {{ trt_var }})
    if (ci) {
        grp_stats <- dplyr::mutate(grp_stats,
            lower = qbeta(ci_alpha/2, N + 1/2, N_tot - N + 1/2),
            upper = qbeta(1 - ci_alpha/2, N + 1/2, N_tot - N + 1/2),
            lower_per_thousand = lower * 1000,
            upper_per_thousand = upper * 1000)
    }
    grp_stats
}