#' Find marker genes for a given set of cells, and filter out highly expressed genes from cells in a reference Seurat object.
#'
#' @name FindFilteredMarkers
#' @author Jack Leary
#' @description Used to filter candidate marker genes for a subcluster Seurat object. Prevents markers from being chosen if they are highly expressed in other celltypes.
#' @import magrittr
#' @importFrom Seurat Idents DefaultAssay FindAllMarkers
#' @importFrom dplyr filter mutate group_by summarise across ungroup select pull
#' @importFrom stats quantile
#' @param obj.1 The object containing clusters that you want marker genes for. Defaults to NULL.
#' @param obj.2 The object containing reference data against which we'll filter the markers in \code{obj.1}. Defaults to NULL.
#' @param ident.1 The group identifier for \code{obj.1}. Defaults to "seurat_clusters".
#' @param ident.2 The group identifier for \code{obj.2}. Defaults to "seurat_clusters".
#' @param de.method The differential expression method used in \code{\link[Seurat]{FindAllMarkers}}. Defaults to "wilcox".
#' @param fdr.cutoff The cutoff used to remove DE genes with non-significant adjusted \emph{p}-values. Defaults to 0.05.
#' @param log2fc.cutoff The log2FC cutoff used, in part, to determine whether a gene is differentially expressed. Defaults to 0.25.
#' @param perc.cutoff The percentile cutoff used to find highly expressed genes in other cluster. Defaults to 0.9.
#' @param extra.cell.filter An optional list of extra cells to filter out of \code{obj.2} other than the cells in \code{obj.1}. Defaults to NULL.
#' @return A data.frame of markers for each cluster in \code{obj.1} filtered by genes considered highly expressed in clusters in \code{obj.2}.
#' @seealso \code{\link{FindSpecificMarkers}}
#' @seealso \code{\link[Seurat]{FindAllMarkers}}
#' @export
#' @examples
#' \dontrun{
#' FindFilteredMarkers(obj.1 = subclust_obj,
#' obj.2 = full_obj,
#' ident.1 = "label",
#' ident.2 = "seurat_clusters")
#' FindFilteredMarkers(obj.1 = subclust_obj,
#' obj.2 = full_obj,
#' de.method = "t",
#' fdr.cutoff = 0.01,
#' log2fc.cutoff = 1,
#' perc.cutoff = 0.95)
#' }
FindFilteredMarkers <- function(obj.1 = NULL,
obj.2 = NULL,
ident.1 = "seurat_clusters",
ident.2 = "seurat_clusters",
de.method = "wilcox",
fdr.cutoff = 0.05,
log2fc.cutoff = 0.25,
perc.cutoff = 0.9,
extra.cell.filter = NULL) {
# check inputs
if (is.null(obj.1) || is.null(obj.2)) { stop("You must provide two non-NULL Seurat objects to FindFilteredMarkers().") }
if (!ident.1 %in% colnames(obj.1@meta.data)) { stop("ident.1 not in obj.1 metadata.") }
if (!ident.2 %in% colnames(obj.2@meta.data)) { stop("ident.2 not in obj.2 metadata.") }
Seurat::Idents(obj.1) <- ident.1
Seurat::Idents(obj.2) <- ident.2
Seurat::DefaultAssay(obj.1) <- "SCT"
Seurat::DefaultAssay(obj.2) <- "SCT"
markers <- Seurat::FindAllMarkers(obj.1,
logfc.threshold = log2fc.cutoff,
test.use = de.method,
only.pos = TRUE,
verbose = FALSE,
random.seed = 312) %>%
dplyr::filter(p_val_adj < fdr.cutoff)
# filter obj.1 cells from obj.2
obj.2 <- obj.2[, !rownames(obj.2@meta.data) %in% rownames(obj.1@meta.data)]
if (!is.null(extra.cell.filter)) {
obj.2 <- obj.2[, !rownames(obj.2@meta.data) %in% extra.cell.filter]
}
gene_means_by_clust <- t(obj.2@assays$SCT@data) %>%
as.data.frame() %>%
dplyr::mutate(label_fine = obj.2[[ident.2]][, 1]) %>%
dplyr::group_by(label_fine) %>%
dplyr::summarise(dplyr::across(where(is.numeric), mean)) %>%
dplyr::ungroup()
high_exp_genes <- c()
loop_celltypes <- unique(obj.2[[ident.2]][, 1])
for (i in loop_celltypes) {
top_exp_genes <- gene_means_by_clust %>%
dplyr::filter(label_fine == i) %>%
dplyr::select(-label_fine) %>%
t() %>%
as.data.frame() %>%
dplyr::filter(V1 > stats::quantile(V1, perc.cutoff)) %>%
dplyr::mutate(gene = rownames(.)) %>%
dplyr::pull(gene)
high_exp_genes <- c(high_exp_genes, top_exp_genes)
}
high_exp_genes <- unique(high_exp_genes)
markers %<>% dplyr::filter(!gene %in% high_exp_genes)
return(markers)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.