#' Calculate AUC for marker list
#'
#' Calculate the area under the curve (AUC) for each gene set for each
#' observation using the AUCell package
#'
#' @param object Seurat object
#' @param features A list of vectors of features for expression programs;
#' each entry should be a vector of feature names
#' @param name Name of the feature set (e.g. celltype) used as assay name
#' @param assay Name of assay to use
#' @param slot Name of slot to use
#' @param force.recalc Wheter to force recalculation
#' @returns SeuratObject with AUC added as 'assay'
#'
#' @export
#'
AddAUC <- function(
object=NULL,
features=NULL,
name=NULL,
assay = NULL,
slot = "data",
force.recalc = FALSE
) {
stopifnot(
!is.null(object)
)
# Select assay
if (is.null(assay)) {
assay <- Seurat::DefaultAssay(object)
} else if (!assay %in% names(object@assays)) {
stop("Assay provided does not exists.")
}
# Check features
if (is.null(features)) {
stop("No features supplied. Aborting.")
} else if (class(features) != "list") {
stop("Please specify a list of features.")
} else if (!all(unlist(features) %in% rownames(object[[assay]]))) {
index <- rownames(object[[assay]])
frm <- list()
for (i in names(features)) {
frm[[i]] <- features[[i]][!features[[i]] %in% index]
features[[i]] <- features[[i]][features[[i]] %in% index]
}
frm <- stringr::str_c(unlist(frm), collapse = ", ")
warning(paste("Some features do not exist in assay", assay, "and will be removed:", frm))
}
# Check assay name
ind <- c(names(object), names(object@meta.data))
if (is.null(name)) {
stop("Please specify a name for the assay to store AUCs")
} else if (name %in% names(object@assays) & force.recalc == FALSE) {
warning(paste("Assay", name, " already exists. To replace set force.recalc = TRUE"))
run_aucell <- FALSE
} else if (name %in% names(object@assays) & force.recalc == TRUE) {
warning(paste("Assay", name, " exists but will be overwritten."))
run_aucell <- TRUE
} else if (name %in% ind) {
stop(paste(name, "already present in object. Please change name."))
} else {
run_aucell <- TRUE
}
# Run AUCell
if (run_aucell) {
ranks <- AUCell::AUCell_buildRankings(
exprMat = slot(object@assays[[assay]], slot)
)
aucs <- AUCell::AUCell_calcAUC(features, ranks) # 400 - 7000
aucs <- as.data.frame(t(aucs@assays@data$AUC))
object[[name]] <- Seurat::CreateAssayObject(data = t(aucs))
}
return(object)
}
#' Create heatmap of gene expression
#'
#' @param object Seurat object
#' @param features Character of genes to plot
#' @param coldata Annotation for columns
#' @param rowdata Annotation for rows
#' @param rowdata_label Name of the row annotation
#' @param order_rows Whether to order rows by group
#' @param remove_duplicates Whether to remove duplicated rows
#' @param collapse_replicates Whether to average over groups of cells
#' @param coldata_group_max Maxiumum number of categories for each
#' column annotation
#' @param cells Cells to plot
#' @param heatmap_colors Name of color scale to use (default: "RdBu")
#' @param heatmap_color_dir Direction of heatmap color scale
#' @param annotation_colors List of color assignments for annotations
#' @param scale Whether to scale expression data
#' @param assay Which assay to take expression data from
#' @param slot Which slot of the assay to use ("data", "counts")
#' @param limits Limits of the color scale
#' @param title Plot title
#' @param show_rownames Whether to show rownames
#' @param show_colnames Whether to show colnames
#' @param gaps_row Numeric vector specifying row gaps
#' @param ... Other parameters passed to pheatmap
#' @export
heatmap_expression <- function(
object = NULL,
features = NULL,
coldata = NULL,
rowdata = NULL,
rowdata_label = "group",
order_rows = FALSE,
remove_duplicates = FALSE,
collapse_replicates = TRUE,
coldata_group_max = 100,
cells = colnames(object),
heatmap_colors = "RdBu",
heatmap_color_dir = -1,
annotation_colors = NULL,
scale = TRUE,
assay = NULL,
slot = "data",
limits = c(-2, 2),
title = "Differentially expressed genes",
show_rownames = FALSE,
show_colnames = FALSE,
gaps_row = NULL,
...
) {
stopifnot(
!is.null(object),
cells %in% colnames(object),
is.logical(order_rows),
is.logical(remove_duplicates),
is.logical(collapse_replicates),
is.logical(scale)
)
if (is.null(assay)) {
assay <- Seurat::DefaultAssay(object)
}
if (is.null(features)) {
ind <- rownames(slot(ds[[assay]], slot))
if (length(ind) <= 250) {
warning("No markers specified. Defaulting to whole assay.")
features <- ind
} else {
stop("No features specified. Aborting.")
}
}
# Subset by cells ------------------------------------------------------------
object <- subset(object, cells = cells)
# Column annotations ---------------------------------------------------------
if (is.null(coldata)) {
message("Coldata not specified. Defaulting to Idents")
coldata <- factor(Seurat::Idents(object))
cann <- data.frame(
row.names = colnames(object),
Idents = coldata
)
} else if (all(coldata %in% names(object@meta.data))) {
cann <- data.frame(row.names = colnames(object))
for (i in coldata) {
x <- object@meta.data[[i]]
x[is.na(x)] <- "NA"
if (length(unique(x)) <= coldata_group_max) {
cann[[i]] <- factor(x)
} else {
cann[[i]] <- x
}
}
} else {
stop("Coldata has not been specified correctly. Exiting.")
}
# Row data & annotations -----------------------------------------------------
# Detect features in assay
index <- which(features %in% rownames(slot(object[[assay]], slot)))
if (length(index) == 0) {
stop("No matching features specified.")
} else if (length(index) < length(features)) {
leftout <- features[!features %in% features[index]]
warning(paste("The following features were not found in the assay:",
paste(leftout, collapse = ", "), ". Will be removed..."))
}
if (is.null(rowdata)) {
# No rowdata specified
rann <- NA
features <- features[index]
} else if (class(rowdata) %in% c("character", "factor") &
length(rowdata)==length(features)) {
# One vector of labels
features <- features[index]
rowdata <- rowdata[index]
# Vector levels
if (class(rowdata) == "character") {
rowdata <- factor(rowdata, unique(rowdata))
} else {
rowdata <- factor(rowdata)
}
# Create annotation data.frame
if (remove_duplicates) {
ind <- which(!duplicated(features))
features <- features[ind]
rann <- data.frame(row.names = features, label = rowdata[ind])
names(rann) <- rowdata_label
} else {
rann <- data.frame(row.names = make.unique(features, sep = "-"),
label = rowdata)
names(rann) <- rowdata_label
rann$duplicated <- factor(duplicated(features))
}
} else if (class(rowdata) == "data.frame" &
nrow(rowdata) == length(features)) {
# Data frame with multiple annotations
stop("Data.frame for row annotations is not supported yet!")
} else {
warning(
"Row annotations do not fit the supplied features and will be ignored..."
)
rann <- NA
}
# Summarize count data -------------------------------------------------------
# Create groups
ind <- unlist(lapply(lapply(cann, unique), length))
cat_in <- names(cann)[ind <= coldata_group_max]
cat_out <- names(cann)[ind > coldata_group_max]
lvls <- row.names(unique_combinations(subset(cann, select=cat_in)))
groups <- combinations(subset(cann, select = cat_in))
lvls <- lvls[lvls %in% groups]
groups <- factor(groups, lvls)
# Summarize across replicates (e.g. cells)
if (collapse_replicates) {
mat <- summarize_groups(slot(object[[assay]], slot)[features, ],
groups
)
smry <- unique_combinations(subset(cann, select=cat_in))
cann$cells <- 1
temp <- data.frame(group = groups, cells = 1)
for (i in cat_out) {
temp$num <- cann[[i]]
x <- dplyr::summarise(dplyr::group_by(temp, group), num=mean(num))
smry[[i]] <- x$num[match(rownames(smry), x$group)]
}
x <- dplyr::summarise(dplyr::group_by(temp, group), cells=sum(cells))
smry[["log10_cells"]] <- log10(x$cells[match(rownames(smry), x$group)])
ind <- rownames(smry) %in% colnames(mat)
smry <- subset(smry, ind)
cann <- smry
} else {
mat <- as.matrix(
slot(object[[assay]], slot)[features, order(groups)]
)
}
rownames(mat) <- make.unique(rownames(mat), sep="-")
# Color encoding -------------------------------------------------------------
# Scaling
if (scale) {
mat <- t(scale(t(mat)))
}
# Annotation colors
ann_colors <- list()
# Column annotations
for (i in names(cann)) {
x <- unique(cann[[i]])
if (length(x) < 3) {
ann_colors[[i]] <- c("white", "black", "grey")[1:length(x)]
names(ann_colors[[i]]) <- levels(cann[[i]])
} else if (length(x) <= 9) {
ann_colors[[i]] <- RColorBrewer::brewer.pal(length(x), "Set1")
names(ann_colors[[i]]) <- levels(cann[[i]])
} else if (length(x) <= 12) {
ann_colors[[i]] <- RColorBrewer::brewer.pal(length(x), "Paired")
names(ann_colors[[i]]) <- levels(cann[[i]])
} else if (length(x) <= coldata_group_max) {
ann_colors[[i]] <- NULL
} else {
ann_colors[[i]] <- RColorBrewer::brewer.pal(4, "Greens")
}
}
# Row annotations
for (i in names(rann)) {
x <- unique(rann[[i]])
if (length(x) < 3) {
ann_colors[[i]] <- c("white", "black", "grey")[1:length(x)]
names(ann_colors[[i]]) <- levels(rann[[i]])
} else if (length(x) <= 9) {
ann_colors[[i]] <- RColorBrewer::brewer.pal(length(x), "Set2")
names(ann_colors[[i]]) <- levels(rann[[i]])
} else if (length(x) <= 12) {
ann_colors[[i]] <- RColorBrewer::brewer.pal(length(x), "Paired")
names(ann_colors[[i]]) <- levels(rann[[i]])
} else if (length(x) <= coldata_group_max) {
ann_colors[[i]] <- NULL
} else {
ann_colors[[i]] <- RColorBrewer::brewer.pal(4, "Greens")
}
}
# Add manually specified colors
if (is.list(annotation_colors)) {
for (i in names(annotation_colors)) {
if (i %in% names(ann_colors) &
all(names(annotation_colors[[i]]) %in% names(ann_colors[[i]]))) {
ann_colors[[i]] <- annotation_colors[[i]]
}
}
}
# Breaks
breaks <- seq(from = min(limits), to = max(limits), length.out = 100)
# Color scale
rcbrew <- RColorBrewer::brewer.pal.info
if (class(heatmap_colors) != "character") {
stop("Heatmap colors must be specified as a character vector.")
}
if (length(heatmap_colors) > 1) {
warning("Multiple heatmap colors specified.
Trying to generate colorRampPalette.")
} else if (length(heatmap_colors) < 1) {
warning("No heatmap colors have been specified. Reverting to default.")
heatmap_colors <- "RdBu"
} else if (heatmap_colors %in% rownames(rcbrew)) {
print("Selecting color values from RColorBrewer...")
heatmap_colors <- RColorBrewer::brewer.pal(n = 9, name = heatmap_colors)
}
if (heatmap_color_dir == -1) {
heatmap_colors <- rev(heatmap_colors)
}
color_scale <- colorRampPalette(heatmap_colors)(length(breaks))
# Order ----------------------------------------------------------------------
# Rows
if (order_rows) {
index <- order(rann[rowdata_label])
mat <- mat[features[index],]
}
# Plot -----------------------------------------------------------------------
plot <- pheatmap::pheatmap(
mat,
cluster_cols = FALSE,
cluster_rows = FALSE,
breaks = breaks,
annotation_col = cann,
annotation_row = rann,
annotation_colors = ann_colors,
show_colnames = show_colnames,
show_rownames = show_rownames,
color = color_scale,
gaps_col = head(as.numeric(cumsum(table(cann[,1]))), -1),
gaps_row = gaps_row,
main = title,
silent = TRUE,
...
)
return(plot)
}
#' Create violin plot of gene expression
#'
#' @param object Seurat object
#' @param features Vector of features, defaults to whole assay
#' @param max_features Maximum number of features to plot
#' @param assay Name of assay to use
#' @param slot Name of slot to use
#' @export
violin_expression <- function(object=NULL, features=NULL, coldata = NULL,
max.features=50,
assay=NULL, slot="data",
cells=colnames(object)) {
stopifnot(
class(object) == "Seurat",
all(cells %in% colnames(object))
)
if (is.null(assay)) {
assay <- Seurat::DefaultAssay(object)
}
if (is.null(features)) {
warning("No features specified. Defaulting to whole assay.")
features <-rownames(object@assays[[assay]])
if (length(features) > max.features) {
stop(paste("To many features in assay:", assay))
}
}
# Fetch data -----------------------------------------------------------------
mat <- slot(object[[assay]], slot)[features, ]
# Subset by cells ------------------------------------------------------------
mat <- mat[, cells]
# Convert to tidy format -----------------------------------------------------
tidyr::gather(mat)
# Plot -----------------------------------------------------------------------
plot <- ggplot2::ggplot()
return(plot)
}
#' Create dot plot of gene expression
#'
#' @param object Seurat object
#' @param features Vector of features
#' @param max_features Maximum number of features to plot
#' @param assay Name of assay to use
#' @param slot Name of slot to use
#' @export
violin_expression <- function(object=NULL, features=NULL, coldata = NULL,
max.features=50,
assay=NULL, slot="data",
cells=colnames(object)) {
stopifnot(
class(object) == "Seurat",
all(cells %in% colnames(object))
)
if (is.null(assay)) {
assay <- Seurat::DefaultAssay(object)
}
if (is.null(features)) {
warning("No features specified. Defaulting to whole assay.")
features <-rownames(object@assays[[assay]])
if (length(features) > max.features) {
stop(paste("To many features in assay:", assay))
}
}
# Subset by cells ------------------------------------------------------------
object <- subset(object, cells = cells)
# Fetch data -----------------------------------------------------------------
mat <- slot(object[[assay]], slot)[features, ]
# Convert to tidy format -----------------------------------------------------
tidyr::gather(mat)
# Plot -----------------------------------------------------------------------
plot <- ggplot2::ggplot()
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.