#' GeneSummary
#'
#' @param data seurat object
#' @param identity column name in metadata describing required sample identity
#' @param split.by column name in metadat describing required annotation/classification identity
#' @param cells cells to use, defaults to all
#' @param do.norm option to run normalisation on data if not already done, set TRUE if required
#' @param features features to use, defaults to all
#' @param group.by secondary grouping identity, default is NULL
#'
#' @export
#'
#' @examples
#' output <- GeneSummary(organoid, features = c("GAPDH", "SIX2", "JAG1", "EPCAM", "NPHS1"))
GeneSummary <-function(data,
identity = "orig.ident",
split.by = "DKCC",
#idents = NULL,
features = rownames(data),
cells = colnames(data),
do.norm = T,
group.by = NULL
){
# gene proportion information
Idents(data) <- identity
df <- as.data.frame(table(data@meta.data[[paste0(identity)]],
data@meta.data[[paste0(split.by)]]))
id <- as.data.frame(table(data[[paste0(identity)]]))
colnames(id) <- c("Identity", "CellTotal")
colnames(df) <- c("Identity", "Component", "Cells")
df <- left_join(df, id, "Identity")
df$Pct <- round((df$Cells / df$CellTotal * 100), digits = 2)
DefaultAssay(data) <- "RNA"
# gene expression information
#cells <- unlist(x = CellsByIdentities(object = data, idents = NULL))
if (do.norm==T){data <- NormalizeData(data)}
data.features <- FetchData(object = data, vars = features,
cells = cells, slot = "data")
data.features$id <- if (is.null(x = group.by)) {
Idents(object = data)[cells, drop = TRUE]
} else {
data[[group.by, drop = TRUE]][cells, drop = TRUE]
}
if (!is.factor(x = data.features$id)) {
data.features$id <- factor(x = data.features$id)
}
data.features$id <- as.vector(x = data.features$id) #possibly counter productive to previous call but leave it in anyway
splits <- data[[split.by, drop = TRUE]][cells, drop = TRUE]
# rename id to be "sample___identity"
data.features$id <- paste(data.features$id, splits,
sep = "___")
# this function required for next section
PercentAbove <- function(x, threshold) {
return(length(x = x[x > threshold]) / length(x = x))
}
data.plot <- lapply(X = unique(x = data.features$id), FUN = function(ident) {
data.use <- data.features[data.features$id == ident,
1:(ncol(x = data.features) - 1), drop = FALSE]
avg.exp <- apply(X = data.use, MARGIN = 2, FUN = function(x) {
return(mean(x = expm1(x = x)))
})
pct.exp <- apply(X = data.use, MARGIN = 2, FUN = PercentAbove,
threshold = 0)
cell.pct <- apply(X = data.use, MARGIN = 2, FUN = length)
return(list(avg.exp = avg.exp, pct.exp = pct.exp, cell.pct = cell.pct))
})
names(x = data.plot) <- unique(x = data.features$id)
data.plot <- lapply(X = names(x = data.plot), FUN = function(x) {
data.use <- as.data.frame(x = data.plot[[x]])
data.use$features.plot <- rownames(x = data.use)
data.use$id <- x
return(data.use)
})
data.plot <- do.call(what = "rbind", args = data.plot)
if (length(x = unique(x = data.plot$id)) < 3) {
data.transform <- ""
warning("Only less than three identities present, the expression values will be not scaled",
call. = FALSE, immediate. = TRUE)
}
avg.exp.log <- sapply(X = unique(x = data.plot$features.plot),
FUN = function(x) {
data.use <- data.plot[data.plot$features.plot ==
x, "avg.exp"]
data.use <- log1p(x = data.use)
return(data.use)
})
avg.exp.scaled <- sapply(X = unique(x = data.plot$features.plot),
FUN = function(x) {
data.use <- data.plot[data.plot$features.plot ==
x, "avg.exp"]
data.use <- scale(x = data.use)
return(data.use)
})
avg.exp.scaled <- as.vector(x = t(x = avg.exp.scaled))
avg.exp.log <- as.vector(x = t(x = avg.exp.log))
data.plot$avg.exp.scaled <- avg.exp.scaled
data.plot$avg.exp.log <- avg.exp.log
data.plot$features.plot <- factor(x = data.plot$features.plot,
levels = features)
data.plot$pct.exp <- data.plot$pct.exp * 100
if (!is.null(split.by)){
sample.id <- stringr::str_split(data.plot$id, pattern = "___", simplify = T)
data.plot$id <- sample.id[,1]
data.plot$sample <- sample.id[,2]
}
#if (is.null(levels(data@meta.data[[split.by]]))){
# data.plot$sample <- factor(data.plot$sample, levels = as.character(unique(data@meta.data[[split.by]])))
#} else {
# data.plot$sample <- factor(data.plot$sample, levels = levels(data@meta.data[[split.by]]))
#}
data.plot <- left_join(df, data.plot, by = c("Component" = "sample", "Identity" = "id")) %>% select(c("Identity", "Component", "Cells", "Pct", "avg.exp", "pct.exp",
"features.plot", "avg.exp.scaled", "avg.exp.log"))
data.plot$Component <- data.plot$Component %>% factor(levels = unique(data.plot$Component))
return(data.plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.