Nothing
#' Cell-type naming with GSVA
#'
#' This function computes the mean expression of every cell-type before predicting the most likely cell-type using the GSVA method.
#'
#' This function inputs a Seurat object and uses the average normalized expression of each gene in each cluster to identify cell-types using the GSVA method.
#'
#' @rdname gsva_cellIdentify
#' @name gsva_cellIdentify
#'
#' @param pbmc Processed Seurat object without named cells.
#' @param theSpecies "human" or "mouse" -- it will determine which species cell-type markers will originate from.
#' @param naming_preference Once top cell-type markers are identified, naming_preferences will then extract CT markers within a more appropriate tissue type.
#' @param rda_path Path to pre-computed cell-type .gmt files (rda objects).
#' @param toSave If scMappR is allowed to write files and directories.
#'
#' @return List with the following elements:
#' \item{cellMarker}{Most likely cell-types predicted from CellMarker database.}
#' \item{panglao}{Most likely cell-types predicted from Panglao database.}
#' \item{avg_expression}{Average expression of each gene in each cell-type.}
#'
#' @importFrom ggplot2 ggplot aes geom_boxplot geom_text theme coord_flip labs element_text geom_bar theme_classic xlab ylab scale_fill_manual element_line
#' @importFrom pheatmap pheatmap
#' @importFrom graphics barplot plot
#' @importFrom Seurat AverageExpression CreateSeuratObject PercentageFeatureSet SCTransform SelectIntegrationFeatures PrepSCTIntegration FindIntegrationAnchors IntegrateData DefaultAssay RunPCA RunUMAP FindNeighbors FindClusters ScaleData FindMarkers
#' @importFrom GSVA gsva
#' @importFrom stats fisher.test median p.adjust reorder t.test sd var complete.cases ks.test dist shapiro.test mad
#' @importFrom utils combn read.table write.table head tail
#' @importFrom downloader download
#' @importFrom grDevices pdf dev.off colorRampPalette
#' @importFrom gprofiler2 gost
#' @importFrom gProfileR gprofiler
#' @importFrom pcaMethods prep pca R2cum
#' @importFrom limSolve lsei
#' @importFrom pbapply pblapply
#' @importFrom ADAPTS estCellPercent
#' @importFrom reshape melt
#'
#' @examples
#' \donttest{
#'
#' data(sm)
#' toProcess <- list(example = sm)
#' tst1 <- process_from_count(countmat_list = toProcess,name = "testProcess",
#' theSpecies = "mouse")
#' cellnames <- gsva_cellIdentify(pbmc = tst1, theSpecies = "mouse",
#' naming_preference = "brain", rda_path = "")
#'
#' }
#'
#' @export
#'
gsva_cellIdentify <- function(pbmc, theSpecies, naming_preference = -9, rda_path = "", toSave = FALSE) {
# this function inputs a Seurat object and uses the average normalized expression of each gene
# in each cluster to identify cell-types using the gsva method
# Args:
#pbmc: processed seurat object
# theSpecies: "human" or "mouse" -- it will determine which CT marker database to use -- there are some differences
# naming_preference: once top CT markers are identified, naming_preferences will then extract CT markers within a more appropriate tissue type etc.
# rda_path Path to precomputed cell-type gmt files (rda objects).
# Returns:
# A list containing the top cell-type marker for a cell-type using the panglao dataset as well as the cellMarker dataset
not_seurat <- class(pbmc)[1] == "Seurat"
if((not_seurat[1] == FALSE)[1]) {
stop("pbmc must be of class 'Seurat'")
}
naming_preferences <- c("brain", "epithelial", "endothelial", "blood", "connective","eye", "epidermis", "Digestive", "Immune", "pancreas", "liver", "reproductive", "kidney", "respiratory")
if(!naming_preference %in% naming_preferences) {
if(naming_preference != -9) {
message("Naming preference options")
message(naming_preferences)
stop("Naming preferences not in options (case sensitive) and isn't a non-choice (-9), please try again.")
}
}
if(!(theSpecies %in% c("human", "mouse"))) {
if(theSpecies != -9) {
stop("theSpecies is not 'human' 'mouse' or '-9' (case sensitive), please try again with this filled.")
}
}
if(!is.character(rda_path)) {
stop("rda_path must be of class character.")
}
if(!is.logical(toSave)) {
stop("toSave must be of class logical.")
}
avg_expr <- Seurat::AverageExpression(pbmc)
# identify average expression of clusters
# panglao
message(theSpecies)
thefiles <- list.files(path = rda_path, "_cell_markers.rda")
gmt_list <- "" # to help with the CRAN warning
cell_preference_final <- ""
if(theSpecies == "human") { # load cell marker database
if(length(thefiles) == 0) {
warning(paste0("Cell-marker databases are not present in ", rda_path, " downloading and loading data."))
#
datafile <- "human_cell_markers.rda"
metafile <- paste0(datafile)
url <- paste0("https://github.com/wilsonlabgroup/scMappR_Data/blob/master/",
metafile, "?raw=true")
destfile <- file.path(tempdir(), metafile)
downloader::download(url, destfile = destfile, mode = "wb")
load(destfile)
#
gmt_both <- gmt_list$gmt_both
gmt_cellmarker <- gmt_list$gmt_cellmarker
gmt_gobp <- gmt_list$gmt_gobp
gmt_panglao <- gmt_list$gmt_panglao
gmt_subtype <- gmt_list$gmt_subtype
} else {
load(paste0(rda_path,"/human_cell_markers.rda"))
gmt_both <- gmt_list$gmt_both
gmt_cellmarker <- gmt_list$gmt_cellmarker
gmt_gobp <- gmt_list$gmt_gobp
gmt_panglao <- gmt_list$gmt_panglao
gmt_subtype <- gmt_list$gmt_subtype
}
} else {
if(length(thefiles) == 0) {
warning(paste0("Cell-marker databases are not present in ", rda_path, " downloading and loading data."))
#
datafile <- "mouse_cell_markers.rda"
metafile <- paste0(datafile)
url <- paste0("https://github.com/wilsonlabgroup/scMappR_Data/blob/master/",
metafile, "?raw=true")
destfile <- file.path(tempdir(), metafile)
downloader::download(url, destfile = destfile, mode = "wb")
load(destfile)
gmt_both <- gmt_list$gmt_both
gmt_cellmarker <- gmt_list$gmt_cellmarker
gmt_gobp <- gmt_list$gmt_gobp
gmt_panglao <- gmt_list$gmt_panglao
gmt_subtype <- gmt_list$gmt_subtype
#
} else{
load(paste0(rda_path,"/mouse_cell_markers.rda"))
gmt_both <- gmt_list$gmt_both
gmt_cellmarker <- gmt_list$gmt_cellmarker
gmt_gobp <- gmt_list$gmt_gobp
gmt_panglao <- gmt_list$gmt_panglao
gmt_subtype <- gmt_list$gmt_subtype
}
}
gmt <- gmt_both
gbm <- GSVA::gsva(as.matrix(avg_expr$RNA), gmt, mx.diff=FALSE, verbose=FALSE, parallel.sz=1, min.sz= 5)
# complete GSVA of the average expression of each cell-types
gbm_pang <- gbm[grep("panglao", rownames(gbm)),] # cell-types from panglao
gbm_cellmarker <- gbm[-grep("panglao", rownames(gbm)),] # cell-types from cellmarker
# get the top 5 cell-types from each dataset
top5_pang <- list()
top5_cm <- list()
for(i in 1:ncol(gbm)) {
top5_pang[[i]] <- round(sort(gbm_pang[,i], decreasing = TRUE)[1:5],2)
top5_cm[[i]] <- round(sort(gbm_cellmarker[,i], decreasing = TRUE)[1:5],2)
}
if(length(thefiles) == 0) {
warning(paste0("Cell-marker preferences are not present in ", rda_path, " downloading and loading data."))
#
datafile <- "cell_preferences_categorized.rda"
metafile <- paste0(datafile)
url <- paste0("https://github.com/wilsonlabgroup/scMappR_Data/blob/master/",
metafile, "?raw=true")
destfile <- file.path(tempdir(), metafile)
downloader::download(url, destfile = destfile, mode = "wb")
load(destfile)
} else {
load(paste0(rda_path,"/cell_preferences_categorized.rda"))
}
top_ct <- function(x) {
# If you have a cell-type naming preference and cell-types that lie within that preference, those are extracted
# the top cell-type for
if(naming_preference != -9) {
# if they have the prefered tissue or cell-type to pick from
# prioritize those
mypref <- cell_preference_final[[naming_preference]]
top5_ct_pref <- names(x)
outcell_preference <- which(top5_ct_pref %in% mypref)
if(length(outcell_preference) > 0 ) {
y <- x[outcell_preference[1]]
return(paste0(names(y[1]),"_",unname(y[1])))
} else {
return(paste0(names(x[1]),"_",unname(x[1])))
}
} else {
return(paste0(names(x[1]),"_",unname(x[1])))
}
}
# return the top cell-type from each dataset in a manner is that cognisant of cell-type preferences
cm_top <- lapply(top5_cm, top_ct)
pang_top <- lapply(top5_pang, top_ct)
ave_expr_RNA <- avg_expr$RNA
l <- list(cellMarker = cm_top, panglao = pang_top, avg_expression = ave_expr_RNA)
return(l)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.