Nothing
#' Create "Marker_list" from Seurat object
#'
#' Splits a marker table by cluster, ranks the markers inside every cluster and
#' keeps the top `gene_filter` rows of each one.
#'
#' @param df Dataframe generated by the "FindAllMarkers" function (recommended
#'   parameters: "group.by = "Cell_type"" and "only.pos = TRUE") or by
#'   `presto::wilcoxauc()`. It must contain the cluster column (`cluster` for
#'   Seurat, `group` for presto) and all columns returned in the output.
#' @param sources Type of markers sources to use. Be one of: `"Seurat"` or
#'   `"presto"`.
#' @param sort_by Marker sorting parameter. For `sources = "Seurat"` select
#'   "avg_log2FC", "p_val_adj" or "FSS"; for `sources = "presto"` select
#'   "logFC", "padj" or "FSS". FSS (Feature Significance Score) is the product
#'   of the log fold change and the expression ratio (`pct.1` for Seurat,
#'   `pct_in * 0.01` for presto). Fold-change and FSS rankings are descending,
#'   adjusted p-value rankings are ascending. `NULL` is treated as "FSS".
#'   Default parameters use "sort_by = 'FSS'".
#' @param gene_filter The number of markers left for each cell type based
#'   on the "sort_by" parameter's level of difference. Must be a single,
#'   non-missing number that is at least 1. Default parameters use
#'   "gene_filter = 20".
#'
#' @returns The standardized "Marker_list" in the SlimR package: a named list
#'   with one data frame per cluster, holding the columns `gene`, `avg_log2FC`,
#'   `p_val_adj`, `p_val`, `pct.1`, `pct.2` (Seurat) or `feature`, `logFC`,
#'   `padj`, `pval`, `pct_in`, `pct_out` (presto).
#' @export
#' @family Standardized_Marker_list_Input
#'
#' @importFrom utils head
#'
#' @examples
#' \dontrun{
#' # Example for Seurat sources markers
#' seurat_markers <- Seurat::FindAllMarkers(
#'   object = sce,
#'   group.by = "Cell_type",
#'   only.pos = TRUE)
#'
#' Markers_list_Seurat <- Read_seurat_markers(seurat_markers,
#'   sources = "Seurat",
#'   sort_by = "avg_log2FC",
#'   gene_filter = 20
#'   )
#'
#' # Example for presto sources markers
#' seurat_markers <- dplyr::filter(
#'   presto::wilcoxauc(
#'     X = sce,
#'     group_by = "Cell_type",
#'     seurat_assay = "RNA"
#'     ),
#'   padj < 0.05, logFC > 0.5
#'   )
#'
#' Markers_list_Seurat <- Read_seurat_markers(seurat_markers,
#'   sources = "presto",
#'   sort_by = "logFC",
#'   gene_filter = 20
#'   )
#' }
#'
Read_seurat_markers <- function(df,
                                sources = c("Seurat", "presto"),
                                sort_by = "FSS",
                                gene_filter = 20) {
  sources <- match.arg(sources)

  if (!is.numeric(gene_filter)) {
    stop("'gene_filter' must be a numeric value")
  }
  # Length and NA are checked before the comparison so that a vector, an empty
  # value or NA yields this message instead of a cryptic failure inside `if`.
  if (length(gene_filter) != 1L || is.na(gene_filter) || gene_filter < 1) {
    stop("'gene_filter' must be a positive integer")
  }

  if (is.null(sort_by)) {
    sort_by <- "FSS"
  }

  # Column layout of the two supported marker tables.
  config <- list(
    Seurat = list(
      cluster_col = "cluster",
      sort_opts = c("avg_log2FC", "p_val_adj", "FSS"),
      output_cols = c("gene", "avg_log2FC", "p_val_adj", "p_val", "pct.1", "pct.2"),
      fss_components = c("avg_log2FC", "pct.1")
    ),
    presto = list(
      cluster_col = "group",
      sort_opts = c("logFC", "padj", "FSS"),
      output_cols = c("feature", "logFC", "padj", "pval", "pct_in", "pct_out"),
      fss_components = c("logFC", "pct_in")
    )
  )
  conf <- config[[sources]]

  # Same scalar guard as above: a multi-element `sort_by` must not reach `if`.
  if (length(sort_by) != 1L || !sort_by %in% conf$sort_opts) {
    stop("'sort_by' must be one of: ",
         paste(conf$sort_opts, collapse = ", "))
  }

  required_cols <- unique(c(
    conf$cluster_col,
    conf$output_cols,
    if (sort_by == "FSS") conf$fss_components else sort_by
  ))
  missing_cols <- setdiff(required_cols, colnames(df))
  if (length(missing_cols) > 0) {
    stop("Missing required columns: ",
         paste(missing_cols, collapse = ", "))
  }

  if (sort_by == "FSS") {
    # Temporary ranking column; it never reaches the output because only
    # `conf$output_cols` are selected below.
    sort_col <- ".FSS_temp"
    fss <- df[[conf$fss_components[1]]] * df[[conf$fss_components[2]]]
    if (sources != "Seurat") {
      # presto's `pct_in` is rescaled by 0.01 (presumably a 0-100 percentage,
      # whereas Seurat's `pct.1` is used as is).
      fss <- fss * 0.01
    }
    df[[sort_col]] <- fss
  } else {
    sort_col <- sort_by
  }

  # Fold changes and FSS rank from high to low, adjusted p-values from low to
  # high. The direction is the same for every cluster, so decide it once.
  descending <- sort_by %in% c("avg_log2FC", "logFC", "FSS")

  clusters <- split(df, df[[conf$cluster_col]])

  # `lapply()` keeps the cluster names produced by `split()`.
  lapply(clusters, function(cluster_df) {
    sort_key <- cluster_df[[sort_col]]
    row_order <- if (descending) order(-sort_key) else order(sort_key)
    top_rows <- head(cluster_df[row_order, , drop = FALSE], gene_filter)
    top_rows[, conf$output_cols, drop = FALSE]
  })
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.