R/Read_seurat_markers.R

Defines functions Read_seurat_markers

Documented in Read_seurat_markers

#' Create "Marker_list" from Seurat object
#'
#' Splits a differential-expression marker table by cluster, ranks the
#' markers inside each cluster by the chosen statistic and keeps the top
#' `gene_filter` rows of every cluster.
#'
#' @param df Data frame of markers. For `sources = "Seurat"` this is the
#'     table generated by the "FindAllMarkers" function (recommend to use
#'     parameter "group.by = "Cell_type"" and "only.pos = TRUE") and must
#'     contain the columns `cluster`, `gene`, `avg_log2FC`, `p_val_adj`,
#'     `p_val`, `pct.1` and `pct.2`. For `sources = "presto"` it must contain
#'     the columns `group`, `feature`, `logFC`, `padj`, `pval`, `pct_in` and
#'     `pct_out`.
#' @param sources Type of markers sources to use. Be one of: `"Seurat"` or
#'     `"presto"`.
#' @param sort_by Marker sorting parameter. For `sources = "Seurat"` select
#'     "avg_log2FC", "p_val_adj" or "FSS"; for `sources = "presto"` select
#'     "logFC", "padj" or "FSS". "FSS" (Feature Significance Score) is the
#'     product value of `log2FC` and `Expression ratio`. Fold-change and FSS
#'     rankings are descending, adjusted p-value rankings are ascending.
#'     `NULL` is treated as "FSS". Default parameters use "sort_by = 'FSS'".
#' @param gene_filter The number of markers left for each cell type based
#'     on the "sort_by" parameter's level of difference. Must be a single
#'     non-missing number greater than or equal to 1. Default parameters use
#'     "gene_filter = 20".
#'
#' @returns The standardized "Marker_list" in the SlimR package: a list with
#'     one data frame per cluster (named by cluster), each holding at most
#'     `gene_filter` rows and the gene, fold-change, adjusted p-value,
#'     p-value and the two expression-percentage columns of the chosen source.
#' @export
#' @family Standardized_Marker_list_Input
#'
#' @importFrom utils head
#'
#' @examples
#'\dontrun{
#' # Example for Seurat sources markers
#' seurat_markers <- Seurat::FindAllMarkers(
#'     object = sce,
#'     group.by = "Cell_type",
#'     only.pos = TRUE)
#'
#' Markers_list_Seurat <- Read_seurat_markers(seurat_markers,
#'     sources = "Seurat",
#'     sort_by = "avg_log2FC",
#'     gene_filter = 20
#'     )
#'
#' # Example for presto sources markers
#' seurat_markers <- dplyr::filter(
#'     presto::wilcoxauc(
#'       X = sce,
#'       group_by = "Cell_type",
#'       seurat_assay = "RNA"
#'       ),
#'     padj < 0.05, logFC > 0.5
#'     )
#'
#' Markers_list_Seurat <- Read_seurat_markers(seurat_markers,
#'     sources = "presto",
#'     sort_by = "logFC",
#'     gene_filter = 20
#'     )
#' }
#'
Read_seurat_markers <- function(df,
                                sources = c("Seurat", "presto"),
                                sort_by = "FSS",
                                gene_filter = 20) {
  sources <- match.arg(sources)

  if (!is.data.frame(df)) {
    stop("'df' must be a data frame")
  }

  if (!is.numeric(gene_filter)) {
    stop("'gene_filter' must be a numeric value")
  }
  # Reject empty, vector and NA input up front; otherwise the comparison
  # below fails inside `if ()` with an error that does not name the argument.
  if (length(gene_filter) != 1L || is.na(gene_filter)) {
    stop("'gene_filter' must be a single non-missing numeric value")
  }
  if (gene_filter < 1) {
    stop("'gene_filter' must be a positive integer")
  }

  if (is.null(sort_by)) {
    sort_by <- "FSS"
  }

  # Column layout of each supported marker source. `fss_scale` is the factor
  # applied to fold change * in-group percentage when building the FSS.
  # NOTE(review): the 0.01 factor presumably converts presto's `pct_in` from
  # a 0-100 percentage to a fraction -- confirm against presto's output.
  config <- list(
    Seurat = list(
      cluster_col = "cluster",
      sort_opts = c("avg_log2FC", "p_val_adj", "FSS"),
      output_cols = c("gene", "avg_log2FC", "p_val_adj", "p_val",
                      "pct.1", "pct.2"),
      fss_components = c("avg_log2FC", "pct.1"),
      fss_scale = 1
    ),
    presto = list(
      cluster_col = "group",
      sort_opts = c("logFC", "padj", "FSS"),
      output_cols = c("feature", "logFC", "padj", "pval",
                      "pct_in", "pct_out"),
      fss_components = c("logFC", "pct_in"),
      fss_scale = 0.01
    )
  )

  conf <- config[[sources]]

  # The scalar checks come first so that a vector `sort_by` produces the
  # intended message instead of a "condition has length > 1" error.
  sort_by_ok <- is.character(sort_by) &&
    length(sort_by) == 1L &&
    sort_by %in% conf$sort_opts
  if (!sort_by_ok) {
    stop("'sort_by' must be one of: ",
         paste(conf$sort_opts, collapse = ", "))
  }

  required_cols <- unique(c(
    conf$cluster_col,
    conf$output_cols,
    if (sort_by == "FSS") conf$fss_components else sort_by
  ))

  missing_cols <- setdiff(required_cols, colnames(df))
  if (length(missing_cols) > 0) {
    stop("Missing required columns: ",
         paste(missing_cols, collapse = ", "))
  }

  if (sort_by == "FSS") {
    # Temporary ranking column; it is dropped again when only `output_cols`
    # are selected below.
    sort_col <- ".FSS_temp"
    df[[sort_col]] <- df[[conf$fss_components[1]]] *
      df[[conf$fss_components[2]]] * conf$fss_scale
  } else {
    sort_col <- sort_by
  }

  # Effect-size style scores rank largest first; adjusted p-values rank
  # smallest first. Decided once, outside the per-cluster loop.
  descending <- sort_by %in% c("avg_log2FC", "logFC", "FSS")

  clusters <- split(df, df[[conf$cluster_col]])

  # `lapply()` keeps the cluster names produced by `split()`.
  lapply(clusters, function(cluster_df) {
    key <- cluster_df[[sort_col]]
    row_order <- if (descending) order(-key) else order(key)
    top_rows <- head(cluster_df[row_order, , drop = FALSE], gene_filter)
    top_rows[, conf$output_cols, drop = FALSE]
  })
}

Try the SlimR package in your browser

Any scripts or data that you put into this service are public.

SlimR documentation built on Aug. 19, 2025, 1:13 a.m.