R/Celltype_annotation_Excel.R

Defines functions Celltype_annotation_Excel

Documented in Celltype_annotation_Excel

#' Uses "marker_list" from Excel input for cell annotation
#'
#' @param seurat_obj Enter the Seurat object with annotation columns such as
#'     "seurat_cluster" in meta.data to be annotated.
#' @param gene_list Enter the standard "Marker_list" generated by the Excel files
#'     database for the SlimR package, generated by the "read_excel_markers()"
#'     function.
#' @param species This parameter selects the species "Human" or "Mouse" for standard
#'     gene format correction of markers entered by "Marker_list".
#' @param cluster_col Enter annotation columns such as "seurat_cluster" in meta.data
#'     of the Seurat object to be annotated. Default parameters use "cluster_col =
#'     "seurat_clusters"".
#' @param assay Enter the assay used by the Seurat object, such as "RNA". Default
#'     parameters use "assay = 'RNA'".
#' @param save_path The output path of the cell annotation picture. Example parameters
#'     use "save_path = './SlimR/Celltype_annotation_Excel/'".
#' @param metric_names Change the row name for the input mertics, not recommended unless
#'     necessary. (NULL is used as default parameter)
#' @param colour_low Color for lowest expression level. (default = "white")
#' @param colour_high Color for highest expression level. (default = "black")
#' @param colour_low_mertic Color for lowest mertic level. (default = "white")
#' @param colour_high_mertic Color for highest mertic level. (default = "black")
#'
#' @returns The cell annotation picture is saved in "save_path".
#' @export
#' @family Other_Functions_Provided_By_SlimR
#'
#' @importFrom stats setNames
#'
#' @examples
#' \dontrun{
#' Celltype_annotation_Excel(seurat_obj = sce,
#'     gene_list = Markers_list_Excel,
#'     species = "Human",
#'     cluster_col = "seurat_clusters",
#'     assay = "RNA",
#'     save_path = file.path(tempdir(),"SlimR_Celltype_annotation_Excel")
#'     colour_low = "white",
#'     colour_high = "navy",
#'     colour_low_mertic = "white",
#'     colour_high_mertic = "navy",
#'     )
#'     }
#'
Celltype_annotation_Excel <- function(
    seurat_obj,
    gene_list,
    species,
    cluster_col = "seurat_clusters",
    assay = "RNA",
    save_path = NULL,
    metric_names = NULL,
    colour_low = "white",
    colour_high = "navy",
    colour_low_mertic = "white",
    colour_high_mertic = "navy"
) {
  required_packages <- c("ggplot2", "patchwork", "dplyr", "scales", "tidyr")
  for (pkg in required_packages) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(sprintf("Please install the required package: %s", pkg))
    }
    library(pkg, character.only = TRUE)
  }

  if (!inherits(seurat_obj, "Seurat")) stop("Input object must be a Seurat object!")
  if (!is.list(gene_list)) stop("Gene list must be a list of data.frames!")
  if (species != "Human" && species != "Mouse") stop("species must be 'Human' or 'Mouse'")
  if (missing(save_path)) {stop("Output path must be explicitly specified")}
  if (!interactive() && !grepl(tempdir(), save_path, fixed = TRUE)) {
    warning("Writing to non-temporary locations is restricted", immediate. = TRUE)
    path <- file.path(tempdir(), "fallback_output")
  }

  colour_low <- if (is.null(colour_low)) "white" else colour_low
  colour_high <- if (is.null(colour_high)) "navy" else colour_high
  colour_low_mertic <- if (is.null(colour_low_mertic)) colour_low else colour_low_mertic
  colour_high_mertic <- if (is.null(colour_high_mertic)) colour_high else colour_high_mertic

  dir.create(save_path, showWarnings = FALSE, recursive = TRUE)

  common_theme <- function(base_size = 10) {
    ggplot2::theme_minimal(base_size = base_size) +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, family = "sans"),
        axis.title = ggplot2::element_text(family = "sans"),
        plot.title = ggplot2::element_text(hjust = 0, face = "bold", size = 12),
        legend.position = "right",
        panel.grid = ggplot2::element_blank()
      )
  }

  cell_types <- names(gene_list)
  total <- length(cell_types)
  cycles <- 0

  message(paste0("SlimR: The input 'Markers_list' has ",total," cell types to be processed."))

  for (i in seq_along(cell_types)) {
    cell_type <- cell_types[i]
    message(paste0("\n","[", i, "/", total, "] Processing cell type: ", cell_type))

    current_df <- gene_list[[cell_type]]

    if (ncol(current_df) < 1) {
      warning(paste("Skipping", cell_type, ": Requires at least a gene column"))
      next
    }

    genes <- current_df[[1]]
    genes_processed <- if (species == "Human") {
      toupper(genes)
    } else {
      paste0(toupper(substr(genes, 1, 1)), tolower(substr(genes, 2, nchar(genes))))
    }

    valid_idx <- genes_processed %in% rownames(seurat_obj[[assay]])
    if (sum(valid_idx) == 0) {
      warning(paste("No valid genes for", cell_type))
      next
    }

    valid_data <- data.frame(
      original = genes[valid_idx],
      processed = genes_processed[valid_idx],
      stringsAsFactors = FALSE
    )

    valid_data <- valid_data[!duplicated(valid_data$processed), ]
    gene_order_processed <- valid_data$processed
    gene_order_original <- valid_data$original

    num_clusters <- length(unique(Seurat::Idents(seurat_obj)))
    num_genes <- length(gene_order_original)
    plot_height <- max(6, num_clusters * 0.8) + 2
    plot_width <- max(10, num_genes * 0.4)

    dp <- Seurat::DotPlot(
      seurat_obj,
      features = gene_order_processed,
      assay = assay,
      group.by = cluster_col,
      cols = c(colour_low, colour_high)
    ) +
      ggplot2::scale_x_discrete(labels = setNames(gene_order_original, gene_order_processed)) +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, family = "sans", size = 10),
        axis.title.x = ggplot2::element_blank(),
        axis.title.y = ggplot2::element_text(family = "sans")
      ) +
      ggplot2::labs(
        title = paste("Cell Type:", cell_type, "| Markers_list input by users | SlimR"),
        subtitle = "Dot size: Expression percentage | Color: Normalized expression level"
      )

    combined_plot <- dp
    plot_height_total <- plot_height

    if (ncol(current_df) >= 2) {
      metric_cols <- if (!is.null(metric_names)) {
        if (length(metric_names) != (ncol(current_df) - 1)) {
          stop("metric_names length (", length(metric_names),
               ") must match number of metric columns (", ncol(current_df)-1, ")")
        }
        metric_names
      } else {
        colnames(current_df)[2:ncol(current_df)]
      }

      metric_data <- cbind(
        valid_data,
        current_df[valid_idx, 2:ncol(current_df), drop = FALSE][!duplicated(valid_data$processed), ]
      )
      colnames(metric_data)[3:ncol(metric_data)] <- metric_cols

      metric_long <- tidyr::pivot_longer(
        metric_data,
        cols = dplyr::all_of(metric_cols),
        names_to = "metric",
        values_to = "score"
      ) %>%
        dplyr::group_by(metric) %>%
        dplyr::mutate(scaled_score = scales::rescale(score, na.rm = TRUE)) %>%
        dplyr::ungroup()

      num_metrics <- length(metric_cols)
      heatmap_height_ratio <- min(0.3, max(0.15, 0.07 * num_metrics))

      hp <- ggplot2::ggplot(
        metric_long,
        ggplot2::aes(
          x = factor(original, levels = gene_order_original),
          y = metric,
          fill = scaled_score
        )
      ) +
        ggplot2::geom_tile(color = "white") +
        ggplot2::scale_fill_gradientn(
          colors = c(colour_low_mertic, colour_high_mertic),
          na.value = "white",
          limits = c(0, 1)
        ) +
        ggplot2::labs(title = "Normalized metrics in Markers_list input by users") +
        ggplot2::theme(
          axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1),
          axis.title = ggplot2::element_blank(),
          panel.background = ggplot2::element_blank(),
        )

      combined_plot <- patchwork::wrap_plots(
        dp,
        hp,
        ncol = 1,
        heights = c(1, heatmap_height_ratio)
      )
      plot_height_total <- plot_height * (1 + heatmap_height_ratio)
    }

    ggplot2::ggsave(
      filename = file.path(save_path, paste0(cell_type, ".png")),
      plot = combined_plot,
      height = plot_height_total,
      width = plot_width,
      limitsize = FALSE
    )
    cycles <- cycles + 1
    message(paste0("[", i, "/", total, "] Features plot saved for: ", cell_type))
  }
  message(paste0("\n","SlimR: Out of the ",total," cell types in 'Markers_list', ",cycles," cell types have been processed. You can see the reason for not processing cell types by 'warnings()'."))
  message(paste0("\n","SlimR: Visualization saved to: ", normalizePath(save_path)))
}

Try the SlimR package in your browser

Any scripts or data that you put into this service are public.

SlimR documentation built on Aug. 19, 2025, 1:13 a.m.