Seurat.utils: Seurat.utils - utility functions for Seurat

Documented in .adjustLayout .Annotate4Plotly3D AutoNumber.by.UMAP BackupReduction clUMAP countEnrichedDepletedGenes countRelevantEnrichments DimPlot.ClusterNames DiscretePaletteSafe .estMinimumFC filterGoEnrichment FlipReductionCoordinates getClusterColors getDiscretePalette getDiscretePaletteObj gg_color_hue multiFeaturePlot.A4 multiSingleClusterHighlightPlots.A4 panelCorPearson PctCellsAboveX PctCellsExpressingGenes PercentInTranscriptome Plot3D.ListOfCategories Plot3D.ListOfGenes plot3D.umap plot3D.umap.gene plotAndSaveHeatmaps PlotFilters plotGeneExpressionInBackgroundHist plotGeneExprHistAcrossCells plotQUMAPsInAFolder PlotTopGenes PlotTopGenesPerCluster qClusteringUMAPS qFeatureScatter qGeneExpressionUMAPS qMarkerCheck.BrainOrg qQC.plots.BrainOrg qqSaveGridA4 qSeuViolin qUMAP RecallReduction save2plots.A4 save4plots.A4 SavePlotlyAsHtml scBarplot.CellFractions scBarplot.CellsPerCluster scBarplot.CellsPerObject scBarplotEnrichr scBarplot.FractionAboveThr scBarplot.FractionBelowThr scBarplotStackedMetaCateg_List scCalcPCAVarExplained scEnhancedVolcano scGOEnrichment scPieClusterDistribution scPlotPCAvarExplained SetupReductionsNtoKdimensions SeuratColorVector suPlotVariableFeatures umapHiLightSel ww.check.quantile.cutoff.and.clip.outliers

# ____________________________________________________________________
# Seurat.Utils.Visualization.R ----
# ____________________________________________________________________
# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.Utils.Visualization.R")
# file.edit("~/GitHub/Packages/Seurat.utils/R/Seurat.utils.less.used.R")
# devtools::load_all("~/GitHub/Packages/Seurat.utils")
# devtools::document("~/GitHub/Packages/Seurat.utils"); devtools::load_all("~/GitHub/Packages/Seurat.utils")


# _________________________________________________________________________________________________
#' @title Plot filtering thresholds and distributions
#'
#' @description This function plots the filtering thresholds and distributions for Seurat objects,
#' using four panels to highlight the relationship between gene- and UMI-counts, and the
#' ribosomal- and mitochondrial-content.  !! Default arguments assume that `p` is a list of
#' parameters, present in the global environment, with elements `thr.lp.mito`, `thr.hp.mito`,
#' `thr.lp.ribo`, `thr.hp.ribo`, `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`.
#'
#' @param ls.obj A list of Seurat objects to be analyzed. Default: `ls.Seurat`.
#' @param parentdir The parent directory where the plots will be stored. Default: `OutDirOrig`.
#' @param suffices Suffixes used in the output plot file names. Default: Names of the Seurat objects in `ls.obj`.
#' @param filetype The file type of the output plot images. Default: `'.png'`.
#' @param below.mito Lower bound of mitochondrial content. Default: `p$thr.lp.mito`.
#' @param above.mito Upper bound of mitochondrial content. Default: `p$thr.hp.mito`.
#' @param below.ribo Lower bound of ribosomal content. Default: `p$thr.lp.ribo`.
#' @param above.ribo Upper bound of ribosomal content. Default: `p$thr.hp.ribo`.
#' @param below.nFeature_RNA Lower bound of RNA features. Default: `p$thr.lp.nFeature_RNA`.
#' @param above.nFeature_RNA Upper bound of RNA features. Default: `p$thr.hp.nFeature_RNA`.
#' @param subdir Subdirectory within `parentdir` where plots will be stored. Default: generated using a call to `kpp()`.
#' @param transparency Point transparency on scatter plots. Default: 0.25.
#' @param cex Size of points on scatter plots. Default: 0.75.
#' @param theme.used A `ggplot2` theme for all plots. Default: `theme_bw(base_size = 18)`.
#' @param LabelDistFromTop Distance from top for label placement. Default: 200.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   # !! Default arguments assume that `p` is a list of parameters, present in the global
#'   # environment, with elements `thr.lp.mito`, `thr.hp.mito`, `thr.lp.ribo`, `thr.hp.ribo`,
#'   # `thr.lp.nFeature_RNA`, and `thr.hp.nFeature_RNA`.
#'   PlotFilters(ls.Seurat)
#' }
#' }
#'
#' @seealso \code{\link[ggplot2]{ggplot}}, \code{\link[ggplot2]{labs}},
#' \code{\link[ggplot2]{geom_point}}
#'
#' @importFrom ggplot2 ggplot ggtitle geom_point
#' @importFrom Stringendo percentage_formatter
#' @importFrom MarkdownHelpers llprint
#' @importFrom cowplot plot_grid save_plot
#'
#' @export
PlotFilters <- function(
    ls.obj = ls.Seurat,
    par.ls = p,
    parentdir = OutDirOrig,
    suffices = names(ls.obj),
    filetype = ".png",
    below.mito = par.ls$"thr.lp.mito",
    above.mito = par.ls$"thr.hp.mito",
    below.ribo = par.ls$"thr.lp.ribo",
    above.ribo = par.ls$"thr.hp.ribo",
    below.nFeature_RNA = if ("quantile.thr.lp.nFeature_RNA" %in% names(par.ls)) par.ls$"quantile.thr.lp.nFeature_RNA" else par.ls$"thr.lp.nFeature_RNA",
    above.nFeature_RNA = par.ls$"thr.hp.nFeature_RNA",
    subdir = FixPlotName(
      "Filtering.plots",
      "mito", par.ls$"thr.hp.mito", par.ls$"thr.lp.mito",
      "ribo", par.ls$"thr.hp.ribo", par.ls$"thr.lp.ribo",
      "nFeature", below.nFeature_RNA, above.nFeature_RNA
    ),
    transparency = 0.25,
    cex = 0.75,
    theme.used = theme_bw(base_size = 18),
    LabelDistFromTop = 200 # for barplot_label
    ) {
  message("Expects a list of Seurat objects, `ls.obj` with names, and a list of parameters, `par.ls` with a defined structure.")

  # Create names based on the Seurat objects, catenating "dataset" and numbers 1:n
  if (is.null(suffices)) {
    suffices <- paste0("obj_", 1:length(ls.obj))
    message("Provide suffixes unique to each dataset, ideally as names of the list of Seu objects!")
  }

  stopifnot(
    is.list(ls.obj), is.list(par.ls) | is.null(par.ls),
    is.numeric(above.nFeature_RNA), is.numeric(below.nFeature_RNA),
    (below.nFeature_RNA > above.nFeature_RNA) | below.nFeature_RNA < 1, # either an absolute feature count or a quantile
    is.numeric(above.mito), is.numeric(below.mito), below.mito > above.mito,
    is.numeric(above.ribo), is.numeric(below.ribo), below.ribo > above.ribo,
    is.character(parentdir), is.character(subdir), is.character(filetype), is.numeric(transparency), is.numeric(cex),
    is.character(suffices), length(suffices) == length(ls.obj)
  )

  MarkdownHelpers::llprint(
    "We filtered for high quality cells based on the number of genes detected [", above.nFeature_RNA, ";", below.nFeature_RNA,
    "] and the fraction of mitochondrial [", percentage_formatter(above.mito), ";", percentage_formatter(below.mito),
    "] and ribosomal [", percentage_formatter(above.ribo), ";", percentage_formatter(below.ribo), "] reads."
  )

  theme_set(theme.used)
  OutDir <- FixPath(parentdir, subdir)

  print(subdir)

  MarkdownReports::create_set_OutDir(OutDir)
  stopifnot(length(suffices) == length(ls.obj))

  Calculate_nFeature_LowPass <- if (below.nFeature_RNA < 1) below.nFeature_RNA else FALSE
  for (i in 1:length(ls.obj)) {
    print(suffices[i])
    mm <- ls.obj[[i]]@meta.data

    if (Calculate_nFeature_LowPass < 1) {
      below.nFeature_RNA <- floor(quantile(ls.obj[[i]]$"nFeature_RNA", probs = Calculate_nFeature_LowPass))
      iprint("below.nFeature_RNA at", percentage_formatter(Calculate_nFeature_LowPass), "percentile:", below.nFeature_RNA)
    }

    AllMetaColumnsPresent <- all(c("nFeature_RNA", "percent.mito", "percent.ribo") %in% colnames(mm))
    if (!AllMetaColumnsPresent) {
      print(c("nFeature_RNA", "percent.mito", "percent.ribo"))
      print(c("nFeature_RNA", "percent.mito", "percent.ribo") %in% colnames(mm))
      print("Try to run:")
      print('objX <- addMetaFraction(obj = objX, col.name = "percent.mito", gene.symbol.pattern =  "^MT\\.|^MT-")')
      print('objX <- addMetaFraction(obj = objX, col.name = "percent.ribo", gene.symbol.pattern =  "^RPL|^RPS")')
      stop()
    }

    filt.nFeature_RNA <- (mm$"nFeature_RNA" < below.nFeature_RNA & mm$"nFeature_RNA" > above.nFeature_RNA)
    filt.below.mito <- (mm$"percent.mito" < below.mito & mm$"percent.mito" > above.mito)
    filt.below.ribo <- (mm$"percent.ribo" < below.ribo & mm$"percent.ribo" > above.ribo)

    mm <- cbind(mm, filt.nFeature_RNA, filt.below.mito, filt.below.ribo)

    mm$colour.thr.nFeature <- cut(mm$"nFeature_RNA",
      breaks = c(-Inf, above.nFeature_RNA, below.nFeature_RNA, Inf),
      labels = c(
        paste0("LQ (<", above.nFeature_RNA, ")"),
        paste0("HQ (", above.nFeature_RNA, "< X <", below.nFeature_RNA, ")"),
        paste0("Dbl/Outlier (>", below.nFeature_RNA, ")")
      )
    )

    A <- ggplot(data = mm, aes(x = nFeature_RNA, fill = colour.thr.nFeature)) +
      geom_histogram(binwidth = 100) +
      ggtitle(paste(
        "Cells between", above.nFeature_RNA, "and", below.nFeature_RNA,
        " UMIs are selected \n(", pc_TRUE(filt.nFeature_RNA), ")"
      )) +
      geom_vline(xintercept = below.nFeature_RNA) +
      geom_vline(xintercept = above.nFeature_RNA) +
      theme(legend.position = "top")
    # A

    B <- ggplot2::ggplot(mm, aes(x = nFeature_RNA, y = percent.mito)) +
      ggplot2::ggtitle(paste(
        "Cells below", percentage_formatter(below.mito),
        "mito reads are selected \n(with A:", pc_TRUE(filt.nFeature_RNA & filt.below.mito), ")"
      )) +
      ggplot2::geom_point(
        alpha = transparency, size = cex, show.legend = FALSE,
        aes(color = filt.nFeature_RNA & filt.below.mito)
      ) +
      scale_x_log10() + # scale_y_log10() +
      # annotation_logticks() +
      geom_hline(yintercept = below.mito) +
      geom_hline(yintercept = above.mito) +
      geom_vline(xintercept = below.nFeature_RNA) +
      geom_vline(xintercept = above.nFeature_RNA)
    # B

    C <- ggplot(mm, aes(x = nFeature_RNA, y = percent.ribo)) +
      ggtitle(paste(
        "Cells below", percentage_formatter(below.ribo),
        "ribo reads are selected \n(with A:",
        pc_TRUE(filt.nFeature_RNA & filt.below.ribo), ")"
      )) +
      geom_point(
        alpha = transparency, size = cex, show.legend = FALSE,
        aes(color = filt.nFeature_RNA & filt.below.ribo)
      ) +
      scale_x_log10() + # scale_y_log10() +
      annotation_logticks() +
      geom_hline(yintercept = below.ribo) +
      geom_hline(yintercept = above.ribo) +
      geom_vline(xintercept = below.nFeature_RNA) +
      geom_vline(xintercept = above.nFeature_RNA)
    # C

    D <- ggplot(mm, aes(x = percent.ribo, y = percent.mito)) +
      ggtitle(paste(
        "Cells w/o extremes selected \n(with A,B,C:",
        pc_TRUE(filt.nFeature_RNA & filt.below.mito & filt.below.ribo), ")"
      )) +
      geom_point(
        alpha = transparency, size = cex, show.legend = FALSE,
        aes(color = filt.nFeature_RNA & filt.below.mito & filt.below.ribo)
      ) +
      scale_x_log10() +
      scale_y_log10() +
      annotation_logticks() +
      geom_hline(yintercept = below.mito) +
      geom_hline(yintercept = above.mito) +
      geom_vline(xintercept = below.ribo) +
      geom_vline(xintercept = above.ribo)
    # D

    plot_list <- list(A, B, C, D)
    px <- cowplot::plot_grid(
      plotlist = plot_list, nrow = 2, ncol = 2,
      labels = LETTERS[1:4], label_size = 20
    )
    fname <- kpps(OutDir, FixPlotName("Filtering.thresholds", suffices[i], filetype))

    cowplot::save_plot(filename = fname, plot = px, base_height = 14, ncol = 1, nrow = 1) # Figure 2
    stopifnot(file.exists(fname))
  } # for
  # _________________________________________________________________________________________________
  create_set_OutDir(parentdir)
}


# _________________________________________________________________________________________________
# plotting.statistics.and.QC.R ______________________________ ----
# _________________________________________________________________________________________________
# source('~/GitHub/Packages/Seurat.utils/Functions/Plotting.statistics.and.QC.R')
# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Plotting.statistics.and.QC.R"))

# Source: self + web

# Requirements __________________________________________
# require(Seurat)
# require(ggplot2)
# tools for tools::toTitleCase

# May also require
# try (source('/GitHub/Packages/CodeAndRoll/CodeAndRoll.R'),silent= FALSE) # generic utilities funtions
# require('MarkdownReports') # require("devtools")

# _________________________________________________________________________________________________
#' @title Calculate the percent of variation explained by individual PC's
#'
#' @description This function calculates the percentage of variation each principal component (PC)
#' accounts for in a Seurat object. It's specifically tailored for Seurat objects and provides a
#' convenient way to understand the variance distribution across PCs. For similar calculations on
#' standard PCA objects, refer to github.com/vertesy/Rocinante `PCA.percent.var.explained()`.
#'
#' @param obj A Seurat object from which to calculate the percentage of variation explained by each
#' PC. Default: `combined.obj`.
#'
#' @return A named vector with the percentage of variation explained by each principal component.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   data("combined.obj") # Example Seurat object
#'   var_explained <- scCalcPCAVarExplained(combined.obj)
#'   print(var_explained)
#' }
#' }
#'
#' @export
scCalcPCAVarExplained <- function(obj = combined.obj) { # Determine percent of variation associated with each PC.
  pct <- obj@reductions$pca@stdev / sum(obj@reductions$pca@stdev) * 100
  names(pct) <- 1:length(obj@reductions$pca@stdev)
  return(pct)
}

# _________________________________________________________________________________________________
#' @title Plot the percent of variation explained by individual PC's
#'
#' @description This function plots the percentage of variation explained by each principal
#' component (PC) in a Seurat object. It allows for a visual assessment of how much variance is
#' captured by each PC, which is crucial for dimensionality reduction analyses. Users can choose
#' between two plotting methods: one using `MarkdownReports` and the other using `ggExpress`.
#'
#' @param obj A Seurat object from which to plot the percentage of variation explained by each PC.
#' Default: `combined.obj`.
#' @param plotname The title of the plot to be generated. Default: "Variance Explained by Principal
#' Components".
#' @param sub Subtitle for the plot, typically including information about the number of cells and
#' features analyzed. Default: A string generated from `obj` stating the number of cells and
#' features.
#' @param caption A caption for the plot. Default: "hline at 1%".
#' @param use.MarkdownReports Boolean indicating whether to use `MarkdownReports` for plotting.
#' If `FALSE`, `ggExpress` is used. Default: `FALSE`.
#' @param ... Additional arguments to be passed to `ggExpress::qbarplot` or `MarkdownReports::wbarplot`.
#'
#' @return Generates a plot showing the percent of variation each PC accounts for. This function
#' does not return a value but instead generates a plot directly.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   data("combined.obj") # Example Seurat object
#'   scPlotPCAvarExplained(combined.obj, use.MarkdownReports = TRUE)
#' }
#' }
#'
#' @importFrom MarkdownReports wbarplot
#' @importFrom ggExpress qbarplot
#'
#' @export
scPlotPCAvarExplained <- function(obj = combined.obj,
                                  plotname = "Variance Explained by Principal Components",
                                  sub = paste(ncol(obj), "cells, ", nrow(obj), "features."),
                                  caption = "hline at 1%",
                                  # caption = .parseKeyParams(obj, suffix = "| hline at 1%"),
                                  use.MarkdownReports = FALSE,
                                  ...) {
  message(" > Running scPlotPCAvarExplained...")

  pct <- scCalcPCAVarExplained(obj)
  if (use.MarkdownReports) {
    MarkdownReports::wbarplot(pct, xlab = "Principal Components", ylab = "% of variation explained", ...)
    barplot_label(round(pct, digits = 2), barplotted_variable = pct, cex = .5)
  } else {
    ggExpress::qbarplot(
      vec = pct, plotname = plotname, subtitle = sub,
      xlab = "Principal Components", ylab = "% of variation explained",
      w = 10, h = 5, hline = 1, caption = caption, ...
    )
  }
}



# _________________________________________________________________________________________________
# Gene Expression Plots ______________________________ ----
# _________________________________________________________________________________________________


# _________________________________________________________________________________________________
#' @title Gene Expression as Fraction of Total UMI Counts
#'
#' @description This function computes and visualizes gene expression levels as a fraction of total
#' UMI (Unique Molecular Identifier) counts across all genes in a Seurat object. It aims to highlight
#' the relative contribution of the most highly expressed genes to the overall transcriptome.
#'
#' @param obj A Seurat object containing gene expression data.
#' @param n.genes.barplot The number of top genes to be displayed in the final barplot, showing
#' their expression as a percentage of the total UMIs. Default: 25.
#' @param width.barplot The width of the barplot that visualizes the highest expressed genes.
#' Default: a quarter of `n.genes.barplot`.
#'
#' @return The same Seurat object passed as input, but with an additional list in the `@misc` slot
#' named `'TotalReadFraction'` that contains the relative total expression of each gene as a
#' fraction of total UMIs.
#'
#' @examples
#' \dontrun{
#' combined.obj <- PercentInTranscriptome(combined.obj)
#' }
#'
#' @export
PercentInTranscriptome <- function(
    obj = combined.obj,
    assay = DefaultAssay(obj),
    n.genes.barplot = 25,
    width.barplot = round(n.genes.barplot / 4),
    ...) {
  #
  message("Obj. version: ", obj@version)
  message("assay: ", assay)

  m.expr <- if (obj@version < "5") {
    obj@assays$RNA@counts
  } else {
    SeuratObject::GetAssayData(object = obj, assay = assay)
  }


  total.Expr <- sort(rowSums(m.expr), decreasing = TRUE)
  relative.total.Expr <- total.Expr / sum(total.Expr)
  print(head(iround(100 * relative.total.Expr), n = n.genes.barplot))

  Relative.of.Total.Gene.Expression <- relative.total.Expr * 100

  qhistogram(Relative.of.Total.Gene.Expression,
    logX = FALSE, logY = TRUE,
    plotname = "Gene expression as fraction of all transcripts *UMI's)",
    subtitle = "Percentage in RNA-counts",
    xlab = "Percent in Transcriptome (total per gene)",
    ylab = "Number of genes",
    xlab.angle = 45,
    w = 7, h = 5,
    ...
  )

  Highest.Expressed.Genes <- head(iround(100 * relative.total.Expr), n = n.genes.barplot)
  qbarplot(Highest.Expressed.Genes,
    plotname = "Percentage of highest expressed genes",
    subtitle = "Total, in RNA-counts",
    xlab = "",
    ylab = "Gene expression as percent of all UMI's",
    xlab.angle = 45,
    w = width.barplot, h = 5,
    ...
  )

  message("!!! \nTotalReadFraction is now stored under combined.obj@misc$'TotalReadFraction'.")

  obj@misc$"TotalReadFraction" <- relative.total.Expr
  return(obj)
}



# _________________________________________________________________________________________________
#' @title Histogram All Genes' Expression Level and a Highlighted Gene
#'
#' @description Shows a comparison of the expression level of the chose gene to all genes.
#' Very useful to see if the gene has a meaningful expression level. This function generates a
#' histogram to visualize the expression level distribution of a specified gene across all cells in
#' a Seurat object. It highlights the position of the gene of interest within the overall distribution.
#'
#' @param gene The gene of interest for which the expression level distribution is to be plotted.
#' Default: 'TOP2A'.
#' @param obj A Seurat object containing the expression data. Default: The first Seurat object in `ls.Seurat`.
#' @param assay The assay from which to retrieve the expression data. Default: "RNA".
#' @param slot The slot in the Seurat object from which to retrieve the expression data. Options
#' include "counts" for raw counts and "data" for normalized (and possibly log-transformed) data.
#' Default: "data".
#' @param w The width of the plot. Default: 7.
#' @param h The height of the plot. Default: 4.
#' @param ... Any other parameter that can be passed to the internally called functions.
#'
#' @export
plotGeneExpressionInBackgroundHist <- function(
    gene = "TOP2A",
    obj = combined.obj,
    assay = "RNA",
    slot = c("counts", "data")[2],
    w = 7, h = 4,
    ...) {
  message("gene: ", gene)
  stopifnot(gene %in% rownames(obj))


  GEX.Counts <- GetAssayData(object = obj, assay = assay, slot = slot)

  GEX.Counts.total <- rowSums(GEX.Counts)
  genes.expression <- GEX.Counts.total[gene]
  mean.expr <- iround(mean(GEX.Counts[gene, ]))

  suffx <- if (slot == "counts") "raw" else "normalised, logtransformed"
  (pname <- paste(gene, "and the", suffx, "transcript count distribution"))

  ggExpress::qhistogram(GEX.Counts.total,
    vline = genes.expression, logX = TRUE, w = w, h = h,
    subtitle = paste("It belong to the top", pc_TRUE(GEX.Counts.total > genes.expression), "of genes (black line). Mean expr:", mean.expr),
    plotname = pname, xlab = "Total Transcripts in Dataset", ylab = "Number of Genes",
    ...
  )
}




# _________________________________________________________________________________________________
#' @title Histogram of Gene / Geneset Aggregate Expression Across Cells
#'
#' @description Creates and optionally saves a histogram showing expression levels of specified genes
#' within a Seurat object. Provides options for aggregate gene expression, expression threshold filtering,
#' and quantile clipping for count data.
#'
#' @param obj Seurat object to analyze; Default: `combined.obj`.
#' @param genes Vector of gene names to include in the analysis; Default: c("MALAT1", "MT-CO1").
#' @param assay Assay to use from the Seurat object; Default: "RNA".
#' @param layerX Data slot to use ('data' or 'counts'); Default: "data".
#' @param thr_expr Expression threshold for highlighting in the plot; Default: 10.
#' @param suffix Additional text to append to the plot title; Default: NULL.
#' @param prefix Additional text to prepend to the plot title; Default: NULL.
#' @param xlab Label for the x-axis; Default: "log10(Summed UMI count @data)".
#' @param return_cells_passing If TRUE, returns count of cells exceeding the expression threshold; Default: `TRUE`.
#' @param clip_count_qtl_thr Quantile threshold for clipping if using count data; Default: 0.95.
#' Needed for visualization (to avoid x axis compression).
#' @param log10_counts If TRUE, log10-transforms the COUNT expression values; Default: `TRUE`.
#' @param return_quantile If TRUE, returns cell count exceeding the quantile threshold; Default: `FALSE`.
#' @param w Width of the plot in inches; Default: 9.
#' @param h Height of the plot in inches; Default: 5.
#' @param show_plot If TRUE, displays the generated plot; Default: `TRUE`.
#' @param ... Additional arguments for customization.
#'
#' @return Depending on the parameters, can return a ggplot object, the number of cells passing
#' the expression threshold, or the number of cells exceeding the quantile threshold.
#'
#' @examples
#' \dontrun{
#' plotGeneExprHistAcrossCells(obj = yourSeuratObject, genes = c("GeneA", "GeneB"))
#' }
#'
#' @return Depending on the parameters, can return a ggplot object, the number of cells passing
#' the expression threshold, or the number of cells exceeding the quantile threshold.
#'
#' @export
#' @importFrom scales hue_pal
#' @importFrom Seurat GetAssayData
#' @importFrom ggplot2 geom_vline labs
#' @importFrom ggExpress qhistogram
plotGeneExprHistAcrossCells <- function(
    genes = c("MALAT1", "MT-CO1", "MT-CO2", "MT-CYB", "TMSB4X", "KAZN"),
    obj = combined.obj,
    assay = "RNA", layerX = "data",
    thr_expr = 10,
    suffix = NULL,
    prefix = NULL,
    plotname = c("Summed Gene-set Expression -", "Expression of"),
    xlab = paste0("Expression -log10(Summed UMIs @", layerX, ")"),
    return_cells_passing = TRUE,
    clip_count_qtl_thr = 0.99,
    log10_counts = TRUE,
    return_quantile,
    w = 9, h = 5,
    show_plot = TRUE,
    ...) {
  #
  stopifnot(
    length(genes) > 0,
    layerX %in% c("data", "counts")
  )

  # Aggregate genes if necessary
  aggregate <- length(genes) > 1
  SummedExpressionPerCell <- colSums(LayerData(
    object = obj, assay = assay,
    layer = layerX
  )[genes, , drop = FALSE])

  # Clip counts if necessary
  if (layerX == "counts") {
    SummedExpressionPerCell <- CodeAndRoll2::clip.at.fixed.value(
      x = SummedExpressionPerCell,
      thr = quantile(SummedExpressionPerCell, probs = clip_count_qtl_thr)
    )
    if (log10_counts) SummedExpressionPerCell <- log10(SummedExpressionPerCell + 1)
  }

  # Create annotation
  CPT <- paste("layer:", layerX, "| assay:", assay, "| cutoff at", iround(thr_expr))

  # Add a subtitle with the number of genes and the expression threshold
  SUBT <- filter_HP(SummedExpressionPerCell, threshold = thr_expr, return_conclusion = TRUE, plot.hist = FALSE)

  if (aggregate) {
    SUBT <- paste0(SUBT, "\n", length(genes), " genes summed up, e.g: ", kppc(head(genes)))
    TTL <- kppd(prefix, plotname[1], suffix)
  } else {
    TTL <- trimws(paste(prefix, plotname[length(plotname)], paste(genes), suffix))
  }

  # Create the plot
  pobj <- ggExpress::qhistogram(SummedExpressionPerCell,
    plotname = TTL,
    subtitle = SUBT,
    caption = CPT,
    prefix = prefix,
    suffix = suffix,
    vline = thr_expr[1], filtercol = -1,
    xlab = xlab,
    ylab = "# of cells",
    w = w, h = h,
    ...
  )

  # draw additional vlines if needed
  if (length(thr_expr) > 1) {
    pobj <- pobj +
      ggplot2::geom_vline(xintercept = thr_expr[-1], col = 2, lty = 2, lwd = 1) +
      ggplot2::labs(caption = "Red line marks original estimate")
    ggExpress::qqSave(ggobj = pobj, title = sppp(TTL, "w.orig")) # , ext = '.png'
  }


  # Print the plot
  if (show_plot) print(pobj)

  # Return the number of cells passing the filter
  if (return_cells_passing) {
    return(MarkdownHelpers::filter_HP(SummedExpressionPerCell, threshold = thr_expr, plot.hist = FALSE))
  }
}



# _________________________________________________________________________________________________
#' @title Percentage of Cells Above Threshold
#'
#' @description This function calculates the percentage of cells above a specified threshold for a given
#' feature in a Seurat object. It can subset the data based on a specified identity and values.
#'
#' @param obj A Seurat object. Default: combined.obj.
#' @param feature The feature to evaluate.
#' @param ident The identity class to split the data by. Default: GetNamedClusteringRuns(obj)[1].
#' @param box Logical value indicating whether to plot the boxplot. Default: `TRUE`.
#' @param box.ident The identity class to split the data by for individual dots in the boxplot.
#' Ident will be used for the boxes displayed (matching the barplot).  Default: NULL.
#' @param threshold The threshold value to evaluate the feature against. Default: 2.
#' @param subset_ident The identity class to subset the data by. Default: NULL.
#' @param subset_values The values of the identity class to keep in the subset. Default: NULL.
#' @param omit.na Logical value indicating whether to omit NA values. Default: `TRUE`.
#' @param assay The assay to use for feature extraction. Default: 'RNA'.
#' @param plot Logical value indicating whether to plot the results. Default: `TRUE`.
#' @param ylab The label for the y-axis of the plot. Default: "% cells above threshold".
#' @param ... Additional parameters to pass to the plotting function.
#'
#' @return A named vector with the percentage of cells above the threshold for each identity class.
#'
#' @examples
#' \dontrun{
#' PctCellsAboveX(obj = seurat_object, feature = "GeneA", ident = "CellType", threshold = 1.5)
#' }
PctCellsAboveX <- function(obj = combined.obj,
                           feature = "TOP2A",
                           ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1],
                           threshold = 2,
                           suffix = ppp(substitute_deparse(obj), ncol(obj), "thr", threshold),
                           box = FALSE,
                           ident.box = NULL,
                           subset_ident = NULL,
                           subset_values = NULL,
                           omit.na = TRUE,
                           assay = "RNA",
                           plot = TRUE,
                           caption = NULL,
                           ylab = "% cells above threshold",
                           # color = NULL,
                           ...) {
  stopifnot(
    is(obj, "Seurat"),
    feature %in% colnames(obj@meta.data) | feature %in% Features(obj, assay = assay),
    ident %in% colnames(obj@meta.data),
    is.null(subset_ident) | subset_ident %in% colnames(obj@meta.data),
    is.null(subset_values) | subset_values %in% unique(obj@meta.data[, subset_ident]),
    !box & is.null(ident.box) | box
  )

  if (!is.null(subset_ident)) {
    obj <- subsetSeuObjByIdent(obj, ident = subset_ident, identGroupKeep = subset_values)
    if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip)
  }

  split_ident <- if (box) ident.box else ident
  ls_feat <- split(obj@meta.data[, feature], f = obj@meta.data[, split_ident])
  if (omit.na) ls_feat <- lapply(ls_feat, na.omit.strip)

  # Calculate the percentage of cells above the threshold for each split_ident
  Fraction.of.Cells.Above.Threshold <- sapply(ls_feat, function(x) sum(x > threshold) / length(x))

  if (box) {
    # Arrange ident.box to categories of ident
    ls.from_to <- lapply(split(obj@meta.data[, ident.box], f = obj@meta.data[, ident]), unique)
    from_to <- list.2.replicated.name.vec(ls.from_to)

    stopifnot(all(names(from_to) %in% names(Fraction.of.Cells.Above.Threshold)))

    from_to <- from_to[names(Fraction.of.Cells.Above.Threshold)]

    # Split Fraction
    ls.Fraction.of.Cells.Above.Threshold <- split(Fraction.of.Cells.Above.Threshold, f = from_to)
  }


  if (plot) {
    if (is.null(caption)) {
      caption <- pc_TRUE(is.na(Fraction.of.Cells.Above.Threshold),
        suffix = "of idents yielded NA/NaN & exluded from plot."
      )
    }
    TTL <- paste("Percentage of Cells Above Threshold for", feature)
    STL <- paste("Cells above threshold for", feature, "above", threshold)
    SFX <- ppp(feature, "by", ident, "thr", threshold, "subset_ident", subset_ident, suffix)

    Fraction.of.Cells.Above.Threshold <- na.omit.strip(Fraction.of.Cells.Above.Threshold)

    if (box) {
      pobj <- qboxplot(ls.Fraction.of.Cells.Above.Threshold, ,
        plotname = TTL, subtitle = STL, caption = caption, suffix = SFX,
        add = "dotplot", xlab.angle = 45,
        hide.legend = TRUE, ,
        ylab = ylab
        # , xlab = ident
        , ...
      )
    } else {
      "barplot"
      pobj <- qbarplot(Fraction.of.Cells.Above.Threshold,
        label = percentage_formatter(Fraction.of.Cells.Above.Threshold),
        plotname = TTL, subtitle = STL, caption = caption, suffix = SFX,
        ylab = ylab
        # , xlab = ident
        , ...
      )
    }



    print(pobj)
  }

  return(Fraction.of.Cells.Above.Threshold)
}


# _________________________________________________________________________________________________
#' @title PctCellsExpressingGenes
#'
#' @description Calculates the proportion of cells expressing one or more specified genes using a Seurat
#' object as input.
#'
#' @param genes A character vector specifying the genes of interest. Must be a non-empty character vector.
#' @param obj A Seurat object containing single-cell data.
#' @param assay The assay to use for expression data. Default: "RNA".
#' @param min.expr The minimum expression level to consider a gene as "expressed". Default: 1.
#' @param ident A categorical variable from the metadata of the Seurat object. If NULL, returns overall
#' proportions. Default: NULL.
#' @param max.idents Maximum number of unique values allowed in the `ident` variable. Default: 100.
#'
#' @return A named vector if `ident` is NULL, containing the proportion of cells co-expressing all genes
#' (AND), the proportion expressing any gene (OR), and the proportion expressing each gene individually.
#' If `ident` is provided, returns a matrix with rows representing categories and columns representing
#' expression proportions.
#'
#' @examples
#' \dontrun{
#' # Load the Seurat object (example)
#' library(Seurat)
#' combined.obj <- readRDS("path/to/your/seurat_object.rds")
#'
#' # Define genes of interest
#' # Define genes of interest
#' genes <- c("TOP2A", "MAP2")
#' # Call the function
#' PctCellsExpressingGenes(genes = genes, obj = combined.obj)
#' # Call the function with ident
#' #' PctCellsExpressingGenes(genes = genes, obj = combined.obj, ident = "cluster")
#' }
#'
#' @importFrom Seurat GetAssayData
#' @export

PctCellsExpressingGenes <- function(genes, obj, assay = "RNA", min.expr = 1,
                                    ident = NULL, max.idents = 100) {
  # Input assertions
  stopifnot(
    is.character(genes) && length(genes) > 0, # genes must be a non-empty character vector
    inherits(obj, "Seurat"), # obj must be a Seurat object
    is.character(assay) && length(assay) == 1, # assay must be a single character string
    is.numeric(min.expr) && length(min.expr) == 1, # min.expr must be a single numeric value
    is.null(ident) || (is.character(ident) && length(ident) == 1), # ident must be NULL or a single character string
    is.numeric(max.idents) && length(max.idents) == 1 && max.idents > 0 # max.idents must be a single positive numeric value
  )

  # Message parameters to console
  message("Parameters:")
  message("  genes: ", paste(genes, collapse = ", "))
  message("  assay: ", assay)
  message("  min.expr: ", min.expr)
  message("  ident: ", ifelse(is.null(ident), "NULL", paste(ident, lenght(ident), "-", head(ident))))
  message("  max.idents: ", max.idents)

  # Get the expression data
  expr.data <- Seurat::GetAssayData(obj, assay = assay, slot = "data")

  # Check if the genes are in the expression data
  genes <- intersect(genes, rownames(expr.data))
  if (length(genes) == 0) {
    stop("None of the specified genes are present in the expression data.")
  }

  # Define a function to calculate proportions
  calc_proportions <- function(expr.data, genes, min.expr) {
    # Calculate the proportion of cells expressing each gene
    expr.prop <- sapply(genes, function(gene) {
      sum(expr.data[gene, ] >= min.expr) / ncol(expr.data)
    })

    # Calculate the proportion of cells co-expressing all genes (AND)
    coexpr.prop <- sum(apply(expr.data[genes, ] >= min.expr, 2, all)) / ncol(expr.data)

    # Calculate the proportion of cells expressing any gene (OR)
    orexpr.prop <- sum(apply(expr.data[genes, ] >= min.expr, 2, any)) / ncol(expr.data)

    # Return the proportions
    return(c(coexpr.prop, orexpr.prop, expr.prop))
  }

  # Calculate proportions
  proportions <- calc_proportions(expr.data, genes, min.expr)

  # Message to console
  message(sprintf("Percentage of cells co-expressing all genes (AND): %.2f%%", proportions[1] * 100))
  message(sprintf("Percentage of cells expressing any gene (OR): %.2f%%", proportions[2] * 100))
  for (i in seq_along(genes)) {
    message("gene: ", genes[i], " ...")
    message(sprintf("Percentage of cells expressing %s: %.2f%%", genes[i], proportions[2 + i] * 100))
  }

  # If ident is NULL, return the proportions vector
  if (is.null(ident)) {
    names(proportions) <- c("Expr.ALL", "Expr.ANY", genes)
    return(proportions)
  }

  # Check ident
  ident_vals <- obj@meta.data[[ident]]
  if (is.null(ident_vals)) {
    stop(sprintf("The ident '%s' is not present in the metadata.", ident))
  }

  if (length(unique(ident_vals)) > max.idents) {
    stop(sprintf("The number of unique values in ident '%s' exceeds max.idents.", ident))
  }

  # Message ident details
  message("Ident details:")
  message("  Length of idents: ", length(unique(ident_vals)))
  message("  Head of ident values: ", paste(head(unique(ident_vals)), collapse = ", "))

  # Calculate proportions per ident
  idents <- unique(ident_vals)
  result_matrix <- matrix(NA, nrow = length(idents), ncol = length(proportions))
  rownames(result_matrix) <- idents
  colnames(result_matrix) <- c("Expr.ALL", "Expr.ANY", genes)

  for (ident_value in idents) {
    message("Cluster: ", ident_value)
    cells_in_ident <- which(ident_vals == ident_value)
    expr.data_subset <- expr.data[, cells_in_ident, drop = FALSE]
    result_matrix[ident_value, ] <- calc_proportions(expr.data_subset, genes, min.expr)
  }

  return(result_matrix)
}


# _________________________________________________________________________________________________
# Barplots / Compositional analysis ______________________________ ----
# _________________________________________________________________________________________________


# _________________________________________________________________________________________________

#' @title Generate Barplot of Cell Fractions
#'
#' @description This function generates a bar plot of cell fractions per cluster from a Seurat object.
#' It offers the option to downsample data, equalizing the number of cells in each group
#' to the number in the smallest group. The plot's bars are grouped by one variable and filled by another.
#' The function supports custom color palettes, drawing numerical values on bars, and saving the plot.
#'
#' @param fill.by The variable to fill by for the bar plot.
#' @param group.by The variable to group by for the bar plot.
#' @param obj A Seurat object.
#' @param plotname The title of the plot.
#' @param min.nr.sampled.cells The minimal number of cells to sample from each identity class. Defaults to 200 cells.
#' @param downsample Logical indicating whether to downsample data to equalize group sizes.
#' @param prefix Optional prefix for the plot title.
#' @param suffix Optional suffix for the plot title.
#' @param sub_title Optional subtitle for the plot.
#' @param hlines Numeric vector specifying y-intercepts of horizontal lines to add to the plot.
#' @param return_table Logical; if TRUE, returns a contingency table instead of plotting.
#' @param save_table Logical; if TRUE, saves the table behind the plot.
#' @param save_plot Logical; if TRUE, saves the generated plot.
#' @param also.pdf Save plot in both png and pdf formats.
#' @param seedNr Seed for random number generation to ensure reproducibility.
#' @param draw_plot Logical; if FALSE, suppresses plotting (useful if only the table is desired).
#' @param show_numbers Logical; if TRUE, adds count numbers on top of each bar in the plot.
#' @param min.pct Show % Labels above this threshold. Default = 0.05, or above 5 pct.
#' @param cex.pct Font size of pct labels.
#' @param min_frequency Minimum fraction to display individually in the plot; smaller fractions
#' are aggregated into an "Other" category.
#' @param custom_col_palette Specifies whether to use a standard or custom color palette.
#' @param color_scale Defines the color scale to use for the plot if a custom palette is selected.
#' @param show.total.cells Show total cells
#' @param cex.total Label size for total cells
#' @param xlab.angle Angle of x-axis labels.
#' @param show_plot Logical; if TRUE, shows the plot.
#' @param w Width of the plot in inches. Default: `NULL`
#' @param h Height of the plot in inches. Default: `6`
#' @param ... Additional parameters passed to internally called functions.
#'
#' @return Depending on the value of `return_table`, either returns a ggplot object or a list
#' containing values and percentages tables.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   scBarplot.CellFractions(obj = combined.obj, group.by = "integrated_snn_res.0.1", fill.by = "Phase", downsample = TRUE)
#'   scBarplot.CellFractions(obj = combined.obj, group.by = "integrated_snn_res.0.1", fill.by = "Phase", downsample = FALSE)
#' }
#' }
#' @seealso \code{\link[tools]{toTitleCase}}, \code{\link[ggplot2]{ggplot}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{summarise}}
#' @importFrom tools toTitleCase
#' @importFrom dplyr group_by summarise sample_n
#' @importFrom ggplot2 ggplot geom_bar geom_hline labs theme_classic theme element_text scale_fill_manual geom_text
#'
#' @export
scBarplot.CellFractions <- function(
    fill.by,
    group.by = GetNamedClusteringRuns()[1],
    obj = combined.obj,
    downsample = FALSE,
    min.nr.sampled.cells = 200,
    plotname = kppws("Cell proportions of", fill.by, "by", group.by),
    suffix = NULL,
    prefix = NULL,
    sub_title = suffix,
    hlines = c(.25, .5, .75),
    return_table = FALSE,
    save_table = TRUE,
    save_plot = TRUE,
    also.pdf = FALSE,
    seedNr = 1989,
    draw_plot = TRUE,
    show_numbers = FALSE,
    min.pct = 0.05,
    cex.pct = 2.5,
    min_frequency = 0, # 0.025,
    custom_col_palette = FALSE,
    color_scale = getDiscretePaletteObj(ident.used = fill.by, obj = obj, palette.used = "glasbey"),
    rnd_colors = FALSE,
    show.total.cells = TRUE,
    cex.total = 2,
    xlab.angle = 45,
    show_plot = TRUE,
    w = NULL,
    h = 6,
    ...) {
  # Input assertions
  stopifnot(
    inherits(obj, "Seurat"), # obj must be a Seurat object
    is.numeric(min_frequency) && length(min_frequency) == 1 && min_frequency >= 0 && min_frequency < 1, # min_frequency must be between 0 and 1
    group.by %in% colnames(obj@meta.data), # group.by must be a valid column in the meta.data slot of the Seurat object
    fill.by %in% colnames(obj@meta.data), # fill.by must be a valid column in the meta.data slot of the Seurat object
    "To many categories for X axis (group.by)" = nr.unique(obj@meta.data[, group.by]) < 100
  )

  META <- obj@meta.data

  if (is.null(w)) {
    categ_X <- nr.unique(META[, group.by])
    categ_Y <- nr.unique(META[, fill.by])
    w <- ceiling(max(7, categ_Y / 4, categ_X / 1.5))
  }

  set.seed(seedNr)
  pname.suffix <- capt.suffix <- NULL

  if (downsample) {
    tbl_X <- table(META[[fill.by]])
    n_smallest_group <- min(tbl_X)
    largest_grp <- max(tbl_X)

    message("The size of the smallest group is: ", n_smallest_group, " cells.")

    dsample.to.repl.thr <- (n_smallest_group < min.nr.sampled.cells) # if less than 200 cells are sampled, sample with replacement
    if (dsample.to.repl.thr) {
      message(paste(
        "If smallest category is <", min.nr.sampled.cells,
        "of total cells, than down- or up-sampling, with replacement to that minimum."
      ))
    }

    # Update plot name and caption to reflect downsampling
    plotname <- kpp(plotname, "downsampled")
    pname.suffix <- "(downsampled)"

    capt.suffix <- paste0(
      "\nDownsampled all groups in ", fill.by, " (Y) to ", n_smallest_group,
      " cells before splitting by X. \nIt is calculated as max(smallest group, 5% of total cells). Largest groups previosly was: ", largest_grp
    )
  }


  # Construct the caption based on downsampling and minimum frequency
  PFX <- if (show_numbers) "Numbers denote # cells." else percentage_formatter(min.pct, prefix = "Labeled above")
  caption_ <- paste("Top: Total cells per bar. |", PFX, capt.suffix)

  if (min_frequency > 0) caption_ <- paste(caption_, "\nCategories <", percentage_formatter(min_frequency), "are shown together as 'Other'")
  pname_ <- paste(plotname, pname.suffix)


  # Create a contingency table of the data
  contingency.table <- table(META[, group.by], META[, fill.by])
  print(contingency.table)


  if (show.total.cells) {
    # First, calculate the total counts per group
    totals <- META |>
      group_by(!!sym(group.by)) |>
      summarise(Total = n()) |>
      ungroup()

    # Merge totals back with the original data for labeling
    group_by_column <- group.by
    META <- META |>
      left_join(totals, by = setNames(nm = group_by_column, group_by_column))
  }


  if (draw_plot) {
    # calculate the proportions and add up small fractions
    prop_table <- META |>
      group_by(!!as.name(fill.by)) |>
      summarise(proportion = n() / nrow(META)) |>
      mutate("category" = ifelse(proportion < min_frequency, "Other", as.character(!!as.name(fill.by))))

    categories <- unique(prop_table$"category")
    n.categories <- length(categories)
    message(n.categories, " Y-categories present: ", kppc(sort(categories)))

    # join the proportions back to the original data
    META <- left_join(META, prop_table, by = fill.by)

    subtt <- kppws(group.by, "|", ncol(obj), "cells", sub_title)


    if (downsample) {
      # Downsample the data
      META <-
        META |>
        group_by(!!sym(fill.by)) |>
        sample_n(
          size = max(n_smallest_group, min.nr.sampled.cells),
          replace = dsample.to.repl.thr
        ) |>
        ungroup()

      contingency.table <- table(META[[group.by]], META[[fill.by]])
      contingency.table <- addmargins(contingency.table)
      print(contingency.table)
    }

    # Plot the data
    pl <- META |>
      group_by(!!sym(group.by)) |>
      ggplot(aes(fill = category, x = !!sym(group.by))) +
      geom_hline(yintercept = hlines, lwd = 1.5) +
      geom_bar(position = "fill") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      labs(
        title = pname_, subtitle = subtt,
        x = group.by, y = "Fraction of Cells",
        fill = fill.by, caption = caption_
      ) +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
      scale_y_continuous(limits = c(0, 1), expand = expansion(mult = c(0, 0.1))) +
      theme(axis.text.x = element_text(angle = xlab.angle, hjust = 1))

    # Apply custom color palette if specified
    if (!isFALSE(custom_col_palette)) {
      stopifnot("Length(custom_col_palette) should be >= nr. categories displayed." = length(custom_col_palette) >= n.categories)

      all_categs_have_a_col <- all(categories %in% names(custom_col_palette))
      if (all_categs_have_a_col) {
        colz_manual <- custom_col_palette[categories]
      } else {
        colz_manual <- custom_col_palette[seq(categories)]
      } # end if all_categs_have_a_col
      pl <- pl + scale_fill_manual(values = colz_manual)
    } else if (rnd_colors) {
      colz_manual <- sample(rainbow(n.categories))
      pl <- pl + scale_fill_manual(values = colz_manual)
    } # end if custom_col_palette / rnd_colors



    if (show_numbers) {
      pl <- pl + geom_text(aes(label = ..count..),
        stat = "count", position = position_fill(vjust = 0.5)
      )
    } else {
      pl <- pl + geom_text(
        aes(label = ifelse((..count.. / tapply(..count.., ..x.., sum)[..x..]) >= min.pct,
          scales::percent(..count.. / tapply(..count.., ..x.., sum)[..x..], accuracy = 1),
          ""
        )),
        stat = "count", position = position_fill(vjust = 0.5),
        size = cex.pct
      )
    }

    if (show.total.cells) {
      pl <- pl + geom_text(
        data = totals, aes(x = !!sym(group.by), y = 1, label = Total),
        vjust = -0.5, size = cex.total, inherit.aes = FALSE
      )
    }

    if (save_plot) {
      # sfx <- shorten_clustering_names(group.by)
      sfx <- if (!is.null(suffix)) suffix else NULL
      if (min_frequency) sfx <- sppp(sfx, min_frequency)
      qqSave(
        ggobj = pl, title = FixPlotName(plotname), also.pdf = also.pdf, w = w, h = h,
        suffix = sppp(sfx, "fr.barplot")
        # , ...
      )
    } # save_plot
  } # draw_plot

  # Compile contingency table and its frequencies
  CT_freq_sc <- list(
    "values" = contingency.table,
    "percentages" = CodeAndRoll2::rowDivide(mat = contingency.table, vec = rowSums(contingency.table))
  )

  if (save_table) {
    ReadWriter::write.simple.xlsx(CT_freq_sc,
      filename = sppp(FixPlotName(plotname), suffix, "fr.barplot")
      # suffix = sppp(FixPlotName(plotname), "fr.barplot")
    )
  }

  # Return contingency table or plot based on return_table flag
  if (show_plot) print(pl)

  if (return_table) {
    return(CT_freq_sc)
  } else {
    invisible(pl)
  } # end if return_table
}





# _________________________________________________________________________________________________
#' @title Barplot of Fraction of Cells per Cluster
#'
#' @description Visualizes the fraction of cells within each cluster through a barplot.
#'
#' @param obj Seurat object for analysis. Default: `combined.obj`.
#' @param ident Cluster identity. Used to specify which clustering results to visualize.
#' Default: First entry from ordered clustering runs.
#' @param sort If TRUE, sorts clusters by size. Default: `FALSE`.
#' @param title Title for the plot. Default: "Cells per Identity Group".
#' @param sub Subtitle for the plot. Default: "identity".
#' @param label If TRUE, shows cell count or percentage based on the label vector. Default: `TRUE`.
#' @param palette Color palette for the barplot. Default: 'glasbey'.
#' @param return_table If TRUE, returns the data used for plotting instead of the plot itself. Default: `FALSE`.
#' @param min.cells Minimum cell count threshold for categories. Adjusted by data size.
#' @param suffix Optional suffix for file naming. Used in conjunction with `kpp`.
#' @param ylab_adj Adjustment factor for y-axis label positioning. Default: 1.1.
#' @param ... Additional parameters for internal function calls.
#'
#' @examples
#' \dontrun{
#' scBarplot.CellsPerCluster()
#' scBarplot.CellsPerCluster(sort = TRUE)
#' }
#' @export scBarplot.CellsPerCluster
#'
#' @importFrom ggExpress qbarplot

scBarplot.CellsPerCluster <- function(
    obj = combined.obj,
    ident = GetOrderedClusteringRuns(obj = obj)[1],
    sort = FALSE,
    plotname = "Cells per Identity Group",
    sub = ident,
    label = list(TRUE, "percent")[[1]],
    suffix = if (label == "percent") "percent" else NULL,
    palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[3],
    return_table = FALSE,
    ylab_adj = 1.1,
    min.cells = round(ncol(obj) / 100),
    ...) {
  #
  stopifnot(
    inherits(obj, "Seurat"), is.character(ident), is.logical(sort), is.character(plotname), is.character(sub),
    is.logical(label) | label == "percent", is.character(suffix) | is.null(suffix), is.character(palette), is.logical(return_table),
    is.numeric(ylab_adj), is.numeric(min.cells), ident %in% colnames(obj@meta.data)
  )

  cat(0)
  cell.per.cl <- obj[[ident]][, 1]
  cell.per.cluster <- (table(cell.per.cl))
  if (sort) cell.per.cluster <- sort(cell.per.cluster)
  lbl <- if (isFALSE(label)) {
    NULL
  } else if (label == "percent") {
    percentage_formatter(cell.per.cluster / sum(cell.per.cluster), digitz = 2)
  } else if (isTRUE(label)) {
    cell.per.cluster
  } else {
    label
  }

  min.PCT.cells <- min.cells / ncol(obj)
  message("min cell thr: ", min.cells, " corresponding to min: ", percentage_formatter(min.PCT.cells))

  n.clusters <- length(cell.per.cluster)
  nr.cells.per.cl <- table(obj[[ident]][, 1])

  SBT <- pc_TRUE(nr.cells.per.cl < min.cells,
    NumberAndPC = TRUE,
    suffix = paste("of identites are below:", min.cells, "cells, or", percentage_formatter(min.PCT.cells), "of all cells.")
  )

  pl <- ggExpress::qbarplot(cell.per.cluster,
    plotname = plotname,
    subtitle = paste0(sub, "\n", SBT),
    suffix = kpp(ident, ncol(obj), "c", suffix),
    col = 1:n.clusters,
    caption = .parseBasicObjStats(obj = obj),
    xlab.angle = 45,
    ylim = c(0, ylab_adj * max(cell.per.cluster)),
    label = lbl,
    ylab = "Cells",
    palette_use = DiscretePaletteSafe(n = n.clusters, palette.used = palette),
    ...
  )

  if (return_table) {
    print(pl)
    return(cell.per.cluster)
  } else {
    return(pl)
  }
}

# _________________________________________________________________________________________________

# _________________________________________________________________________________________________
#' @title Barplot the Fraction of Cells Above Threshold per Cluster
#'
#' @description Generates a bar plot depicting the percentage of cells within each cluster that
#' exceed a specified threshold, based on a selected metadata column.
#'
#' @param value.col Column in metadata with values to assess against `thrX`. Default: 'percent.ribo'.
#' @param thrX Threshold for calculating the fraction of cells. Default: 0.3.
#' @param obj Seurat object with single-cell data. Default: `combined.obj`.
#' @param id.col Cluster identity column in metadata. Default: 'cl.names.top.gene.res.0.3'.
#' @param return.df Whether to return the underlying data frame instead of the plot. Default: `FALSE`.
#' @param label Whether to add labels to the bar plot. Default: NULL.
#' @param subtitle Optional subtitle for the plot.
#' @param ext File extension for saving the plot. Default: '.png'.
#' @param suffix Suffix for the output file name.
#' @param above Whether to calculate the fraction of cells above or below the threshold. Default: `TRUE`.
#' @param ... Additional parameters for plotting functions.
#'
#' @examples
#' \dontrun{
#' scBarplot.FractionAboveThr(id.col = "cl.names.top.gene.res.0.3", value.col = "percent.ribo", thrX = 0.2)
#' }
#'
#' @seealso \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}}
#'
#' @importFrom dplyr select group_by summarize
#'
#' @export
scBarplot.FractionAboveThr <- function(
    value.col = "percent.ribo",
    thrX = 0.1,
    obj = combined.obj,
    id.col = GetClusteringRuns(obj)[1],
    subtitle = id.col,
    ext = ".png",
    return.df = FALSE,
    label = NULL,
    suffix = NULL,
    above = TRUE,
    ylim = c(0, 100), # set to null for relative y axis
    ...) {
  stopifnot(value.col %in% colnames(obj@meta.data))

  meta <- obj@meta.data
  metacol <- meta |>
    dplyr::select(c(id.col, value.col))

  (df_cells_above <- metacol |>
    dplyr::group_by(!!sym(id.col)) |>
    summarize(
      n_cells = n(),
      n_cells_above = sum(!!sym(value.col) > thrX),
      fr_n_cells_above = n_cells_above / n_cells,
      fr_n_cells_below = 1 - fr_n_cells_above
    )
  )


  pass <-
    if (above) {
      metacol[, value.col] > thrX
    } else {
      metacol[, value.col] < thrX
    }
  total_average <- iround(100 * mean(pass))

  df_2vec <-
    if (above) {
      df_cells_above[, c(1, 4)]
    } else {
      df_cells_above[, c(1, 5)]
    }


  (v.fr_n_cells_above <- 100 * deframe(df_2vec))

  tag <- if (above) "above" else "below"
  if (is.null(label)) label <- percentage_formatter(deframe(df_2vec), digitz = 2)

  pname <- paste("Pc. cells", tag, value.col, "of", thrX)
  ggobj <- ggExpress::qbarplot(v.fr_n_cells_above,
    label = label,
    plotname = pname,
    filename = FixPlotName(kpp(pname, id.col, ext)),
    suffix = suffix,
    subtitle = subtitle,
    caption = paste(
      "Overall average (black line):", iround(total_average), "% |",
      substitute_deparse(obj),
    ),
    xlab.angle = 45,
    xlab = "Clusters",
    ylab = paste("% Cells", tag, "thr. (", value.col, ")"),
    ylim = ylim,
    hline = total_average,
    ...
  )
  print(ggobj)
  if (return.df) {
    return(df_cells_above)
  } else {
    ggobj
  }
}


# _________________________________________________________________________________________________
#' @title Fraction of Cells Below Threshold per Cluster
#'
#' @description Generates a bar plot to visualize the percentage of cells within each cluster that
#' fall below a specified threshold, according to a metadata column value.
#' Inherits all parameters from `scBarplot.FractionAboveThr` with the exception that `above` is set to FALSE.
#'
#' @param thrX Threshold value for assessing cell counts. Default: 0.01.
#' @param value.col Metadata column with values for threshold comparison. Default: 'percent.ribo'.
#' @param id.col Cluster identifier in metadata. Default: 'cl.names.top.gene.res.0.3'.
#' @param obj Seurat object with cell data. Default: `combined.obj`.
#' @param return.df If TRUE, returns the data frame instead of the plot. Default: `FALSE`.
#'
#' @examples
#' \dontrun{
#' scBarplot.FractionBelowThr(id.col = "cl.names.top.gene.res.0.3", value.col = "percent.ribo", thrX = 0.01)
#' }
#'
#' @seealso `scBarplot.FractionAboveThr`
#' @seealso \code{\link[dplyr]{select}}, \code{\link[dplyr]{group_by}}
#'
#' @importFrom dplyr select group_by summarize
#'
#' @export
scBarplot.FractionBelowThr <- function(
    thrX = 0.2,
    value.col = "percent.ribo",
    id.col = "cl.names.top.gene.res.0.3",
    obj = combined.obj,
    return.df = FALSE,
    subtitle = id.col,
    suffix = NULL,
    ...) {
  #
  scBarplot.FractionAboveThr(
    thrX = thrX,
    value.col = value.col,
    id.col = id.col,
    obj = obj,
    return.df = return.df,
    subtitle = subtitle,
    suffix = suffix,
    above = FALSE,  # Set `above` to FALSE to get fraction below threshold
    ...
  )
}

# _________________________________________________________________________________________________
# Pie Charts / Compositional analysis ______________________________ ----
# _________________________________________________________________________________________________
#' @title scPieClusterDistribution
#'
#' @description This function generates a pie chart of cluster distributions for a given clustering
#' identity in a single-cell RNA-seq object.
#'
#' @param obj A single-cell RNA-seq object. Default: `combined.obj`.
#' @param ident A character string specifying the clustering identity to be used. Default: the first
#' clustering run in the object.
#' @param ... Additional arguments passed to other methods.
#'
#' @return A pie chart displaying the cluster distribution.
#' @importFrom ggplot2 ggplot aes geom_bar coord_polar theme_minimal
#' @importFrom scales percent
#'
#' @examples
#' \dontrun{
#' scPieClusterDistribution(obj = combined.obj, ident = "cluster_identity")
#' }
scPieClusterDistribution <- function(obj = combined.obj, ident = GetClusteringRuns(obj)[1],
                                     ...) {
  # Input assertions
  stopifnot(
    is(obj, "Seurat"), is.character(ident), length(ident) == 1,
    ident %in% colnames(obj@meta.data)
  )

  # Compute cluster sizes
  cluster_sizes <- table(obj[[ident]])
  print(cluster_sizes)

  # Create pie chart
  qpie(cluster_sizes, caption = .parseBasicObjStats(obj), subtitle = ident)
}




# _________________________________________________________________________________________________
# List of Seurat Objects ______________________________ ----
# _________________________________________________________________________________________________






# _________________________________________________________________________________________________
#' @title Barplot of Cells Per Seurat Object
#'
#' @description Visualizes the number of cells in each Seurat object within a list, showing the
#' distribution of cell counts across different datasets or experimental conditions.
#'
#' @param ls.obj List of Seurat objects to analyze. Default: `ls.Seurat`.
#' @param plotname Title for the plot. Default: 'Nr.Cells.After.Filtering'.
#' @param xlab.angle Angle for x-axis labels, enhancing readability. Default: 45.
#' @param names Optionally provide custom names for x-axis labels. If FALSE, uses object names
#' from `ls.obj`. Default: `FALSE`.
#' @param ... Extra parameters passed to `qbarplot`.
#'
#' @examples
#' \dontrun{
#' ls.obj <- list(obj, obj)
#' scBarplot.CellsPerObject(ls.obj)
#' }
#'
#' @export scBarplot.CellsPerObject

scBarplot.CellsPerObject <- function(
    ls.obj = ls.Seurat,
    plotname = "Nr.Cells.After.Filtering",
    xlab.angle = 45,
    names = FALSE, ...) {
  stopifnot(
    "Should be run on a list of Seu. objects" = length(ls.obj) > 1,
    "Should be run on a list of Seu. objects" = all(sapply(ls.obj, is, "Seurat"))
  )

  cellCounts <- sapply(ls.obj, ncol)
  names(cellCounts) <- if (length(names) == length(ls.obj)) names else names(ls.obj)
  ggExpress::qbarplot(cellCounts,
    plotname = plotname,
    subtitle = paste(sum(cellCounts), "cells in total"),
    label = cellCounts,
    xlab.angle = xlab.angle,
    ylab = "Cells",
    ...
  )
}



# _________________________________________________________________________________________________
#' @title Stacked Barplot of Metadata Categories for List of Seurat Objects
#'
#' @description Creates and saves a stacked barplot for a specified metadata category
#' from a list of Seurat objects.
#'
#' @param ls.obj List of Seurat objects.
#' @param meta.col The metadata column name to be used for the barplot.
#' @param ... Additional arguments passed to `ggExpress::qbarplot.df`.

#'
#' @return A ggplot object representing the stacked barplot.
#'
#' @examples
#' \dontrun{
#' ls.obj <- list(obj, obj)
#' scBarplotStackedMetaCateg_List(ls.obj, meta.col = orig.ident)
#' }
#'
#' @importFrom ggExpress qbarplot.df
#' @importFrom dplyr group_by summarise select
#'
#' @export
scBarplotStackedMetaCateg_List <- function(
    ls.obj,
    meta.col, # e.g. orig.ident
    ...) {
  stopifnot(
    "Should be run on a list of Seu. objects" = length(ls.obj) > 1,
    "Should be run on a list of Seu. objects" = all(sapply(ls.obj, is, "Seurat")),
    length(meta.col) == 1,
    "meta.col not found in 1st object" = meta.col %in% colnames(ls.obj[[1]]@meta.data)
  )

  # Creating a data frame for the stacked bar plot
  df <- do.call(rbind, lapply(seq_along(ls.obj), function(x) {
    data.frame(
      Sample = names(ls.obj)[x],
      Category = ls.obj[[x]]@meta.data[[meta.col]],
      stringsAsFactors = FALSE
    )
  }))

  # Summarizing to get counts of cells per category for each sample
  df <- df |>
    dplyr::group_by(Sample, Category) |>
    dplyr::summarise(Cells = n(), .groups = "drop") |>
    dplyr::select(Sample, Cells, Category)

  TTL <- paste(meta.col, "per object")
  p <- ggExpress::qbarplot.df(df,
    plotname = TTL,
    scale = TRUE, hide.legend = FALSE,
    ...
  )
  print(p)
  return(df)
}


# _________________________________________________________________________________________________
# Colors ______________________________ ----
# _________________________________________________________________________________________________
#' @title Reproduce the ggplot2 default color palette
#'
#' @description Generates a vector of colors that emulates the default color palette used by ggplot2.
#' This function is useful for creating color sets for custom plotting functions or for applications
#' outside of ggplot2 where a similar aesthetic is desired.
#'
#' @param n Integer specifying the number of distinct colors to generate.
#'
#' @return A character vector of color values in hexadecimal format, replicating the default hue
#' color scale of ggplot2.
#'
#' @examples
#' \dontrun{
#' # Generate a palette of 5 colors
#' print(gg_color_hue(5))
#' }
#'
#' @export
gg_color_hue <- function(n) {
  hues <- seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}



# _________________________________________________________________________________________________
#' @title Safely generate a discrete color palette (NA).
#'
#' @description Safe wrapper around Seurat's DiscretePalette(), which returns NA's if too many
#' categories are requested
#' @param ident.used The identity column used for determining the number of clusters, Default: GetClusteringRuns()[1]
#' @param obj Seurat object, Default: combined.obj
#' @param palette.used The name of the palette to use, Default: c("alphabet", "alphabet2",
#' "glasbey", "polychrome", "stepped")[1]
#' @param show.colors Whether to display the colors in the palette, Default: `FALSE`.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   getDiscretePalette()
#' }
#' }
#'
#' @export
getDiscretePalette <- function() .Deprecated("DiscretePaletteSafe and DiscretePaletteObj")


# _________________________________________________________________________________________________
#' @title Generate a Discrete Color Palette for Seurat Clusters
#'
#' @description Generates a discrete color palette for visualizing clusters in a Seurat object,
#' using a specified identity column to determine the number of unique clusters.
#'
#' @param ident.used Identity column in the Seurat object to base the color palette on.
#' @param obj Seurat object containing clustering information.
#' @param palette.used The palette name to use for color generation. Options include "alphabet",
#' "alphabet2", "glasbey", "polychrome", and "stepped". Default: "alphabet2".
#' @param show.colors If TRUE, displays the generated colors. Default: `FALSE`.
#' @param seed Seed for random color generation, ensuring reproducibility. Default: 1989.
#'
#' @return A character vector of color values corresponding to the number of clusters.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   ident.used <- "resolution_1"
#'   obj <- YourSeuratObject
#'   colors <- getDiscretePaletteObj(ident.used = ident.used, obj = obj)
#'   print(colors)
#' }
#' }
#'
#' @export
getDiscretePaletteObj <- function(ident.used,
                                  obj,
                                  palette.used = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[2],
                                  show.colors = FALSE,
                                  seed = 1989) {
  stopifnot(
    is.character(ident.used), is(obj, "Seurat"),
    is.character(palette.used), is.logical(show.colors), is.numeric(seed)
  )

  categs <- unique(unlist(obj[[ident.used]]))
  n.clusters <- length(categs)

  colorz <- DiscretePaletteSafe(
    n = n.clusters,
    palette.used = palette.used,
    show.colors = show.colors,
    seed = seed
  )
  names(colorz) <- categs

  return(colorz)
}


# _________________________________________________________________________________________________
#' @title Safely generate a Discrete color palette.
#'
#' @description Generates a discrete color palette, ensuring no NA values are included, suitable
#' for visualizations where a specific number of distinct, reproducible colors is needed.
#'
#' @param n Number of colors to generate.
#' @param palette.used Palette name to use for generating colors. Options include "alphabet",
#' "alphabet2", "glasbey", "polychrome", "stepped". Default: "alphabet2".
#' @param show.colors If TRUE, displays the generated color palette. Default: `FALSE`.
#' @param seed Seed value for reproducibility, especially when random color generation is involved.
#' Default: 1989.
#'
#' @return Character vector of HEX color values.
#'
#' @examples
#' \dontrun{
#' colors <- DiscretePaletteSafe(n = 10)
#' print(colors)
#' }
#'
#' @importFrom gplots rich.colors
#' @importFrom Seurat DiscretePalette
#'
#' @export
DiscretePaletteSafe <- function(n,
                                palette.used = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[2],
                                show.colors = FALSE,
                                seed = 1989) {
  stopifnot(
    is.numeric(n), n > 0, is.character(palette.used),
    is.logical(show.colors), is.numeric(seed)
  )

  colorz <- Seurat::DiscretePalette(n = n, palette = palette.used)

  if (anyNA(colorz)) {
    colorsOK <- colorz[!is.na(colorz)]
    n.colz <- length(colorsOK)

    msg <- paste(
      "More categories then present in the palette", n, "vs.",
      n.colz, "in", palette.used, "-> recycling."
    )
    warning(msg, immediate. = TRUE)

    set.seed(seed)
    if (n > 10 * n.colz) {
      colorz <- sample(gplots::rich.colors(n))
    } else {
      colorz <- sample(x = colorsOK, size = n, replace = TRUE)
    }

    stopifnot(!anyNA(colorz))
  }

  if (show.colors) MarkdownHelpers::color_check(colorz)
  return(colorz)
}


# _________________________________________________________________________________________________
#' @title Regenerate Cluster Colors from a Seurat Object
#'
#' @description Regenerate and optionally displays the color scheme associated with the clusters
#' in a Seurat object as defined by a specified identity column.
#'
#' @param obj Seurat object containing clustering information.
#' @param use_new_palettes Logical indicating whether to use custom palettes defined in
#' `DiscretePalette` function. Default: `TRUE`.
#' @param palette Name of the color palette to use if `use_new_palettes` is TRUE.
#' Options: "alphabet", "alphabet2", "glasbey", "polychrome", "stepped". Default: "glasbey".
#' @param ident Clustering identity to use for coloring. Retrieved from the first entry
#' of `GetClusteringRuns()` by default.
#' @param show If TRUE, displays a plot showing the color mapping for each cluster. Default: `TRUE`.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   getClusterColors(obj = combined.obj, ident = GetClusteringRuns(combined.obj)[1])
#' }
#' }
#' @export
#'
#' @importFrom scales hue_pal
getClusterColors <- function(
    obj = combined.obj,
    use_new_palettes = TRUE,
    palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[3],
    ident = GetClusteringRuns()[1],
    show = TRUE) {
  (identities <- levels(as.factor(obj[[ident]][, 1])))
  n.clusters <- length(unique(obj[[ident]][, 1]))
  color_palette <- if (use_new_palettes) {
    DiscretePalette(n = n.clusters, palette = palette)
  } else {
    scales::hue_pal()(length(identities))
  }

  names(color_palette) <- (identities)
  identvec <- obj[[ident]][, 1]
  colz <- color_palette[identvec]
  names(colz) <- identvec
  if (show) MarkdownHelpers::color_check(unique(colz))
  colz
}



# _________________________________________________________________________________________________
#' @title Regenerate Color Scheme for Clusters in Seurat Object as a vector
#'
#' @description Extracts and optionally displays the color scheme assigned to cluster identities
#' within a Seurat object, facilitating consistent color usage across visualizations. You can
#' check results in a barplot with `MarkdownHelpers::color_check()`.
#'
#' @param ident Specific clustering identity to use for color extraction.
#' If NULL, the active identity in `obj` is used. Default: NULL.
#' @param obj Seurat object from which to extract cluster colors.
#' Default: `combined.obj`.
#' @param plot.colors If TRUE, visually displays the color scheme.
#' Default: `FALSE`.
#' @param simple If TRUE, returns only the unique set of colors used.
#' If FALSE, returns a named vector mapping cluster identities to colors.
#' Default: `FALSE`.
#'
#' @examples
#' \dontrun{
#' # Display colors for the active identity
#' SeuratColorVector()
#' # Retrieve and plot colors for a specified clustering identity
#' SeuratColorVector(ident = "RNA_snn_res.1", plot.colors = TRUE)
#' }
#'
#' @seealso \code{\link[scales]{hue_pal}}
#'
#' @export
#' @importFrom scales hue_pal
SeuratColorVector <- function(ident = NULL, obj = combined.obj, plot.colors = FALSE, simple = FALSE) {
  if (!is.null(ident)) {
    print(ident)
    ident.vec <- obj[[ident]][, 1]
  } else {
    print(obj@active.ident)
    ident.vec <- obj@active.ident
  }
  ident.vec <- as.factor(ident.vec)
  print(table(ident.vec))
  colorlevels <- scales::hue_pal()(length(levels(ident.vec)))
  if (plot.colors) MarkdownHelpers::color_check(colorlevels)
  if (simple) {
    colorlevels
  } else {
    CodeAndRoll2::translate(
      vec = as.character(ident.vec),
      old = levels(ident.vec),
      new = colorlevels
    )
  }
}


# _________________________________________________________________________________________________
# Metadata Heatmaps ______________________________ ----
# _________________________________________________________________________________________________


#' @title Plot and Save Heatmaps from Metadata Calculation Results
#'
#' @description Generates and saves heatmap visualizations for each metric in the results obtained
#' from metadata calculations, such as  `calculateAverageMetaData() - mean or median values of
#' specified features across different categories.
#'
#' @param results A list containing data frames with calculated metrics for each specified
#'   metadata feature, grouped by categories. Typically, this is the output from a
#'   function like `calculateAverageMetaData()`.
#' @param path The directory path where the heatmap images will be saved.
#'   Defaults to the current working directory (`getwd()`).
#' @param file.prefix A prefix for the filenames of the saved heatmap images.
#'   Defaults to "heatmap_".
#' @param scale Character indicating if the values should be scaled in the row direction,
#'   column direction, both ('row', 'column', 'none'). Defaults to "column".
#' @param cluster_rows Logical indicating whether to cluster rows. Defaults to FALSE.
#' @param show_rownames Logical indicating whether to show row names. Defaults to TRUE.
#' @param show_colnames Logical indicating whether to show column names. Defaults to TRUE.
#' @param ... Additional arguments passed to `pheatmap::pheatmap`.
#'
#' @details This function loops through each metric in the `results`, creates a heatmap
#' for it using `pheatmap`, and saves the heatmap as a PNG file in the specified path.
#' The file names will start with the provided `file.prefix`, followed by the metric name.
#'
#' @examples
#' # Assuming `results` is the output from `calculateAverageMetaData`:
#' results <- calculateAverageMetaData(obj = combined.obj)
#' plotAndSaveHeatmaps(results, path = "path/to/save/heatmaps", file.prefix = "myData_")
#'
#' @return Invisible. The function primarily generates and saves files without returning data.
#'
#' @export
plotAndSaveHeatmaps <- function(results, path = getwd(),
                                file.prefix = "heatmap_",
                                scale = "column",
                                cluster_rows = FALSE,
                                display_numbers = TRUE,
                                show_rownames = TRUE,
                                show_colnames = TRUE,
                                rowname_column = 1,
                                ...) {
  stopifnot(is.list(results), is.character(file.prefix), is.character(path))

  for (mt in names(results)) {
    res <- results[[mt]]
    stopifnot(
      !anyNA(res[[rowname_column]]),
      !anyNaN(res[[rowname_column]])
    )

    # Generate heatmap plot
    x <- ReadWriter::column.2.row.names(results[[mt]], rowname_column = rowname_column)
    pobj <- pheatmap::pheatmap(
      mat = x,
      main = paste("Heatmap of", mt, "values"),
      scale = "column",
      cluster_rows = cluster_rows,
      display_numbers = display_numbers,
      show_rownames = show_rownames,
      show_colnames = show_colnames
    )

    # Construct file name
    file_name <- paste0(file.prefix, mt, ".png")
    file_path <- file.path(path, file_name)

    # Save plot
    MarkdownReports::wplot_save_pheatmap(
      x = pobj, data = x, plotname = file_name,
      png = TRUE, pdf = FALSE, ...
    )
    cat("Saved heatmap for", mt, "to", file_path, "\n")
  } # for
}


# _________________________________________________________________________________________________
# plotting generic, misc ______________________________ ----
# _________________________________________________________________________________________________


# _________________________________________________________________________________________________
#' @title Scatter Plot of Two Features in Seurat Object
#'
#' @description Generates a scatter plot comparing two features (genes or metrics) from a Seurat
#' object and optionally saves it. The function wraps around Seurat's `FeatureScatter` for
#' enhanced usability, including optional logarithmic transformations and saving capabilities.
#'
#' @param feature1 The first feature for the scatter plot's x-axis.
#' @param feature2 The second feature for the scatter plot's y-axis.
#' @param obj Seurat object containing the data for features.
#' @param ext File extension for saving the plot, if enabled.
#' @param plot Flag to display the plot within the R session.
#' @param logX Apply logarithmic transformation to x-axis values.
#' @param logY Apply logarithmic transformation to y-axis values.
#' @param ... Additional parameters passed to Seurat's `FeatureScatter`.
#'
#' @return A `ggplot` object of the feature scatter plot if `plot` is TRUE.
#'
#' @examples
#' \dontrun{
#' # Generate and display a scatter plot for features TOP2A and ID2
#' qFeatureScatter(feature1 = "TOP2A", feature2 = "ID2", obj = yourSeuratObject)
#' }
#'
#' @seealso \code{\link[Seurat]{FeatureScatter}}, \code{\link[ggplot2]{ggplot}}
#'
#' @export
#' @importFrom ggExpress qqSave
#' @importFrom Seurat FeatureScatter
#' @importFrom ggplot2 ggtitle theme_linedraw scale_x_log10 scale_y_log10
qFeatureScatter <- function(
    feature1 = "TOP2A", feature2 = "ID2", obj = combined.obj,
    ext = "png", plot = TRUE,
    logX = FALSE, logY = FALSE,
    ...) {
  plotname <- kpp(feature1, "VS", feature2)
  p <- FeatureScatter(object = obj, feature1 = feature1, feature2 = feature2, ...) +
    ggtitle(paste("Correlation", plotname)) +
    theme_linedraw()

  if (logX) p <- p + scale_x_log10()
  if (logY) p <- p + scale_y_log10()

  # fname <- kpp("FeatureScatter", plotname)
  ggExpress::qqSave(ggobj = p, title = plotname, ext = ext, w = 8, h = 5)
  if (plot) p
}


# _________________________________________________________________________________________________
#' @title Create a Violin Plot for a Seurat Object Feature and save the file.
#'
#' @description Generates a violin plot for a specified feature in a Seurat object,
#' allowing for the data to be split by a specified grouping variable.
#' The function supports customization options such as logarithmic scaling, custom titles, and more.
#'
#' @param obj A Seurat object to be plotted.
#' @param feature A character string specifying the name of the feature to plot.
#' @param ident A character vector specifying the identities to be used in the plot.
#' @param split.by A character string specifying the grouping variable for splitting the plot.
#' @param colors A character vector specifying the colors to use for the plot.
#' @param clip.outliers A logical indicating whether to clip outliers.
#' @param replace.na A logical indicating whether NA values should be replaced.
#' @param pt.size The size of the individual datapoints in the plot. Default: 0 for simple violin plot.
#' @param sub Subtitle of the plot. Default: feature by ident.
#' @param suffix An optional string to append to the title of the plot.
#' @param suffix.2.title A logical indicating whether to append the suffix to the plot title.
#' @param logY A logical indicating whether to use a logarithmic scale for the y-axis.
#' @param hline A numeric or logical value; if numeric, the value where a horizontal line should be drawn.
#' @param caption A character string or logical for the plot caption. If FALSE, no caption is displayed.
#' @param ylab Y-axis label. Default: "Expression".
#' @param ylimit A numeric vector specifying the limits of the y-axis.
#' @param legend Show legend; Default: opposite of `label`.
#' @param legend.pos Position of legend; Default: 'NULL'.
#' @param legend.title Title of legend; Default: 'split.by'.
#' @param show_plot A logical indicating whether to display the plot.
#' @param grid A logical indicating whether to display grid lines.
#' @param w Width of the plot.
#' @param h Height of the plot.
#' @param ... Additional arguments passed to `VlnPlot`.
#'
#' @return A ggplot object representing the violin plot.
#'
#' @examples
#' # Assuming `seurat_obj` is a valid Seurat object
#' qSeuViolin(obj = seurat_obj, feature = "nFeature_RNA")
#'
#' @export
qSeuViolin <- function(
    obj,
    feature = "nFeature_RNA",
    ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1],
    assay = "RNA",
    slot = "data",
    split.by = NULL,
    colors = NULL,
    clip.outliers = TRUE,
    replace.na = FALSE,
    pt.size = 0,
    sub = NULL,
    suffix = NULL,
    suffix.2.title = FALSE,
    caption = try(.parseKeyParams(obj), silent = TRUE),
    logY = TRUE,
    hline = FALSE,
    ylab = "Expression",
    ylimit = NULL,
    legend = TRUE,
    legend.pos = NULL, # c("top", "bottom", "left", "right", "none")[2],
    legend.title = NULL,
    show_plot = TRUE,
    grid = TRUE,
    w = NULL, h = 7,
    ...) {
  message(" > Running qSeuViolin...")
  #
  stopifnot(
    "Seurat" %in% class(obj), # object must be a Seurat object
    is.logical(logY), # logY must be logical (TRUE or FALSE)
    is.logical(hline) || is.numeric(hline), # hline must be logical or numeric
    is.logical(caption) || is.character(caption), # caption must be logical or character
    is.logical(suffix.2.title), # suffix.2.title must be logical
    is.character(split.by) | is.null(split.by), # split.by must be a character or NULL
    split.by %in% colnames(obj@meta.data),
    is.character(ident),
    ident %in% colnames(obj@meta.data),
    is.character(feature),
    feature %in% colnames(obj@meta.data) || feature %in% rownames(obj)
  )

  is_meta_feature <- feature %in% colnames(obj@meta.data) # check if feature is in meta.data, otherwise it is a  gene name

  value_vector <-
    if(is_meta_feature){
      obj@meta.data[[feature]]
    } else {
      # Extract normalized expression values ("data" slot) from the RNA assay
      GetAssayData(obj, assay = assay, slot = slot)[feature, ]
    }


  if (exists("idents")) warning("Use arg. ident instead of idents!\n", immediate. = TRUE)
  if (exists("features")) warning("Use arg. feature instead of features!\n", immediate. = TRUE)

  split_col <- unlist(obj[[ident]])
  if (is.null(w)) w <- ceiling(length(unique(split_col)) / 6) + 6
  message("Plot width: ", w)


  ttl <- if (suffix.2.title) {
    paste(feature, "|", suffix)
  } else {
    as.character(feature)
  }
  subt <- paste(feature, "- by -", ident)
  if (!is.null(sub)) subt <- paste0(subt, "\n", sub)

  if (replace.na) {
    warning("NA's are not, but zeros are displayed on the plot. Avoid replace.na when possible", immediate. = TRUE)
    value_vector <- na.replace(x = value_vector, replace = 0)
  }


  if (clip.outliers) {
    warning("Outliers are clipped at percentiles 0.5% and 99.5%", immediate. = TRUE)
    obj@meta.data[[feature]] <- CodeAndRoll2::clip.outliers.at.percentile(
      x = value_vector, percentiles = c(.005, .995)
    )
  }

  if (!is.null(colors)) {
    stopifnot(colors %in% colnames(obj@meta.data))
    col_long <- as.factor(unlist(obj[[colors]]))
    colors <- as.factor.numeric(sapply(split(col_long, split_col), unique))
    stopifnot("colors cannot be uniquely split by ident. Set colors = NULL!" = length(colors) == length(unique(split_col)))
  }
  # browser()

  p.obj <- Seurat::VlnPlot(
    object = obj,
    features = feature, group.by = ident,
    cols = colors, split.by = split.by,
    pt.size = pt.size, ...
  ) +
    theme(axis.title.x = element_blank()) +
    labs(y = ylab) +
    ggtitle(label = ttl, subtitle = subt)

  if (!legend) p.obj <- p.obj + NoLegend()
  if (!is.null(legend.title)) p.obj <- p.obj + guides(fill = guide_legend(legend.title)) else NULL
  if (grid) p.obj <- p.obj + ggpubr::grids(axis = "y")

  # Add additional customization, if needed..
  if (!is.null(ylimit)) p.obj <- p.obj + ylim(ylimit[1], ylimit[2])
  if (logY) p.obj <- p.obj + ggplot2::scale_y_log10()
  if (hline[1]) p.obj <- p.obj + ggplot2::geom_hline(yintercept = hline)
  if (!isFALSE(caption)) p.obj <- p.obj + ggplot2::labs(caption = caption)
  if (!is.null(legend.pos)) p.obj <- p.obj + theme(legend.position = legend.pos)

  # Save the plot.
  TTL <- ppp(as.character(feature), "by", ident, suffix)
  qqSave(p.obj, title = TTL, suffix = ppp(flag.nameiftrue(logY), "violin"), w = w, h = h, limitsize = FALSE)
  if (show_plot) p.obj
}




# _________________________________________________________________________________________________
# Plotting 2D UMAPs, etc. ______________________________ ----
# _________________________________________________________________________________________________

# _________________________________________________________________________________________________
#' @title Quick UMAP Visualization of Gene Expression and automatically save the plot
#'
#' @description Generates a UMAP visualization for a specific feature from a Seurat object, and
#' automatically saves it. Offers options for custom titles, subtitles, saving, and more. Assumes
#' default options for custom titles, subtitles, saving, and more.
#'
#' @param feature Feature to visualize on the UMAP; Default: 'TOP2A'.
#' @param obj Seurat object containing single-cell RNA-seq data; Default: `combined.obj`.
#' @param title Title of the plot; Default: `feature`.
#' @param sub Subtitle of the plot; Default: NULL.
#' @param reduction Dimension reduction technique to be used ('umap', 'tsne', or 'pca'); Default: 'umap'.
#' @param splitby Column in the metadata to split the cells by; Default: NULL.
#' @param prefix Prefix added before the filename; Default: NULL.
#' @param suffix Suffix added to the end of the filename; Default: `sub`.
#' @param save.plot If TRUE, the plot is saved into a file; Default: `TRUE`.
#' @param PNG If TRUE, the file is saved as a .png; Default: `TRUE`.
#' @param h Height of the plot in inches; Default: 7.
#' @param w Width of the plot in inches; Default: NULL.
#' @param nr.cols Number of columns to combine multiple feature plots, ignored if `split.by` is not NULL; Default: NULL.
#' @param assay Which assay to use ('RNA' or 'integrated'); Default: 'RNA'.
#' @param axes If TRUE, axes are shown on the plot; Default: `FALSE`.
#' @param aspect.ratio Ratio of height to width. If TRUE, the ratio is fixed at 0.6; Default: `FALSE`.
#' @param HGNC.lookup If TRUE, HGNC gene symbol lookup is performed; Default: `TRUE`.
#' @param make.uppercase If TRUE, feature names are converted to uppercase; Default: `FALSE`.
#' @param qlow Lower quantile for the color scale; Default: 'q10'.
#' @param qhigh Upper quantile for the color scale; Default: 'q90'.
#' @param check_for_2D If TRUE, checks if UMAP is 2 dimensional; Default: `TRUE`.
#' @param caption Adds a caption to the ggplot object; Default: dynamically generated from `obj`.
#' @param ... Additional parameters to pass to the internally called functions.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   qUMAP(feature = "nFeature_RNA", obj = yourSeuratObject)
#'   qUMAP(feature = "TOP2A", obj = yourSeuratObject, PNG = FALSE, save.plot = TRUE)
#' }
#' }
#'
#' @export
#' @importFrom Seurat FeaturePlot NoLegend NoAxes
#' @importFrom ggplot2 ggtitle coord_fixed labs
#'
qUMAP <- function(
    feature = "TOP2A", obj = combined.obj,
    title = feature, sub = NULL,
    reduction = "umap", splitby = NULL,
    prefix = NULL,
    suffix = make.names(sub),
    save.plot = MarkdownHelpers::TRUE.unless("b.save.wplots", v = FALSE),
    PNG = TRUE,
    h = 7, w = NULL, nr.cols = NULL,
    assay = c("RNA", "integrated")[1],
    axes = FALSE,
    aspect.ratio = c(FALSE, 0.6)[1],
    HGNC.lookup = TRUE,
    make.uppercase = FALSE,
    check_for_2D = TRUE,
    qlow = "q10", qhigh = "q90",
    caption = .parseBasicObjStats(obj, simple = TRUE),
    ...) {
  #
  message("Feature: ", feature, " | Assay: ", assay)
  META <- obj@meta.data
  stopifnot(is(obj) == "Seurat", is.character(feature),
            "Feature not found in genes / meta.data."  = feature %in% c(Features(obj, assay = assay) , colnames(META)),
            "meta.data column is not numeric"  = if(feature %in% colnames(META)) is.numeric(META[, feature]) else TRUE,
            "UMAP is not 2 dimensional! \n Check obj@reductions[[reduction]]@cell.embeddings" =
              if (check_for_2D) ncol(obj@reductions[[reduction]]@cell.embeddings) == 2,
            reduction %in% names(obj@reductions),
            assay %in% names(obj@assays),
            "split.by column not found in meta.data / not categorical" =
              if (!is.null(splitby)) {splitby %in% colnames(META) && is.factor(META[[splitby]]) || is.character(META[[splitby]])} else TRUE
            )

  DefaultAssay(obj) <- assay
  gg.obj <- Seurat::FeaturePlot(obj,
    features = feature,
    reduction = reduction,
    min.cutoff = qlow, max.cutoff = qhigh,
    ncol = nr.cols,
    split.by = splitby,
    ...
  ) +
    ggtitle(label = title, subtitle = sub) +
    if (!axes) NoAxes() else NULL

  if (aspect.ratio) gg.obj <- gg.obj + ggplot2::coord_fixed(ratio = aspect.ratio)
  if (!isFALSE(caption)) gg.obj <- gg.obj + ggplot2::labs(caption = caption)

  if (save.plot) {
    fname <- ww.FnP_parser(sppp(prefix, toupper(reduction), feature, assay, paste0(ncol(obj), "c"), suffix), if (PNG) "png" else "pdf")
    try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w)) # , ncol = 1, nrow = 1
  }
  return(gg.obj)
}



# _________________________________________________________________________________________________
#' @title clUMAP - Quick Visualization of Clustering Results with UMAP and automatically save the plot
#'
#' @description Generates a UMAP visualization based on clustering results from a Seurat object,
#' and automatically saves it. Offers options for custom titles, subtitles, saving, and more. Assumes
#' default options for custom titles, subtitles, saving, and more.
#'
#' @param ident Cluster identity for visualization; Default: 'integrated_snn_res.0.5'.
#' @param obj Seurat object containing single-cell data; Default: `combined.obj`.
#' @param reduction Dimension reduction method ('umap', 'tsne', 'pca'); Default: 'umap'.
#' @param splitby Metadata column to split cells by; optional; Default: NULL.
#' @param title Main title of the plot; Default: `ident`.
#' @param sub Subtitle of the plot; optional; Default: NULL.
#' @param prefix Prefix for saved filename; optional; Default: NULL.
#' @param suffix Suffix for saved filename; defaults to plot subtitle; Default: NULL.
#' @param caption Plot caption; optional; Default: dynamically generated from `obj`.
#' @param label.cex Size of cluster labels; Default: 7.
#' @param h Height of plot in inches; Default: 7.
#' @param w Width of plot in inches; optional; Default: NULL.
#' @param nr.cols Number of columns for facet wrap if `splitby` is not NULL; Default: NULL.
#' @param plotname Custom plot name for saving; Default: dynamically generated from `reduction` and `ident`.
#' @param cols Custom color vector for clusters; optional; Default: NULL.
#' @param palette Color palette for generating cluster colors; Default: 'glasbey'.
#' @param highlight.clusters Specific clusters to be highlighted; optional; Default: NULL.
#' @param cells.highlight Specific cells to be highlighted; optional; Default: NULL.
#' @param cols.highlight Color for highlighted cells; Default: 'red'.
#' @param sizes.highlight Size of highlighted cells; Default: 1.
#' @param label Show cluster labels; Default: `TRUE`.
#' @param repel Repel labels to avoid overlap; Default: `TRUE`.
#' @param legend Show legend; Default: opposite of `label`.
#' @param legend.pos Position of legend; Default: 'NULL'.
#' @param axes Show axes; Default: `FALSE`.
#' @param aspect.ratio Fixed aspect ratio for the plot; Default: `TRUE`.
#' @param MaxCategThrHP Maximum number of categories before simplification; Default: 200.
#' @param save.plot Save plot to file; Default: `TRUE`.
#' @param PNG Save as PNG (TRUE) or PDF (FALSE); Default: `TRUE`.
#' @param check_for_2D Ensure UMAP is 2D; Default: `TRUE`.
#' @param ... Additional parameters for `DimPlot`.
#'
#' @examples
#' \dontrun{
#' clUMAP(ident = "integrated_snn_res.0.5", obj = yourSeuratObj)
#' clUMAP(ident = "integrated_snn_res.0.5", obj = yourSeuratObj, cols = RColorBrewer::brewer.pal(8, "Dark2"))
#' }
#'
#' @importFrom ggplot2 ggtitle labs coord_fixed ggsave
#' @importFrom Seurat DimPlot NoLegend NoAxes
#' @importFrom RColorBrewer brewer.pal
#'
#' @export
clUMAP <- function(
    ident = NULL,
    obj = combined.obj,
    title = ident,
    sub = NULL,
    prefix = NULL,
    suffix = make.names(sub),
    caption = .parseBasicObjStats(obj, simple = TRUE), # try(.parseKeyParams(obj = obj), silent = TRUE),
    reduction = "umap", splitby = NULL,
    label.cex = 7,
    h = 7, w = NULL,
    nr.cols = NULL,
    plotname = ppp(toupper(reduction), ident),
    cols = NULL,
    palette = c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped")[4],
    max.cols.for.std.palette = 7,
    highlight.clusters = NULL, cells.highlight = NULL,
    cols.highlight = "red",
    sizes.highlight = 1,
    label = TRUE, repel = TRUE,
    legend = !label,
    legend.pos = NULL, # c("top", "bottom", "left", "right", "none")[2],
    MaxCategThrHP = 200,
    axes = NULL,
    aspect.ratio = c(FALSE, 0.6)[2],
    save.plot = MarkdownHelpers::TRUE.unless("b.save.wplots", v = FALSE),
    PNG = TRUE,
    check_for_2D = TRUE,
    ...) {
  #

  stopifnot(
    is.character(caption) | is.null(caption),
    is.logical(save.plot),
    is.character(suffix) | is.null(suffix)
  )
  tictoc::tic()

  if (is.null(ident)) {
    ident <- GetNamedClusteringRuns(obj, v = FALSE)[1]
    message("Identity not provided. Plotting: ", ident, "\n")
  }

  if (check_for_2D) {
    umap_dims <- ncol(obj@reductions[[reduction]]@cell.embeddings)
    if (umap_dims != 2) warning(">>> UMAP is not 2 dimensional! \n Check obj@reductions[[reduction]]@cell.embeddings")
  }

  IdentFound <- (ident %in% colnames(obj@meta.data))
  if (!IdentFound) {
    ident <- GetClusteringRuns(obj = obj, pat = "_res.*[0,1]\\.[0-9]$")[1]
    iprint("Identity not found. Plotting", ident, "\n")
  }
  identity <- obj[[ident]]
  NtCategs <- length(unique(identity[, 1]))
  if (NtCategs > 1000) warning("More than 1000 levels! qUMAP?", immediate. = TRUE)


  # Highlight specific clusters if provided _____________________________________________________
  if (!missing(highlight.clusters)) {
    if (!(all(highlight.clusters %in% identity[, 1]))) {
      MSG <- paste(
        "Some clusters not found in the object! Missing:",
        kppc(setdiff(highlight.clusters, unique(identity[, 1]))), "\nFrom:\n",
        kppc(sort(unique(identity[, 1])))
      )
      warning(MSG, immediate. = TRUE)
    }

    idx.ok <- identity[, 1] %in% highlight.clusters
    stopifnot("minimum 10 cells are needed" = sum(idx.ok) > 10)

    highlight.these <- rownames(identity)[idx.ok]
    PCT <- percentage_formatter(length(highlight.these) / ncol(obj), suffix = "or")

    # Annotation to subtitle _________________________________________________________________
    sub2 <- paste(PCT, length(highlight.these), "cells in", ident, "are highlighted")
    sub3 <- paste("Highlighted clusters:", kppc(highlight.clusters))
    sub <- if (is.null(sub)) pnl(sub2, sub3) else pnl(sub, sub2, sub3)

    # title <- kpipe(ident, )
  } else {
    highlight.these <- NULL
  }

  # Message if highlighting cells _____________________________________________________________
  if (!missing(cells.highlight)) {
    highlight.these <- cells.highlight
    message("Highlighting ", length(highlight.these), " cells, e.g.: ", head(highlight.these))
    message("cols.highlight: ", cols.highlight, " | sizes.highlight: ", sizes.highlight)
  }

  if (is.null(cols)) {
    cols <- if (NtCategs > max.cols.for.std.palette) {
      getDiscretePaletteObj(
        ident.used = ident, palette.used = palette,
        obj = obj, show.colors = FALSE
      )
    }
  }

  # if (FALSE) cols <- adjustcolor(cols, alpha.f = alpha)

  if (!is.null(highlight.these)) {
    cols <- "lightgrey"
  }

  # Plot _________________________________________________________________________________________
  if (NtCategs > MaxCategThrHP) {
    iprint("Too many categories (", NtCategs, ") in ", ident, "- use qUMAP for continous variables.")
  } else {
    if (length(unique(identity)) < MaxCategThrHP) {
      gg.obj <-
        Seurat::DimPlot(
          object = obj, group.by = ident,
          cols = cols,
          reduction = reduction, split.by = splitby,
          ncol = nr.cols,
          cells.highlight = highlight.these,
          cols.highlight = cols.highlight,
          sizes.highlight = sizes.highlight,
          label = label, repel = repel, label.size = label.cex,
          ...
        ) +
        ggtitle(label = title, subtitle = sub) +
        if (!legend) NoLegend() else NULL
    }

    # Additional options ___________________________________________________
    if (is.null(axes)) gg.obj <- gg.obj + Seurat::NoAxes()
    if (!is.null(caption)) gg.obj <- gg.obj + ggplot2::labs(caption = caption)
    if (!is.null(legend.pos)) gg.obj <- gg.obj + ggplot2::theme(legend.position = legend.pos)
    if (aspect.ratio) gg.obj <- gg.obj + ggplot2::coord_fixed(ratio = aspect.ratio)
    if (legend) suffix <- paste0(suffix, ".lgnd")

    # Save plot ___________________________________________________________
    if (save.plot) {
      pname <- sppp(prefix, plotname, paste0(ncol(obj), "c"), suffix, sppp(highlight.clusters))
      fname <- ww.FnP_parser(pname, if (PNG) "png" else "pdf")
      try(save_plot(filename = fname, plot = gg.obj, base_height = h, base_width = w))
    }
    tictoc::toc()
    return(gg.obj)
  } # if not too many categories
}






# _________________________________________________________________________________________________
#' @title Highlight Selected Clusters on UMAP
#'
#' @description Generates a UMAP plot from a Seurat object with specified clusters highlighted.
#' It saves the resulting UMAP plot directly to the current working directory.
#'
#' @param obj Seurat object to be visualized; Default: `combined.obj`.
#' @param COI Vector of cluster IDs to highlight on the UMAP plot;
#' Default: `c("0", "2", "4")`.
#' @param ident Name of the metadata column containing cluster IDs;
#' Default: `GetClusteringRuns()[1]`.
#' @param h Height of the plot; Default: `7`.
#' @param w Width of the plot; Default: `5`.
#' @param show_plot Logical; if `TRUE`, the plot will be displayed in the RStudio viewer;
#' Default: `TRUE`.
#' @param ... Additional arguments to be passed to the `DimPlot` function.#'
#'
#' @return Saves a UMAP plot highlighting specified clusters to the current working directory.
#' The function itself does not return an object within R.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   # GetClusteringRuns()[1] "integrated_snn_res.0.1"
#'   umapHiLightSel(obj = combined.obj, COI = c("0", "1"), ident = GetClusteringRuns()[1])
#' }
#' }
#'
#' @seealso \code{\link[Seurat]{DimPlot}}
#'
#' @export
#' @importFrom Seurat DimPlot
#' @importFrom ggplot2 ggsave
umapHiLightSel <- function(obj = combined.obj,
                           COI = c("0", "2", "4"),
                           ident = GetClusteringRuns()[1],
                           h = 7, w = 5,
                           show_plot = TRUE,
                           ...) {
  stopifnot(is(obj, "Seurat"),
    "Ident no found the object!" = ident %in% colnames(obj@meta.data),
    "Not all clusters in COI are found the object!" = all(COI %in% unique(obj@meta.data[[ident]]))
  )

  cellsSel <- getCellIDs.from.meta(ident = ident, ident_values = COI, obj = obj)
  pl <- Seurat::DimPlot(obj,
    reduction = "umap",
    group.by = ident,
    label = TRUE,
    cells.highlight = cellsSel,
    ...
  )
  if (show_plot) print(pl)

  ggplot2::ggsave(
    filename = extPNG(kollapse("cells", COI, collapseby = ".")),
    height = h, width = w
  )
}



# _________________________________________________________________________________________________
#' @title DimPlot.ClusterNames
#'
#' @description Plot UMAP with Cluster names.
#' @param obj Seurat object, Default: combined.obj
#' @param ident identity used, Default: 'cl.names.top.gene.res.0.5'
#' @param reduction UMAP, tSNE, or PCA (Dim. reduction to use), Default: 'umap'
#' @param title Title of the plot, Default: ident
#' @param ... Pass any other parameter to the internally called functions (most of them should work).
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   DimPlot.ClusterNames(obj = combined.obj)
#' }
#' }
#' @export
DimPlot.ClusterNames <- function(
    obj = combined.obj,
    ident = GetNamedClusteringRuns(obj = obj, v = FALSE)[1],
    reduction = "umap",
    title = ident,
    ...) {
  #
  Seurat::DimPlot(
    object = obj, reduction = reduction, group.by = ident,
    label = TRUE, repel = TRUE, ...
  ) + NoLegend() + ggtitle(title)
}




# _________________________________________________________________________________________________
# Multiplex 2D UMAPs, etc. ______________________________ ----


# _________________________________________________________________________________________________
#' @title multiFeaturePlot.A4
#'
#' @description Save multiple FeaturePlots, as jpeg, on A4 for each gene, which are stored as a list of gene names.
#' @param list.of.genes List of gene names for which the plots are to be generated. No default.
#' @param obj Seurat object, Default: combined.obj
#' @param subdir Should plots be saved in a sub-directory? Default: `TRUE`.
#' @param foldername Folder name to save the generated plots. Default: The name of the list of genes.
#' @param subtitle.from.names Should the subtitle be extracted from the names of the gene symbols,
#' eg: `c("Astrocytes" = "AQP4")` ? Default: `TRUE`.
#' @param plot.reduction Dimension reduction technique to use for plots. Default: 'umap'
#' @param intersectionAssay The assay to intersect with, either 'RNA' or 'integrated'. Default: 'RNA'
#' @param layout Layout orientation of the plot. Default: 'wide'
#' @param colors Vector of colors to be used in the plot. Default: c("grey", "red")
#' @param nr.Col Number of columns in the plot grid. Default: 2
#' @param nr.Row Number of rows in the plot grid. Default: 4
#' @param cex Point size in the plot. Default: round(0.1/(nr.Col * nr.Row), digits = 2)
#' @param gene.min.exp Minimum gene expression level for plotting. Default: 'q01'
#' @param gene.max.exp Maximum gene expression level for plotting. Default: 'q99'
#' @param prefix Prefix for the plot filenames. Default: NULL
#' @param suffix Suffix for the plot filenames. Default: NULL
#' @param background_col Background color of the plots. Default: "white"
#' @param saveGeneList Should the list of genes be saved? Default: `FALSE`.
#' @param w Width of the plot. Default: 8.27
#' @param h Height of the plot. Default: 11.69
#' @param scaling Scaling factor for plot size. Default: 1
#' @param aspect.ratio Should the aspect ratio be fixed? Default: Yes, at 0.6
#' @param format Format to save the plot file. Default: 'jpg'
#' @param ... Pass any other parameter to the internally called functions (most of them should work).
#' @seealso
#'  \code{\link[tictoc]{tic}}
#'  \code{\link[cowplot]{plot_grid}}
#' @importFrom tictoc tic toc
#' @importFrom cowplot plot_grid
#' @importFrom MarkdownReports create_set_OutDir
#'
#' @export
multiFeaturePlot.A4 <- function(
    list.of.genes,
    obj = combined.obj,
    subdir = TRUE,
    foldername = substitute(list.of.genes),
    subtitle.from.names = TRUE,
    plot.reduction = "umap",
    intersectionAssay = c("RNA", "integrated")[1],
    layout = c("tall", "wide", FALSE)[2],
    colors = c("grey", "red"),
    nr.Col = 2, nr.Row = 4,
    raster = if (ncol(obj) > 1e5) TRUE else FALSE,
    cex = round(0.1 / (nr.Col * nr.Row), digits = 2),
    cex.min = if (raster) TRUE else FALSE,
    gene.min.exp = "q01", gene.max.exp = "q99",
    prefix = NULL, suffix = NULL,
    background_col = "white",
    aspect.ratio = c(FALSE, 0.6)[2],
    saveGeneList = FALSE,
    w = 8.27, h = 11.69, scaling = 1,
    format = c("jpg", "pdf", "png")[1],
    ...) {
  tictoc::tic()
  ParentDir <- OutDir
  if (is.null(foldername)) foldername <- "genes"
  final.foldername <- FixPlotName(paste0(foldername, "-", plot.reduction, suffix))
  if (subdir) create_set_SubDir(final.foldername, "/", verbose = FALSE)

  if (is.null(names(list.of.genes))) subtitle.from.names <- FALSE

  list.of.genes.found <- check.genes(
    genes = list.of.genes, obj = obj,
    assay.slot = intersectionAssay, makeuppercase = FALSE
  )
  if (subtitle.from.names) names(list.of.genes.found) <- as.character(flip_value2name(list.of.genes)[list.of.genes.found])
  DefaultAssay(obj) <- intersectionAssay

  if (!is.null(cex.min)) cex <- max(cex.min, cex)

  if (layout == "tall") {
    w <- 8.27 * scaling
    h <- 11.69 * scaling
    nr.Col <- 2
    nr.Row <- 4
    print("layout active, nr.Col ignored.")
  }
  if (layout == "wide") {
    w <- 11.69 * scaling
    h <- 8.27 * scaling
    nr.Col <- 2
    nr.Row <- 2
    print("layout active, nr.Col ignored.")
  }

  lsG <- CodeAndRoll2::split_vec_to_list_by_N(1:length(list.of.genes.found), by = nr.Row * nr.Col)
  for (i in 1:length(lsG)) {
    genes <- list.of.genes.found[lsG[[i]]]
    iprint(i, genes)
    plotname <- kpp(c(prefix, plot.reduction, i, genes, suffix, format))

    plot.list <- Seurat::FeaturePlot(
      object = obj, features = genes, reduction = plot.reduction, combine = FALSE,
      ncol = nr.Col, cols = colors, raster = raster,
      min.cutoff = gene.min.exp, max.cutoff = gene.max.exp,
      pt.size = cex, ...
    )

    # Remove the legend and axes
    for (j in 1:length(plot.list)) {
      plot.list[[j]] <- plot.list[[j]] + NoLegend() + NoAxes()
      if (aspect.ratio) plot.list[[j]] <- plot.list[[j]] + ggplot2::coord_fixed(ratio = aspect.ratio)
      if (subtitle.from.names) plot.list[[j]] <- plot.list[[j]] + ggplot2::ggtitle(label = genes[j], subtitle = names(genes)[j])
    }
    # browser()

    pltGrid <- cowplot::plot_grid(plotlist = plot.list, ncol = nr.Col, nrow = nr.Row)
    # cowplot::ggsave2(filename = plotname, width = w, height = h, bg = background_col, plot = pltGrid)
    cowplot::save_plot(
      plot = pltGrid, filename = plotname,
      base_width = w, base_height = h,
      bg = background_col
    )
  }

  if (subdir) MarkdownReports::create_set_OutDir(ParentDir, verbose = FALSE)
  if (saveGeneList) {
    if (is.null(obj@misc$gene.lists)) obj@misc$gene.lists <- list()
    obj@misc$gene.lists[[substitute_deparse(list.of.genes)]] <- list.of.genes.found
    print("Genes saved under: obj@misc$gene.lists")
    return(obj)
  }
  tictoc::toc()
}



# ____________________________________________________________________________________
#' @title Generate Cluster Highlight UMAPs compiled into A4 pages
#'
#' @description This function generates and saves cluster highlight plots for both single and multiple
#' clusters using UMAP or other dimensionality reduction techniques. It supports saving plots in various
#' formats and allows customization of plot appearance and layout.
#'
#' @param ident The name of the metadata column in the Seurat object `obj` to use for identifying clusters.
#' @param obj A Seurat object combining multiple datasets. Default: `combined.obj`.
#' @param foldername Name of the folder to save the plots in. Default: Value of `ident`.
#' @param plot.reduction The dimensionality reduction technique to use for the plots. Default: `"umap"`.
#' @param intersectionAssay The assay to use when calculating intersections. Default: `"RNA"`.
#' @param layout Plot layout, can be `"tall"`, `"wide"`, or `FALSE` for no specific layout. Default: `"wide"`.
#' @param colors A vector of colors to use for non-highlighted and highlighted clusters. Default: `c("grey", "red")`.
#' @param nr.Col Number of columns in the plot grid. Default: 2.
#' @param nr.Row Number of rows in the plot grid. Default: 4.
#' @param cex Size of the text in the plot, calculated based on the number of rows and columns. Default: Calculated value.
#' @param sizes.highlight Size of highlighted cells; Default: 1.
#' @param subdir Logical flag indicating whether to create a subdirectory for the plots. Default: `TRUE`.
#' @param prefix Optional prefix for the plot file names. Default: `NULL`.
#' @param suffix Optional suffix for the plot file names. Default: `NULL`.
#' @param background_col Background color of the plots. Default: `"white"`.
#' @param aspect.ratio Aspect ratio of the plots, can be `FALSE` for default ratio or a numeric value. Default: 0.6.
#' @param saveGeneList Logical flag indicating whether to save the list of genes used in the plots. Default: `FALSE`.
#' @param w Width of the plots, in inches. Default: `8.27`.
#' @param h Height of the plots, in inches. Default: `11.69`.
#' @param scaling Scaling factor for adjusting the size of the plots. Default: 1.
#' @param format Format to save the plots in, can be `"jpg"`, `"pdf"`, or `"png"`. Default: `"jpg"`.
#' @param ... Additional arguments passed to lower-level plotting functions.
#'
#' @return Invisible. This function primarily saves plots to files.
#' @examples
#' multiSingleClusterHighlightPlots.A4(ident = "cluster_id", obj = yourSeuratObject)
#'
#' @importFrom ggplot2 ggplot geom_point
#' @importFrom cowplot plot_grid ggsave2
#' @importFrom tictoc tic toc
#' @importFrom MarkdownReports create_set_OutDir
#'
#' @export
multiSingleClusterHighlightPlots.A4 <- function(
    obj = combined.obj,
    ident = GetClusteringRuns(obj)[1],
    foldername = ident,
    plot.reduction = "umap",
    intersectionAssay = DefaultAssay(combined.obj), # c("RNA", "integrated")[1],
    layout = c("tall", "wide", FALSE)[2],
    colors = c("grey", "red"),
    nr.Col = 2, nr.Row = 4,
    cex = round(0.1 / (nr.Col * nr.Row), digits = 2),
    sizes.highlight = 1,
    subdir = TRUE,
    prefix = NULL, suffix = NULL,
    background_col = "white",
    aspect.ratio = c(FALSE, 0.6)[2],
    saveGeneList = FALSE,
    w = 8.27, h = 11.69, scaling = 1,
    format = c("jpg", "pdf", "png")[1],
    ...) {
  message(" > Running multiSingleClusterHighlightPlots.A4...")

  NrCellsPerCluster <- sort(table(obj[[ident]]), decreasing = TRUE)
  stopifnot(
    ident %in% colnames(obj@meta.data),
    "Some clusters too small (<20 cells). See: table(obj[[ident]]) | Try: removeResidualSmallClusters()" =
      all(NrCellsPerCluster > 20)
  )

  tictoc::tic()
  ParentDir <- OutDir
  if (is.null(foldername)) foldername <- "clusters"
  if (subdir) MarkdownReports::create_set_SubDir(paste0(foldername, "-", plot.reduction), "/")

  clusters <- sort(unique(obj@meta.data[[ident]]))

  DefaultAssay(obj) <- intersectionAssay

  # Adjust plot dimensions and grid layout based on specified layout
  if (layout == "tall") {
    w <- 8.27 * scaling
    h <- 11.69 * scaling
    nr.Col <- 2
    nr.Row <- 4
    message("tall layout active, nr.Col ignored.")
  }

  if (layout == "wide") {
    w <- 11.69 * scaling
    h <- 8.27 * scaling
    nr.Col <- 2
    nr.Row <- 2
    message("wide layout active, nr.Col ignored.")
  }

  # Split clusters into lists for plotting
  ls.Clust <- CodeAndRoll2::split_vec_to_list_by_N(1:length(clusters), by = nr.Row * nr.Col)
  # browser()
  for (i in 1:length(ls.Clust)) { # for each page

    clusters_on_this_page <- as.character(clusters[ls.Clust[[i]]])
    iprint("page:", i, "| clusters", kppc(clusters_on_this_page))
    (plotname <- kpp(c(prefix, plot.reduction, i, "clusters", ls.Clust[[i]], suffix, format)))

    plot.list <- list()
    for (j in seq(clusters_on_this_page)) {  # for each cluster
      cl <- clusters_on_this_page[j]
      message(cl)
      plot.list[[j]] <- clUMAP(
        ident = ident, obj = obj,
        highlight.clusters = cl, label = FALSE, legend = FALSE, save.plot = FALSE,
        plotname = plotname, cols = colors,
        sizes.highlight = sizes.highlight,
        h = h, w = w, ...
      )
    } # for j

    # Customize plot appearance
    for (j in 1:length(plot.list)) {
      plot.list[[j]] <- plot.list[[j]] + NoLegend() + NoAxes()
      if (aspect.ratio) {
        plot.list[[j]] <- plot.list[[j]] +
          ggplot2::coord_fixed(ratio = aspect.ratio)
      }
    } # for j2

    # Save plots
    pltGrid <- cowplot::plot_grid(plotlist = plot.list, ncol = nr.Col, nrow = nr.Row)
    cowplot::ggsave2(filename = plotname, width = w, height = h, bg = background_col, plot = pltGrid)
  } # for ls.Clust

  if (subdir) MarkdownReports::create_set_OutDir(ParentDir)
  tictoc::toc()
}




# _________________________________________________________________________________________________
#' @title Quick Clustering UMAPs on A4 Page
#'
#' @description Generates and arranges UMAP plots for up to four specified clustering resolutions
#' from a Seurat object onto an A4 page, facilitating comparative visualization.
#'
#' @param obj Seurat object to visualize; Default: `combined.obj`.
#' @param idents Vector of clustering resolution identifiers to plot;
#' dynamically defaults to the first 4 found by `GetClusteringRuns`.
#' @param prefix Prefix for plot titles; Default: "Clustering.UMAP.Res".
#' @param suffix Suffix for plot titles; Default: "".
#' @param title Custom title for the composite plot; dynamically generated from `prefix`, `idents`, and `suffix`.
#' @param nrow Number of rows in the plot grid; Default: 2.
#' @param ncol Number of columns in the plot grid; Default: 2.
#' @param w Width of the plot; Default: 11.69.
#' @param h Height of the plot; Default: 8.27.
#' @param ... Additional parameters for individual UMAP plots.
#'
#' @examples
#' \dontrun{
#' qClusteringUMAPS()
#' }
#'
#' @export
#' @importFrom Seurat NoAxes
#' @importFrom ggExpress qA4_grid_plot
qClusteringUMAPS <- function(
    obj = combined.obj,
    idents = na.omit.strip(GetClusteringRuns(obj)[1:4]),
    prefix = "Clustering.UMAP.Res",
    suffix = "",
    nrow = 2, ncol = 2,
    w = 11.69, h = 8.27,
    title = sppu(
      prefix,
      as.numeric(stringr::str_extract(idents, "\\d+\\.\\d+$")),
      suffix
    ),
    ...) {
  message(" > Running qClusteringUMAPS...")

  # Check that the QC markers are in the object
  idents.found <- intersect(idents, colnames(obj@meta.data))
  n.found <- length(idents.found)
  stopifnot("None of the idents found" = n.found > 0)
  message(kppws(n.found, " found of ", idents))

  if (n.found > 5) {
    idents.found <- idents.found[1:4]
    message("Only the first 4 idents will be plotted: ", idents.found)
  }

  px <- list(
    "A" = clUMAP(ident = idents[1], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "B" = clUMAP(ident = idents[2], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "C" = clUMAP(ident = idents[3], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "D" = clUMAP(ident = idents[4], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes()
  )

  ggExpress::qA4_grid_plot(
    plot_list = px,
    plotname = title,
    w = w, h = h,
    nrow = nrow, ncol = ncol
  )
}

# _________________________________________________________________________________________________
#' @title Quickly Draw 4 Gene Expression UMAPs on an A4 Page
#'
#' @description Generates and arranges UMAP plots for up to four specified gene expressions
#' from a Seurat object onto an A4 page, facilitating comparative visualization.
#'
#' @param obj Seurat object to visualize; Default: `combined.obj`.
#' @param features Vector of gene identifiers to plot;
#' dynamically defaults to the first 4 found by `rownames(obj)`.
#' @param prefix Prefix for plot titles; Default: "Expression.UMAP.Gene".
#' @param suffix Suffix for plot titles; Default: "".
#' @param title Custom title for the composite plot; dynamically generated from `prefix`, `genes`, and `suffix`.
#' @param nrow Number of rows in the plot grid; Default: 2.
#' @param ncol Number of columns in the plot grid; Default: 2.
#' @param w Width of the plot; Default: 11.69.
#' @param h Height of the plot; Default: 8.27.
#' @param ... Additional parameters for individual UMAP plots.
#'
#' @examples
#' \dontrun{
#' qGeneExpressionUMAPS()
#' }
#'
#' @export
#' @importFrom Seurat NoAxes
#' @importFrom ggExpress qA4_grid_plot
qGeneExpressionUMAPS <- function(
    obj = combined.obj,
    features = rownames(obj)[1:4],
    prefix = "Expression.UMAP.Gene",
    suffix = "",
    nrow = 2, ncol = 2,
    w = 11.69, h = 8.27,
    title = paste0(prefix, " ", paste(features, collapse = ", "), " ", suffix),
    ...) {
  message("Plotting qGeneExpressionUMAPS")

  # Check that the features are in the object
  features.found <- intersect(features, c(colnames(obj@meta.data), rownames(obj)))
  n.found <- length(features.found)
  stopifnot(
    "None of the features found" = n.found > 1,
    "Only 4 features are allowed" = n.found < 5
  )

  message(kppws(n.found, "found of", length(features), "features:", features))

  px <- list(
    "A" = qUMAP(feature = features[1], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "B" = qUMAP(feature = features[2], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "C" = qUMAP(feature = features[3], save.plot = FALSE, obj = obj, caption = NULL, ...) + NoAxes(),
    "D" = qUMAP(feature = features[4], save.plot = FALSE, obj = obj, ...) + NoAxes()
  )

  ggExpress::qA4_grid_plot(
    plot_list = px,
    plotname = title,
    w = w, h = h,
    nrow = nrow, ncol = ncol
  )
}


# _________________________________________________________________________________________________
#' @title Plot qUMAPs for Genes in a Folder
#'
#' @description This function plots qUMAPs for a specified set of genes, storing the results in a
#' specified folder. If no folder name is provided, it defaults to using the gene set name.
#'
#' @param genes A vector of gene names to be plotted.
#' @param obj An object containing the UMAP and gene data. Default: combined.obj.
#' @param foldername The name of the folder where the plots will be saved. If NULL, the gene set
#' name is used. Default: NULL.
#' @param intersectionAssay The assay slot to use for intersection. Default: 'RNA'.
#' @param plot.reduction The type of reduction to plot. Default: 'umap'.
#' @param ... Additional arguments passed to plotting and directory creation functions.
#'
#' @return Invisible. The function generates plots and saves them in the specified folder.
#'
#' @examples
#' plotQUMAPsInAFolder(
#'   genes = c("Gene1", "Gene2"), obj = combined.obj,
#'   foldername = "MyGenePlots", intersectionAssay = "RNA",
#'   plot.reduction = "umap"
#' )
#'
#' @importFrom MarkdownReports create_set_SubDir create_set_OutDir
#' @export

plotQUMAPsInAFolder <- function(genes, obj = combined.obj,
                                foldername = NULL,
                                intersectionAssay = DefaultAssay(obj),
                                plot.reduction = "umap",
                                ...) {
  message(" > Running plotQUMAPsInAFolder...")

  # Input checks
  stopifnot(
    is.character(genes),
    is.null(foldername) || is.character(foldername),
    is.character(plot.reduction)
  )

  ParentDir <- OutDir
  if (is.null(foldername)) foldername <- substitute_deparse(genes)

  MarkdownReports::create_set_SubDir(paste0(foldername, "-", plot.reduction), "/")

  list.of.genes.found <- check.genes(
    genes = genes, obj = obj,
    assay.slot = intersectionAssay, makeuppercase = FALSE
  )

  for (g in list.of.genes.found) {
    message(g)
    qUMAP(g, reduction = plot.reduction, obj = obj, ...)
  }

  MarkdownReports::create_set_OutDir(ParentDir)

  invisible()
}


# _________________________________________________________________________________________________
#' @title Plot Top N Differentially Expressed Genes Per Cluster
#'
#' @description Visualizes the top N differentially expressed (DE) genes for each cluster within a
#' specified clustering resolution of a Seurat object, facilitating the exploration of gene
#' expression patterns across clusters.
#'
#' @param obj Seurat object containing single-cell RNA-seq data and clustering information;
#' Default: `combined.obj`.
#' @param cl_res Cluster resolution used to identify distinct clusters for analysis; Default: `res`.
#' @param nrGenes Number of top DE genes to display for each cluster;
#' Default: GetClusteringRuns()[1].
#' @param order.by Criteria for ranking DE genes within clusters; Default: `"combined.score"`.
#' @param df_markers Data frame or list of DE genes across clusters. If not provided,
#' attempts to retrieve from `obj@misc$df.markers[[paste0("res.", cl_res)]]`;
#' Default: calculated based on `cl_res`.
#' @param filt_coding_genes Logical indicating whether to filter out non-coding genes; Default: `TRUE`.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   PlotTopGenesPerCluster(obj = combined.obj, cl_res = 0.5, nrGenes = 10)
#' }
#' }
#'
#' @export
PlotTopGenesPerCluster <- function(
    obj = combined.obj,
    cl_res = GetClusteringRuns()[1],
    nrGenes = p$"n.markers",
    order.by = c("combined.score", "avg_log2FC", "p_val_adj")[1],
    df_markers = obj@misc$"df.markers"[[paste0("res.", cl_res)]],
    filt_coding_genes = TRUE,
    ...) {
  message(" > Running PlotTopGenesPerCluster...")

  if(filt_coding_genes) {
    genes <- df_markers$gene
    df_markers <- df_markers[genes %in% filterCodingGenes(genes), ]
  }

  topX.markers <- GetTopMarkers(
    dfDE = df_markers, n = nrGenes,
    order.by = order.by
  )

  ls.topMarkers <- splitbyitsnames(topX.markers)
  for (i in 1:length(ls.topMarkers)) {
    multiFeaturePlot.A4(
      list.of.genes = ls.topMarkers[[i]], obj = obj, subdir = TRUE, foldername = ppp("TopGenes.umaps"),
      prefix = ppp("DEG.markers.res", cl_res, "cluster", names(ls.topMarkers)[i])
    )
  }
}

# _________________________________________________________________________________________________
#' @title Quickly Plot Key QC Markers in Brain Organoids
#'
#' @description Generates and arranges UMAP plots for specified QC features
#' from a Seurat object on an A4 page, facilitating a quick quality control (QC) overview.
#'
#' @param obj Seurat object to visualize; Default: `combined.obj`.
#' @param QC.Features Vector of QC feature names to plot; Default:
#' `c("nFeature_RNA", "percent.ribo", "percent.mito", "nuclear.fraction")`.
#' @param prefix Prefix for plot titles; Default: "QC.markers.4.UMAP".
#' @param suffix Suffix for plot titles; Default: "".
#' @param title Custom title for the composite plot; dynamically generated from `prefix`,
#' `QC.Features`, and `suffix`.
#' @param nrow Number of rows in the plot grid; Default: 2.
#' @param ncol Number of columns in the plot grid; Default: 2.
#' @param ... Additional parameters for individual UMAP plots.
#'
#' @examples
#' \dontrun{
#' qQC.plots.BrainOrg()
#' }
#'
#' @export
#' @importFrom ggExpress qA4_grid_plot
qQC.plots.BrainOrg <- function(
    obj = combined.obj,
    QC.Features = c("nFeature_RNA", "percent.ribo", "percent.mito", "nuclear.fraction", "percent.HGA"),
    prefix = "QC.markers.4.UMAP",
    suffix = "",
    title = sppu(prefix, QC.Features, suffix),
    nrow = 2, ncol = 2,
    ...) {
  message(" > Plotting qQC.plots.BrainOrg...")

  # Check that the QC markers are in the object
  QC.Features.Found <- intersect(QC.Features, union(colnames(obj@meta.data), rownames(obj)))
  QC.Features.Found.in.meta <- intersect(QC.Features, colnames(obj@meta.data))
  n.found <- length(QC.Features.Found)
  message(kppws(n.found, " found: ", QC.Features.Found))
  stopifnot(n.found > 1)


  # Count the number of NAs in specified columns
  na_counts <- sapply(X = obj@meta.data[, QC.Features.Found.in.meta], function(x) sum(is.na(x)))

  # Raise a warning if there are any NAs
  if(length(na_counts)){
    if (sum(na_counts) > 0) {
      warning(sprintf("There are %d NA values found\n", na_counts),
              immediate. = TRUE
      )
    }
  }

  px <- list(
    "A" = qUMAP(QC.Features.Found[1], save.plot = FALSE, obj = obj, ...) + NoAxes(),
    "B" = qUMAP(QC.Features.Found[2], save.plot = FALSE, obj = obj, ...) + NoAxes(),
    "C" = qUMAP(QC.Features.Found[3], save.plot = FALSE, obj = obj, ...) + NoAxes(),
    "D" = qUMAP(QC.Features.Found[4], save.plot = FALSE, obj = obj, ...) + NoAxes()
  )

  ggExpress::qA4_grid_plot(
    plot_list = px,
    plotname = title,
    w = 11.69, h = 8.27,
    nrow = nrow, ncol = ncol
  )
}


# _________________________________________________________________________________________________
#' @title Quickly Plot Key Markers in Brain Organoids
#'
#' @description Generates plots for a predefined or custom set of gene markers within brain organoids,
#' aiding in the quick assessment of their expression across different cells or clusters.
#'
#' @param obj Seurat object for visualization; Default: `combined.obj`.
#' @param custom.genes Logical indicating whether to use a custom set of genes.
#' If FALSE, a predefined list of key brain organoid markers is used; Default: `FALSE`.
#' @param suffix Suffix for the folder name where the plots are saved; Default: "".
#'
#' @examples
#' \dontrun{
#' qMarkerCheck.BrainOrg(combined.obj)
#' qMarkerCheck.BrainOrg(combined.obj, custom.genes = c("Gene1", "Gene2"))
#' }
#'
#' @export
#' @importFrom CodeAndRoll2 as_tibble_from_namedVec

qMarkerCheck.BrainOrg <- function(obj = combined.obj, custom.genes = FALSE,
                                  suffix = "") {
  message(" > Running qMarkerCheck.BrainOrg...")

  Signature.Genes.Top16 <- if (!isFALSE(custom.genes)) {
    custom.genes
  } else {
    Signature.Genes.Top16 <- c(
      `dl-EN` = "KAZN", `ul-EN` = "SATB2" # dl-EN = deep layer excitatory neuron
      , `Immature neurons` = "SLA", Interneurons = "DLX6-AS1",
      Interneurons = "ERBB4", Interneurons = "SCGN",
      `Intermediate progenitor` = "EOMES" # ,  `Intermediate progenitor1` = "TAC3"
      , `S-phase` = "TOP2A", `G2M-phase` = "H4C3" # formerly: HIST1H4C
      , `oRG` = "HOPX" # , `oRG` = "ID4" # oRG outer radial glia
      # , Astroglia = "GFAP"
      , Astrocyte = "S100B", `Hypoxia/Stress` = "DDIT4",
      `Choroid.Plexus` = "TTR", `Low-Quality` = "POLR2A",
      `Mesenchyme` = "DCN", Glycolytic = "PDK1"
      # , `Choroid.Plexus` = "OTX2", `Mesenchyme` = "DCN"
    )
    print(Signature.Genes.Top16)
  }

  stopifnot()

  print(CodeAndRoll2::as_tibble_from_namedVec(Signature.Genes.Top16))
  multiFeaturePlot.A4(
    obj = obj, list.of.genes = Signature.Genes.Top16, layout = "tall",
    foldername = sppp("Signature.Genes.Top16", suffix)
  )
}





# _________________________________________________________________________________________________
#' @title Plot Top Genes
#'
#' @description This function plots the highest expressed genes on UMAPs, saving the plots in a
#' subfolder. It requires the prior execution of `calc.q99.Expression.and.set.all.genes`.
#'
#' @param obj A Seurat object containing the data for plotting. Default: combined.obj.
#' @param n The number of top genes to plot. Default: 32.
#' @param exp.slot The slot in the Seurat object where the expression data is stored.
#' Default: "expr.q99".
#'
#' @details This function identifies the top `n` genes based on expression levels stored in
#' `exp.slot` of the provided Seurat object. It then plots these genes using UMAPs and saves
#' the results in a subfolder named "Highest.Expressed.Genes".
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   PlotTopGenes()
#' }
#' }
#'
#' @export

PlotTopGenes <- function(obj = combined.obj, n = 8
                         , exp.slot = "expr.q99") {
  message("Using obj@misc$", exp.slot)
  stopifnot(inherits(obj, "Seurat"),
    "Requires calling calc.q99.Expression.and.set.all.genes before. " =
      exp.slot %in% names(obj@misc)
  )

  Highest.Expressed.Genes <- names(head(sort(obj@misc[[exp.slot]], decreasing = TRUE), n = n))
  multiFeaturePlot.A4(list.of.genes = Highest.Expressed.Genes, foldername = "Highest.Expressed.Genes")
}






# _________________________________________________________________________________________________
# Manipulating UMAP and PCA  ______________________________ ----
# _________________________________________________________________________________________________

#' @title Flip Reduction Coordinates
#'
#' @description Flips dimensionality reduction coordinates (such as UMAP or tSNE) vertically or
#' horizontally to change the visualization perspective.
#'
#' @param obj Seurat object to modify; Default: `combined.obj`.
#' @param dim Number of dimensions in the reduction to consider; Default: 2.
#' @param reduction Dimension reduction technique to modify ('umap', 'tsne', or 'pca'); Default: 'umap'.
#' @param flip Axis (or axes) to flip; can be 'x', 'y', or 'xy' to flip both; Default: "x".
#' @param FlipReductionBackupToo Boolean indicating whether to also flip coordinates in the backup slot; Default: `TRUE`.
#'
#' @examples
#' \dontrun{
#' # Before flipping UMAP coordinates
#' clUMAP()
#' # Flip UMAP coordinates and visualize again
#' combined.obj <- FlipReductionCoordinates(combined.obj)
#' clUMAP()
#' }
#'
#' @export
#' @importFrom Seurat Embeddings
FlipReductionCoordinates <- function(
    obj = combined.obj, dim = 2, reduction = "umap",
    flip = c("x", "y", "xy", NULL)[1], FlipReductionBackupToo = TRUE) {
  coordinates <- Embeddings(obj, reduction = reduction)
  stopifnot(ncol(coordinates) == dim)

  if (flip %in% c("x", "xy")) coordinates[, 1] <- coordinates[, 1] * -1
  if (flip %in% c("y", "xy")) coordinates[, 2] <- coordinates[, 2] * -1
  obj@reductions[[reduction]]@cell.embeddings <- coordinates

  if (FlipReductionBackupToo) {
    bac.slot <- paste0(reduction, dim, "d")
    if (length(obj@misc$reductions.backup[[bac.slot]])) {
      obj@misc$reductions.backup[[bac.slot]]@cell.embeddings <- coordinates
      iprint(dim, "dimensional", reduction, "backup flipped too.")
    }
  }
  return(obj)
}



# _________________________________________________________________________________________________
#' @title Relabel Cluster Numbers Along a UMAP (or tSNE) Axis
#'
#' @description Automatically renumbers clusters based on their position along a specified dimension
#' in a UMAP (or tSNE or PCA) plot, potentially enhancing interpretability by ordering clusters.
#'
#' @param obj Seurat object containing clustering and UMAP (or other dimensional reduction) data;
#' Default: `combined.obj`.
#' @param dim Dimension along which to order clusters (1 for the first dimension, typically horizontal);
#' Default: 1.
#' @param swap If TRUE, reverses the ordering direction; Default: `FALSE`.
#' @param reduction Dimension reduction technique used for cluster positioning ('umap', 'tsne', or 'pca');
#' Default: 'umap'.
#' @param ident Clustering resolution identifier used to fetch cluster labels from `obj` metadata;
#' Default: 'integrated_snn_res.0.5'.
#' @param plot If TRUE, plots the UMAP with new cluster names; Default: `TRUE`.
#'
#' @examples
#' \dontrun{
#' combined.obj <- AutoNumber.by.UMAP(
#'   obj = combined.obj, dim = 1, reduction = "umap",
#'   ident = "integrated_snn_res.0.5"
#' )
#' DimPlot.ClusterNames(combined.obj, ident = GetClusteringRuns(combined.obj)[1])
#' }
#'
#' @export
#' @importFrom CodeAndRoll2 translate
#' @importFrom Stringendo kpp kppu iprint
#' @importFrom Seurat FetchData
AutoNumber.by.UMAP <- function(obj = combined.obj,
                               reduction = "umap",
                               dim = 1, swap = FALSE,
                               ident = GetClusteringRuns(obj = obj)[1],
                               plot = TRUE,
                               obj.version = obj@version) {
  dim_name <- kppu(reduction, dim)
  if (obj.version < "5") dim_name <- toupper(dim_name)
  message("Obj. version: ", obj.version, " \ndimension name: ", dim_name)
  message("Resolution: ", ident)

  stopifnot("Identity not found." = ident %in% colnames(obj@meta.data))

  coord.umap <- obj@reductions$umap@cell.embeddings[, dim_name]

  # Get cluster labels
  identX <- as.character(obj@meta.data[[ident]])

  # Order clusters by median coordinate
  ls.perCl <- split(coord.umap, f = identX)
  MedianClusterCoordinate <- sapply(ls.perCl, median)

  OldLabel <- names(sort(MedianClusterCoordinate, decreasing = swap))
  NewLabel <- as.character(0:(length(MedianClusterCoordinate) - 1))
  NewMeta <- CodeAndRoll2::translate(vec = identX, old = OldLabel, new = NewLabel)
  NewMetaCol <- kpp(ident, "ordered")
  iprint("NewMetaCol:", NewMetaCol)

  obj[[NewMetaCol]] <- NewMeta
  if (plot) {
    x <- clUMAP(obj, ident = NewMetaCol)
    print(x)
  }
  return(obj)
}


# _________________________________________________________________________________________________
# General ______________________________ ----
# _________________________________________________________________________________________________

# _________________________________________________________________________________________________
# DGEA and GO-term enrichment ______________________________ ----
# _________________________________________________________________________________________________


#' @title scEnhancedVolcano
#'
#' @description This function creates an enhanced volcano plot.
#'
#' @param toptable A data frame with the results of differential gene expression analysis.
#' @param lab A vector of gene symbols to label on the plot.
#' @param suffix A string to append to the filename/title of the plot.
#' @param title The title of the plot.
#' @param suffix A string to append to the filename/title of the plot.
#' @param caption The first line of caption of the plot.
#' @param caption2 The second line of caption of the plot.
#' @param x The x-axis, which is typically the average log2 fold change.
#' @param y The y-axis, which is typically the adjusted p-value.
#' @param selectLab A vector of gene symbols to select for labeling.
#' @param min.p The minimum p-value, to trim high values on the Y-axis.
#' @param max.l2fc The maximum log2 fold change, to trim high values on the X-axis.
#' @param min.pct.cells The minimum percentage of cells in which a gene must be expressed to be included in the plot.
#' @param pCutoffCol The column in the toptable that contains the p-value cutoff.
#' @param pCutoff The p-value cutoff.
#' @param FCcutoff The fold change cutoff.
#' @param count_stats Logical. Calculates a data frame with the count statistics.
#' @param drawConnectors Whether to draw connectors between the labels and the points.
#' @param max.overlaps The maximum number of labels that can overlap.
#' @param min.p The minimum p-value, to trim high values on the Y-axis.
#' @param h The height of the plot.
#' @param w The width of the plot.
#' @param ... Pass any other parameter to `EnhancedVolcano::EnhancedVolcano()`.
#'
#' @return A ggplot object.
#'
#' @importFrom EnhancedVolcano EnhancedVolcano
#'
#' @export

scEnhancedVolcano <- function(
    toptable,
    x = "avg_log2FC",
    y = "p_val_adj",
    lab = rownames(toptable),
    title = paste("DGEA"),
    selectLab = trail(lab, 10),
    min.p = 1e-50,
    max.l2fc = Inf,
    min.pct.cells = 0.1,
    pCutoffCol = "p_val_adj",
    pCutoff = 1e-3,
    FCcutoff = 1,
    suffix = NULL,
    caption = paste("Min. Fold Change in Input:", .estMinimumFC(toptable)),
    caption2 = paste("min p_adj:", min.p, "(Y-axis values clipped at)"),
    count_stats = TRUE,
    drawConnectors = TRUE, max.overlaps = Inf,
    h = 9, w = h,
    ...) {
  #
  message(
    "\nMin. log2fc: ", FCcutoff, "\nMax. p-adj: ", pCutoff,
    "\nMin. p-adj (trim high y-axis): ", min.p,
    "\nMin. pct cells expressing: ", min.pct.cells
  )
  stopifnot(nrow(toptable) > 5)


  # Filter min. cells expressing.
  toptable <- toptable |> dplyr::filter(pct.1 > min.pct.cells | pct.2 > min.pct.cells)

  # calculate true min pct cells expressing (maybe input prefiltered above thr. already).
  min.pct.cells <- toptable |>
    select(pct.1, pct.2) |>
    as.matrix() |>
    rowMax() |>
    min()

  # Clip p-values.
  toptable[["p_val_adj"]] <-
    clip.at.fixed.value(x = toptable[["p_val_adj"]], thr = min.p, high = FALSE)

  # Clip log2FC.
  if (max.l2fc < Inf) {
    toptable[["avg_log2FC"]] <-
      clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = -max.l2fc, high = FALSE)
    toptable[["avg_log2FC"]] <-
      clip.at.fixed.value(x = toptable[["avg_log2FC"]], thr = max.l2fc, high = TRUE)
  }

  # Add statistical information to the subtitle.
  if (count_stats) {
    enr_stats <- unlist(countRelevantEnrichments(
      df = toptable, logfc_col = x, pval_col = y,
      logfc_cutoff = FCcutoff, pval_cutoff = pCutoff
    ))
    stat_info <- kppws("Genes", intermingle2vec(names(enr_stats), enr_stats), "(red)")
    subtitle <- paste0(
      stat_info, "\n",
      paste(
        "Cutoffs: max.p_adj: ", pCutoff, " |  min.log2FC: ", FCcutoff,
        " |  min.pct.cells: ", min.pct.cells
      )
    )
  }
  caption <- paste0(caption, "\n", caption2)

  # Create an enhanced volcano plot.
  # try.dev.off();
  pobj <- EnhancedVolcano::EnhancedVolcano(
    toptable = toptable,
    x = x, y = y,
    title = title, subtitle = subtitle,
    lab = lab, selectLab = selectLab,
    caption = caption,
    pCutoffCol = pCutoffCol,
    pCutoff = pCutoff,
    FCcutoff = FCcutoff,
    drawConnectors = drawConnectors,
    max.overlaps = max.overlaps,
    ...
  )

  print(pobj)
  # Save the plot.
  qqSave(
    ggobj = pobj, title = paste0("Volcano.", make.names(title), suffix),
    h = h, w = w
  )
  return(pobj)
}

# ________________________________________________________________________
#' @title Estimate Minimum Log2-Based Fold Change
#'
#' @description This function estimates the minimum log2-based fold change from a data frame column.
#'
#' @param df A data frame containing the fold change data. Default: `df.m.UL`.
#' @param col A character string specifying the column name containing log2 fold change values. Default: "avg_log2FC".
#'
#' @return The minimum log2-based fold change, rounded and transformed from log2 to linear scale.
#'
#' @examples
#' \dontrun{
#' df <- data.frame(avg_log2FC = c(-1, -0.5, 0.5, 1))
#' .estMinimumFC(df, "avg_log2FC")
#' # .estMinimumFC(df = df.m.UL, col = "avg_log2FC")
#' }
#' @return The minimum log2-based fold change, rounded and transformed from log2 to linear scale.

.estMinimumFC <- function(df, col = "avg_log2FC") {
  lfc <- df[[col]]
  lfc_enr <- min(lfc[lfc > 0])
  lfc_depl <- abs(max(lfc[lfc < 0]))
  estim_min_l2fc <- min(lfc_enr, lfc_depl)
  return(iround(2^estim_min_l2fc))
}


# ________________________________________________________________________
#' @title Count Relevant Enrichments
#'
#' @description This function counts the number of relevantly expressed genes from a differential
#' gene expression table. It considers genes to be relevant if they fall under a maximum p-value
#' cutoff and are above a minimum log2 fold change cutoff. The function reports the number of
#' enriched and depleted genes.
#'
#' @param df Data frame containing the gene expression data.
#' @param pval_col Character. Name of the column containing the adjusted p-values. Default: "p_val_adj".
#' @param logfc_col Character. Name of the column containing the log2 fold change values. Default: "avg_log2FC".
#' @param pval_cutoff Numeric. The maximum adjusted p-value to consider a gene relevant. Default: 1e-2.
#' @param logfc_cutoff Numeric. The minimum log2 fold change to consider a gene relevant. Default: 1.
#'
#' @return A list with the counts of enriched and depleted genes.
#' @export
#'
#' @examples
#' df <- data.frame(
#'   p_val_adj = c(0.001, 0.02, 0.03, 0.0001),
#'   avg_log2FC = c(1.5, -2, 0.5, 2)
#' )
#' countRelevantEnrichments(df)
countRelevantEnrichments <- function(df,
                                     pval_col = "p_val_adj", logfc_col = "avg_log2FC",
                                     pval_cutoff = 1e-2, logfc_cutoff = 1) {
  #
  stopifnot(
    is.data.frame(df),
    pval_col %in% colnames(df),
    logfc_col %in% colnames(df),
    is.numeric(pval_cutoff),
    is.numeric(logfc_cutoff)
  )

  relevant_genes <- df |>
    dplyr::filter(!!sym(pval_col) <= pval_cutoff)

  enriched_count <- relevant_genes |>
    dplyr::filter(!!sym(logfc_col) >= logfc_cutoff) |>
    nrow()

  depleted_count <- relevant_genes |>
    dplyr::filter(!!sym(logfc_col) <= -logfc_cutoff) |>
    nrow()

  return(list(enriched = enriched_count, depleted = depleted_count))
}


# ________________________________________________________________________
#' @title Perform GO Enrichment Analysis
#'
#' @description This function performs Gene Ontology (GO) enrichment analysis using the
#' `clusterProfiler::enrichGO` function. It takes the gene list, universe, organism database,
#' gene identifier type, and ontology type as inputs and returns the enrichment results.
#'
#' @param genes Character vector. List of genes for enrichment analysis. Default: NULL.
#' @param universe Character vector. Background gene list (universe). Default: NULL.
#' @param org_db Character. Organism-specific database to use (e.g., 'org.Hs.eg.db'). Default: 'org.Hs.eg.db'.
#' @param key_type Character. Gene identifier type (e.g., 'SYMBOL', 'ENTREZID'). Default: 'SYMBOL'.
#' @param ont Character. Ontology type to use (e.g., 'BP', 'MF', 'CC'). Default: 'BP'.
#' @param pAdjustMethod Character. Method for p-value adjustment. Default: 'BH' for Benjamini-Hochberg.
#' @param pvalueCutoff Numeric. P-value cutoff for significance. Default: 0.05.
#' @param qvalueCutoff Numeric. Q-value cutoff for significance. Default: 0.2.
#' @param save Logical. Save the results as a data frame. Default: `TRUE`.
#' @param suffix Character. Suffix to append to the output file name. Default: 'GO.Enrichments'.
#' @param check.gene.symbols Logical. Check gene symbols for validity. Default: `TRUE`.
#' @param ... Additional arguments to pass to `clusterProfiler::enrichGO`.

#'
#' @return A data frame with GO enrichment results.
#' @export
#'
#' @examples
#' \dontrun{
#' gene_list <- rownames(df.m.DL.up.in.TSC)
#' background_genes <- names(all.genes)
#' go_results <- performGOEnrichment(gene_list, background_genes, "org.Hs.eg.db", "SYMBOL", "BP")
#' print(go_results)
#' }
scGOEnrichment <- function(genes, universe = NULL,
                           org_db = "org.Hs.eg.db", key_type = "SYMBOL", ont = "BP",
                           pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.2,
                           save = TRUE,
                           suffix = NULL,
                           check.gene.symbols = TRUE,
                           ...) {
  # Load required library
  stopifnot("Package 'clusterProfiler' must be installed to use this function." = require("clusterProfiler"))

  # Input assertions
  stopifnot(
    is.character(genes) | is.null(genes),
    (is.character(universe) | is.null(universe)),
    (length(universe) > 100 | is.null(universe)),
    is.character(org_db), is.character(key_type), is.character(ont),
    is.character(ont)
  )

  if (is.null(genes) | length(genes) == 0) {
    return(NULL)
  }

  # check.gene.symbols
  if (check.gene.symbols) {
    x <- checkGeneSymbols(genes, species = "human")
    genes <- x[x[, "Approved"], 1]
  }

  message("Performing enrichGO() analysis...")
  message(length(genes), " approved genes of interest, in ", length(universe), " background genes.")

  go_results <- clusterProfiler::enrichGO(
    gene = genes,
    universe = universe,
    pAdjustMethod = pAdjustMethod,
    OrgDb = org_db,
    keyType = key_type,
    pvalueCutoff = pvalueCutoff,
    qvalueCutoff = qvalueCutoff,
    ont = ont,
    ...
  )

  nr_of_enr_terms <- length(go_results@result$"ID")

  # Output assertions
  if (nrow(go_results) < 1) warning("No enriched terms found!", immediate. = TRUE)

  if (save) {
    xsave(go_results,
      suffix = kpp("enr", nr_of_enr_terms, suffix),
      showMemObject = FALSE, saveParams = FALSE, allGenes = FALSE
    )
  }
  message("\nNr of enriched terms: ", nr_of_enr_terms)

  return(go_results)
}




# ________________________________________________________________________
#' @title Barplot GO Enrichment Results by enrichplot
#'
#' @description This function creates a bar plot of GO enrichment analysis results using the
#' `enrichplot::barplot.enrichResult` function. It also allows saving the plot to a file.
#'
#' @param df.enrichment Data frame. Enrichment results from GO analysis. Default: NULL.
#' @param tag Character. Tag to be added to the title of the plot. Default: "in ...".
#' @param universe Character. Background gene list (universe). Default: `df.enrichment@universe`.
#' @param title Character. Title of the plot. Default: "GO Enrichment Analysis" followed by `tag`.
#' @param subtitle Character. Subtitle of the plot. Default: NULL.
#' @param caption Character. Caption of the plot. Default: constructed from input parameters.
#' @param save Logical. Whether to save the plot to a file. Default: `TRUE`.
#' @param h Height of the plot canvas, calculated as the height of an A4 page times `scale`; Default: `8.27 * scale`.
#' @param w Width of the plot canvas, calculated as the width of an A4 page times `scale`; Default: `11.69 * scale`.
#' @param also.pdf Save plot in both png and pdf formats.
#' @param ... Additional arguments passed to `enrichplot::barplot.enrichResult`.
#'
#' @importFrom ggplot2 labs
#'
#' @return None. The function prints the plot and optionally saves it.
#' @export
#'
#' @examples
#' \dontrun{``
#' df.enrichment <- data.frame() # Example enrichment results data frame
#' plotGOEnrichment(df.enrichment)
#' }
scBarplotEnrichr <- function(df.enrichment,
                             tag = "...",
                             universe = df.enrichment@universe,
                             title = paste("GO Enriched Terms", tag),
                             subtitle = kppws("Input: ", substitute_deparse(df.enrichment)),
                             caption = paste0(
                               "Input genes: ", length(df.enrichment@"gene"),
                               " | Enriched terms: ", nrow(df.enrichment),
                               " | background genes: ", length(universe)
                             ),
                             save = TRUE,
                             w = 10, h = 10,
                             also.pdf = FALSE,
                             ...) {
  stopifnot("Package 'enrichplot' must be installed to use this function." = require("enrichplot"))

  if (tag == "...") warning("Please provide a tag describing where are the enrichments.", immediate. = TRUE)
  nr_input_genes <- length(df.enrichment@"gene")

  pobj <-
    if (nrow(df.enrichment) < 1 | is.null(df.enrichment)) {
      warning("No enriched terms input!", immediate. = TRUE)
      ggplot() +
        theme_void() +
        annotate("text",
          x = 1, y = 1, label = "NO ENRICHMENT",
          size = 8, color = "red", hjust = 0.5, vjust = 0.5
        )
    } else if (nr_input_genes < 5) {
      warning("Very few inputs for GOENR", immediate. = TRUE)
      ggplot() +
        theme_void() +
        annotate("text",
          x = 1, y = 1, label = "TOO FEW GENES (<5)",
          size = 8, color = "red", hjust = 0.5, vjust = 0.5
        )
    } else {
      enrichplot:::barplot.enrichResult(df.enrichment, showCategory = 20)
    }
  pobj <- pobj + ggplot2::labs(title = title, subtitle = subtitle, caption = caption)

  if (save) {
    qqSave(pobj, title = title, w = w, h = h, also.pdf = also.pdf)
  }

  return(pobj)
}


# ________________________________________________________________________
#' @title Filter GO Enrichment Results
#'
#' @description This function filters GO enrichment results based on adjusted p-value and q-value
#' cutoffs, and retrieves the descriptions of the filtered results.
#'
#' @param df.enrichments An object of class `enrichResult` containing the GO enrichment results.
#' @param colname Character. The name of the column containing the GO-term names, or else.
#' @param pvalueCutoff Numeric. The p-value cutoff for filtering the results. Default: NULL, meaning
#' that the default cutoff of the input object is used. It is stored in `df.enrichments@pvalueCutoff`.
#' @param qvalueCutoff Numeric. The q-value cutoff for filtering the results. Default: NULL,
#' meaning that the default cutoff of the input object is used. It is stored in `df.enrichments@qvalueCutoff`.
#'
#' @return A character vector of descriptions of the filtered GO enrichment results.
#'
#' @examples
#' # Assuming GO.Enriched.DL.Ctrl is an object of class `enrichResult` created by clusterprofiler or equivalent
#' descriptions <- filterGoEnrichment(GO.Enriched.DL.Ctrl)
#' print(descriptions)
#'
#' @importFrom dplyr filter pull
#'
#' @export
filterGoEnrichment <- function(df.enrichments,
                               pvalueCutoff = NULL,
                               qvalueCutoff = NULL,
                               colname = "Description") {
  # Input assertions
  stopifnot(
    "enrichResult" %in% class(df.enrichments),
    !is.null(df.enrichments@result),
    !is.null(df.enrichments@pvalueCutoff),
    !is.null(df.enrichments@qvalueCutoff)
  )

  pvalueCutoff <- if (is.null(pvalueCutoff)) df.enrichments@pvalueCutoff else pvalueCutoff
  qvalueCutoff <- if (is.null(qvalueCutoff)) df.enrichments@qvalueCutoff else qvalueCutoff

  message(paste(
    "Filtering GO enrichment results with \np-value cutoff",
    pvalueCutoff, "and q-value cutoff", qvalueCutoff
  ))

  # Filter and retrieve GO
  descriptions <- df.enrichments@result |>
    dplyr::filter(p.adjust < pvalueCutoff) |>
    dplyr::filter(qvalue < qvalueCutoff) |>
    dplyr::pull(!!sym(colname))

  # Output assertions
  stopifnot(is.character(descriptions))
  message("\nNr of enriched terms: ", length(descriptions))

  return(descriptions)
}

# Example usage
# Assuming GO.Enriched.DL.Ctrl is an object of class `enrichResult`
# descriptions <- filterGoEnrichment(GO.Enriched.DL.Ctrl)
# print(descriptions)


# ________________________________________________________________________
#' @title Count Enriched and Depleted Genes
#'
#' @description This function counts the number of significantly enriched and depleted genes
#' based on the provided criteria. It filters the genes based on adjusted p-value and
#' logarithm of fold change.
#'
#' @param df A dataframe containing the result of the differential gene expression analysis.
#' @param min_padj A numeric value specifying the minimum adjusted p-value. Default: 0.01.
#' @param min_logFC A numeric value specifying the minimum logarithm to fold change.
#' Default: 0.5. This value should be positive and will be used as a negative value for
#' depleted genes.
#' @param colname.p A string specifying the column name for the adjusted p-value in the dataframe.
#' Default: 'p_val_adj'.
#' @param colname.lFC A string specifying the column name for the logarithm to fold change in
#' the dataframe. Default: 'avg_log2FC'.
#'
#' @return A list of two elements:
#' \item{GeneCounts}{A named numeric vector containing the numbers of enriched and depleted genes.}
#' \item{Parameters}{A named numeric vector containing the parameter names and their values.}
#'
#' @examples
#' df <- data.frame(
#'   p_val = c(5.580902e-14, 4.607790e-12, 1.643436e-11),
#'   avg_log2FC = c(0.4985875, 0.4983416, 0.4977825),
#'   pct.1 = c(0.429, 0.575, 0.387),
#'   pct.2 = c(0.251, 0.396, 0.232),
#'   p_val_adj = c(1.091513e-09, 9.011916e-08, 3.214233e-07)
#' )
#' result <- countEnrichedDepletedGenes(df)
#' print(result)
#'
#' @export

countEnrichedDepletedGenes <- function(df, min_padj = 0.01, min_logFC = 0.5,
                                       # genes = rownames(df),
                                       colname.p = "p_val_adj", colname.lFC = "avg_log2FC") {
  stopifnot(
    min_padj > 0,
    min_logFC > 0,
    colname.p %in% colnames(df),
    colname.lFC %in% colnames(df)
  )

  # Filter the dataframe for enriched genes
  idx.enr <- df[[colname.p]] <= min_padj & df[[colname.lFC]] >= min_logFC
  enriched_genes <- df[idx.enr, ]
  enriched_symbols <- rownames(enriched_genes)
  # enriched_symbols <- genes[idx.enr]

  # Filter the dataframe for depleted genes
  idx.depl <- (df[[colname.p]] <= min_padj & df[[colname.lFC]] <= -min_logFC)

  depleted_genes <- df[idx.depl, ]
  depleted_symbols <- rownames(depleted_genes)
  # depleted_symbols <- genes[idx.depl]

  # Create the named numeric vectors
  gene_counts <- c("Enriched" = nrow(enriched_genes), "Depleted" = nrow(depleted_genes))
  print("gene_counts")
  print(gene_counts)
  parameters <- c("min_padj" = min_padj, "min_logFC" = min_logFC)
  print("parameters")
  print(parameters)

  # Create the list of gene symbols
  gene_symbols <- list("Enriched" = enriched_symbols, "Depleted" = depleted_symbols)

  # Return the results as a list
  result <- list(gene_counts, parameters, gene_symbols)
  names(result) <- c("GeneCounts", "Parameters", "GeneSymbols")

  return(result)
}

# ________________________________________________________________________



# _________________________________________________________________________________________________
# Helpers ______________________________ ----
# _________________________________________________________________________________________________

#' @title Adjust Layout Parameters for multi* plotting fucntions
#'
#' @description Adjusts layout dimensions and properties based on the specified layout type.
#'              Updates the provided environment with new dimensions and layout configuration.
#'
#' @param layout A string specifying the layout type. Can be either "tall" or "wide". Default: NULL.
#' @param scaling A numeric scaling factor to adjust the dimensions. Default: 1.
#' @param wA4 The width of the A4 paper in inches. Default: 8.27.
#' @param hA4 The height of the A4 paper in inches. Default: 11.69.
#' @param env The environment where the layout dimensions and properties should be assigned.
#'            Default: parent.frame().
#'
#' @return Invisible NULL. The function operates by side effects, updating the `env` environment.
#' @examples
#' env <- new.env()
#' .adjustLayout("tall", 1, 8.27, 11.69, env)
#' print(env$w) # Should print the width based on "tall" layout scaling.
#'
.adjustLayout <- function(layout, scaling, wA4, hA4, env) {
  # Input checks
  stopifnot(
    is.character(layout), is.numeric(scaling), is.numeric(wA4),
    is.numeric(hA4), is.environment(env),
    layout %in% c("tall", "wide")
  )

  if (layout == "tall") {
    assign("w", wA4 * scaling, envir = env)
    assign("h", hA4 * scaling, envir = env)
    assign("nr.Col", 2, envir = env)
    assign("nr.Row", 4, envir = env)
    message("tall layout active, nr.Col ignored.")
  } else if (layout == "wide") {
    assign("w", hA4 * scaling, envir = env)
    assign("h", wA4 * scaling, envir = env)
    assign("nr.Col", 2, envir = env) # Adjusted for consistency with wide layout explanation
    assign("nr.Row", 2, envir = env)
    message("wide layout active, nr.Col ignored.")
  } else {
    message("No specific layout selected, defaulting to input parameters.")
  }
}


# _________________________________________________________________________________________________
# Saving plots ______________________________ ----
# _________________________________________________________________________________________________



# _________________________________________________________________________________________________
#' @title Save Two Plots on One A4 Page
#'
#' @description Arranges and saves two UMAP plots (or any plots) side-by-side or one above
#' the other on a single A4 page.
#'
#' @param plot_list A list containing ggplot objects to be arranged and saved.
#' @param pname Boolean indicating if the plot name should be automatically generated;
#' if FALSE, the name is based on `plot_list` and `suffix`; Default: `FALSE`.
#' @param suffix Suffix to be added to the generated filename if `pname` is FALSE; Default: NULL.
#' @param scale Scaling factor for adjusting the plot size; Default: 1.
#' @param nrow Number of rows in the plot arrangement; Default: 2.
#' @param ncol Number of columns in the plot arrangement; Default: 1.
#' @param h Height of the plot, calculated as A4 height times `scale`;
#' calculated dynamically based on `scale`.
#' @param w Width of the plot, calculated as A4 width times `scale`;
#' calculated dynamically based on `scale`.
#' @param ... Additional parameters passed to `plot_grid`.
#'
#' @examples
#' \dontrun{
#' p1 <- ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) +
#'   geom_point()
#' p2 <- ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) +
#'   geom_point()
#' save2plots.A4(plot_list = list(p1, p2))
#' }
#'
#' @export
#' @importFrom cowplot plot_grid save_plot ggdraw
#' @importFrom ggplot2 theme
save2plots.A4 <- function(
    plot_list, pname = FALSE, suffix = NULL, scale = 1,
    nrow = 2, ncol = 1,
    h = 11.69 * scale, w = 8.27 * scale, ...) {
  if (pname == FALSE) pname <- sppp(substitute_deparse(plot_list), suffix)
  p1 <- cowplot::plot_grid(
    plotlist = plot_list, nrow = nrow, ncol = ncol,
    labels = LETTERS[1:length(plot_list)], ...
  )
  p1 <- cowplot::ggdraw(p1) +
    theme(plot.background = element_rect(fill = "white", color = NA))

  print("Saved as:")
  MarkdownHelpers::ww.FnP_parser(extPNG(pname))

  save_plot(plot = p1, filename = extPNG(pname), base_height = h, base_width = w)
}

# _________________________________________________________________________________________________
#' @title Save Four Plots on One A4 Page
#'
#' @description Arranges and saves four plots (e.g. UMAPs) onto a single A4 page, allowing for a
#' compact comparison of different visualizations or clustering results.
#'
#' @param plot_list A list containing ggplot objects to be arranged and saved; each object represents one panel.
#' @param pname Plot name; if FALSE, a name is generated automatically based on `plot_list` and `suffix`; Default: `FALSE`.
#' @param suffix Suffix to be added to the filename; Default: NULL.
#' @param scale Scaling factor for adjusting the size of the overall plot canvas; Default: 1.
#' @param nrow Number of rows to arrange the plots in; Default: 2.
#' @param ncol Number of columns to arrange the plots in; Default: 2.
#' @param h Height of the plot canvas, calculated as the height of an A4 page times `scale`; Default: `8.27 * scale`.
#' @param w Width of the plot canvas, calculated as the width of an A4 page times `scale`; Default: `11.69 * scale`.
#' @param ... Additional parameters passed to `plot_grid`.
#'
#' @examples
#' \dontrun{
#' p1 <- ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) +
#'   geom_point()
#' p2 <- ggplot(mtcars, aes(mpg, disp, color = as.factor(cyl))) +
#'   geom_point()
#' p3 <- ggplot(mpg, aes(displ, hwy, color = class)) +
#'   geom_point()
#' p4 <- ggplot(diamonds, aes(carat, price, color = cut)) +
#'   geom_point()
#' save4plots.A4(plot_list = list(p1, p2, p3, p4))
#' }
#'
#' @export
#' @importFrom cowplot plot_grid save_plot ggdraw
#' @importFrom ggplot2 theme
save4plots.A4 <- function(
    plot_list, pname = FALSE, suffix = NULL, scale = 1,
    nrow = 2, ncol = 2,
    h = 8.27 * scale, w = 11.69 * scale,
    ...) {
  if (pname == FALSE) pname <- sppp(substitute_deparse(plot_list), suffix)
  p1 <- cowplot::plot_grid(
    plotlist = plot_list, nrow = nrow, ncol = ncol,
    labels = LETTERS[1:length(plot_list)], ...
  )
  # https://stackoverflow.com/questions/13691415/change-the-background-color-of-grid-arrange-output
  p1 <- cowplot::ggdraw(p1) +
    theme(plot.background = element_rect(fill = "white", color = NA))

  iprint("Saved as:", pname)
  MarkdownHelpers::ww.FnP_parser(extPNG(pname))
  save_plot(plot = p1, filename = extPNG(pname), base_height = h, base_width = w)
}



# _________________________________________________________________________________________________
#' @title qqSaveGridA4
#'
#' @description Saves a grid of 2 or 4 ggplot objects onto an A4 page.
#' @param plotlist A list of ggplot objects. Default: pl.
#' @param plots A numeric vector indicating the indices of the plots to save from the 'plotlist'. Default: 1:2.
#' @param NrPlots Number of plots to save. Default: length(plots).
#' @param height Height for the saved image. Default: 11.69.
#' @param width Width for the saved image. Default: 8.27.
#' @param fname File name for the saved image. Default: "Fractions.Organoid-to-organoid variation.png".
#' @param ... Additional arguments passed to the plot_grid function.
#' @return This function does not return a value. It saves a grid plot of ggplot objects to the specified file.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   qqSaveGridA4(plotlist = pl, plots = 1:2, fname = "Fractions.per.Cl.png")
#'   qqSaveGridA4(plotlist = pl, plots = 1:4, fname = "Fractions.per.Cl.4.png")
#' }
#' }
#' @seealso
#' \code{\link[cowplot]{plot_grid}}
#' @importFrom cowplot plot_grid
#'
#' @export
qqSaveGridA4 <- function(
    plotlist = pl,
    plots = 1:2, NrPlots = length(plots), height = 11.69, width = 8.27,
    fname = "Fractions.Organoid-to-organoid variation.png", ...) {
  stopifnot(NrPlots %in% c(2, 4))
  iprint(NrPlots, "plots found,", plots, "are saved.")
  pg.cf <- cowplot::plot_grid(plotlist = plotlist[plots], nrow = 2, ncol = NrPlots / 2, labels = LETTERS[1:NrPlots], ...)
  if (NrPlots == 4) list2env(list(height = width, width = height), envir = as.environment(environment()))
  MarkdownHelpers::ww.FnP_parser(fname)
  save_plot(
    filename = fname,
    plot = pg.cf, base_height = height, base_width = width
  )
}



# _________________________________________________________________________________________________
# plotting.dim.reduction.3D.R ______________________________ ----
# _________________________________________________________________________________________________
# source('~/GitHub/Packages/Seurat.utils/Functions/plotting.dim.reduction.3D.R')
# try (source("https://raw.githubusercontent.com/vertesy/Seurat.utils/master/Functions/Plotting.dim.reduction.3D.R"))
# Source: self + https://github.com/Dragonmasterx87/Interactive-3D-Plotting-in-Seurat-3.0.0

# Requirements __________________________________________
# try(library(plotly), silent = TRUE)
# try(library(MarkdownReports), silent = TRUE)
# try(library(htmlwidgets), silent = TRUE)

# May also require
# try (source('~/GitHub/Packages/CodeAndRoll/CodeAndRoll.R'),silent= TRUE) # generic utilities funtions
# require('MarkdownReports') # require("devtools")



# _________________________________________________________________________________________________
#' @title Check Quantile Cutoff and Clip Outliers
#'
#' @description Checks a specified quantile cutoff and clips outliers from an expression vector,
#' ensuring that a minimum number of cells expressing a gene remain.
#'
#' @param expr.vec A numeric vector representing gene expression data.
#' @param quantileCutoffX The quantile cutoff for clipping outliers.
#' @param min.cells.expressing The minimum number of cells that should remain expressing after clipping.
#' @return The expression vector with outliers clipped, ensuring the minimum number of cells expressing.
#' @examples
#' \dontrun{
#' expr.vec <- c(...)
#' quantileCutoff <- 0.99
#' min.cells.expressing <- 10
#' ww.check.quantile.cutoff.and.clip.outliers(expr.vec, quantileCutoff, min.cells.expressing)
#' }
#' @export
#' @importFrom CodeAndRoll2 clip.outliers.at.percentile
#'
ww.check.quantile.cutoff.and.clip.outliers <- function(expr.vec = plotting.data[, gene],
                                                       quantileCutoffX = quantileCutoff,
                                                       min.cells.expressing = 10) {
  expr.vec.clipped <-
    CodeAndRoll2::clip.outliers.at.percentile(expr.vec, probs = c(1 - quantileCutoffX, quantileCutoffX))
  if (sum(expr.vec.clipped > 0) > min.cells.expressing) {
    expr.vec <- expr.vec.clipped
  } else {
    iprint("WARNING: quantile.cutoff too stringent, would leave <", min.cells.expressing, "cells. It is NOT applied.")
  }
  return(expr.vec)
}


# _________________________________________________________________________________________________
#' @title plot3D.umap.gene
#'
#' @description Plot a 3D umap with gene expression. Uses plotly. Based on github.com/Dragonmasterx87.
#' @param gene The gene of interest. Default: 'TOP2A'
#' @param obj The Seurat object for which the 3D umap plot will be generated. Default: combined.obj
#' @param quantileCutoff Cutoff value for the quantile for clipping outliers in the gene expression data. Default: 0.99
#' @param def.assay The default assay to be used. Choose between "integrated" and "RNA". Default: "RNA"
#' @param suffix A suffix added to the filename. Default: NULL
#' @param annotate.by The cluster or grouping to be used for automatic annotation. Default: First returned result from GetNamedClusteringRuns(obj) function.
#' @param alpha Opacity of the points in the plot. Default: 0.5
#' @param dotsize The size of the dots in the plot. Default: 1.25
#' @param ... Pass any other parameter to the internally called `plotly::plot_ly`.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   plot3D.umap.gene(obj = combined.obj, gene = "DDIT4", quantileCutoff = .95)
#'   plot3D.umap.gene(obj = combined.obj, gene = "percent.mito", quantileCutoff = .95) # for continous meta variables
#'   plot3D.umap.gene(obj = combined.obj, gene = "nFeature_RNA", quantileCutoff = .95) # for continous meta variables
#' }
#' }
#' @importFrom plotly plot_ly layout
#' @importFrom Seurat FetchData
#'
#' @export

plot3D.umap.gene <- function(
    gene = "TOP2A",
    obj = combined.obj,
    annotate.by = GetNamedClusteringRuns(obj = obj, v = FALSE)[1],
    quantileCutoff = .99,
    def.assay = c("integrated", "RNA")[2],
    suffix = NULL,
    alpha = .5,
    dotsize = 1.25,
    col.names = c("umap_1", "umap_2", "umap_3"),
    assay = "RNA",
    ...) {
  # Input assertions ____________________________________
  # browser()
  stopifnot(
    is(obj, "Seurat"),
    is.character(gene),
    "gene or feature not found in obj" = (gene %in% Features(obj, assay = assay) | gene %in% colnames(obj@meta.data)),
    "annotate.by not found in @meta" = (annotate.by %in% colnames(obj@meta.data) | annotate.by == FALSE),
    "reductions.backup is missing from @misc" = is.list(obj@misc$"reductions.backup"),
    "umap3d is missing from @misc$reductions.backup" = is(obj@misc$reductions.backup$"umap3d", class2 = "DimReduc"),
    "reductionn has 3 columns" = (ncol(obj@misc$reductions.backup$"umap3d") == 3),
    "3D reduction has >/< cells than object" = (ncol(obj) == nrow(obj@misc$reductions.backup$"umap3d"@cell.embeddings))
  )
  # browser()

  if (obj@version < "5") col.names <- toupper(col.names)
  message("Obj. version: ", obj@version, " \ndim names: ", kppc(col.names))

  DefaultAssay(object = obj) <- def.assay
  iprint(DefaultAssay(object = obj), "assay")

  # Get and format 3D plotting data ____________________________________
  plotting.data <- obj@misc$reductions.backup$"umap3d"@cell.embeddings
  colnames(plotting.data) <- toupper(col.names)


  Expression <- Seurat::FetchData(object = obj, vars = gene)
  plotting.data <- cbind(plotting.data, Expression)

  plotting.data$"Expression" <- ww.check.quantile.cutoff.and.clip.outliers(
    expr.vec = plotting.data[, gene],
    quantileCutoffX = quantileCutoff, min.cells.expressing = 10
  )

  # CodeAndRoll2::clip.outliers.at.percentile(plotting.data[, gene], probs = c(1 - quantileCutoff, quantileCutoff))
  plotting.data$"label" <- paste(rownames(plotting.data), " - ", plotting.data[, gene], sep = "")

  ls.ann.auto <- if (annotate.by != FALSE) {
    .Annotate4Plotly3D(obj = obj, plotting.data. = plotting.data, annotation.category = annotate.by)
  } else {
    NULL
  }

  # plt <- plotly::plot_ly(
  #   data = plotting.data,
  #   x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3,
  #   type = "scatter3d",
  #   mode = "markers",
  #   marker = list(size = dotsize),
  #   text = ~label,
  #   color = ~Expression,
  #   opacity = alpha
  #   # , colors = c('darkgrey', 'red')
  #   , colorscale = "Viridis"
  #   # , hoverinfo="text"
  #   , ...
  # )
  plt <- plotly::plot_ly(
    data = plotting.data,
    x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3,
    type = "scatter3d",
    mode = "markers",
    marker = list(
      size = dotsize,
      color = ~Expression,   # Map Expression to color
      colorscale = "Viridis",
      opacity = alpha
    ),
    text = ~label,
    ...
  )  |>
    plotly::layout(title = gene, scene = list(annotations = ls.ann.auto))

  SavePlotlyAsHtml(plt, category. = gene, suffix. = suffix)
  return(plt)
}




# _________________________________________________________________________________________________
#' @title plot3D.umap
#'
#' @description Plot a 3D umap based on one of the metadata columns. Uses plotly. Based on github.com/Dragonmasterx87.
#' @param category The metadata column based on which the 3D UMAP will be plotted.
#' Default: First returned result from GetNamedClusteringRuns(obj) function.
#' @param obj The Seurat object for which the 3D umap plot will be generated. Default: combined.obj
#' @param suffix A suffix added to the filename. Default: NULL
#' @param annotate.by The cluster or grouping to be used for automatic annotation.
#' Default: First returned result from GetNamedClusteringRuns(obj) function.
#' @param dotsize The size of the dots in the plot. Default: 1.25
#' @param ... Pass any other parameter to the internally called `plotly::plot_ly`.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   plot3D.umap(category = "integrated_snn_res.0.1", obj = combined.obj)
#' }
#' }
#' @importFrom plotly plot_ly layout
#' @importFrom Seurat FetchData
#'
#' @export

plot3D.umap <- function(
    obj = combined.obj,
    category = GetNamedClusteringRuns(obj = obj, v = FALSE)[1],
    annotate.by = category,
    suffix = NULL,
    dotsize = 1.25,
    col.names = c("umap_1", "umap_2", "umap_3"),
    ...) {
  message("category: ", category)
  message("annotate.by: ", annotate.by)



  # Input assertions ____________________________________
  stopifnot(
    is(obj, "Seurat"),
    category %in% colnames(obj@meta.data),
    annotate.by %in% colnames(obj@meta.data),
    "reductions.backup is missing from @misc" = is.list(obj@misc$"reductions.backup"),
    "umap3d is missing from @misc$reductions.backup" = is(obj@misc$reductions.backup$"umap3d", class2 = "DimReduc"),
    "reductionn has 3 columns" = (ncol(obj@misc$reductions.backup$"umap3d") == 3),
    "3D reduction has >/< cells than object" = (ncol(obj) == nrow(obj@misc$reductions.backup$"umap3d"@cell.embeddings))
  )

  if (obj@version < "5") col.names <- toupper(col.names)
  message("Obj. version: ", obj@version, " \ndim names: ", kppc(col.names))

  # Get and format 3D plotting data ____________________________________
  plotting.data <- obj@misc$reductions.backup$"umap3d"@cell.embeddings # plotting.data <- Seurat::FetchData(object = obj, vars = c(col.names, category))
  colnames(plotting.data) <- toupper(col.names)

  plotting.data <- cbind(plotting.data, obj[[category]])
  colnames(plotting.data)[4] <- "category"
  plotting.data$label <- paste(rownames(plotting.data)) # Make a column of row name identities (these will be your cell/barcode names)

  ls.ann.auto <- if (annotate.by != FALSE) {
    .Annotate4Plotly3D(obj = obj, plotting.data. = plotting.data, annotation.category = annotate.by)
  } else {
    NULL
  }

  plt <- plotly::plot_ly(
    data = plotting.data,
    x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3,
    type = "scatter3d",
    mode = "markers",
    marker = list(size = dotsize),
    text = ~label,
    color = ~category,
    colors = gg_color_hue(length(unique(plotting.data$"category")))
    # , hoverinfo="text"
    , ...
  ) |>
    plotly::layout(title = category, scene = list(annotations = ls.ann.auto))

  SavePlotlyAsHtml(plt, category. = category, suffix. = suffix)
  return(plt)
}


# _________________________________________________________________________________________________
#' @title SavePlotlyAsHtml
#'
#' @description Save a Plotly 3D scatterplot as an HTML file.
#' @param plotly_obj The Plotly object to save.
#' @param category The category of the plot.
#' @param suffix A suffix to add to the filename.
#' @param OutputDir The output directory.
#' @seealso
#'  \code{\link[htmlwidgets]{saveWidget}}
#' @examples \dontrun{
#' plt <- plotly::plot_ly("some stuff")
#' SavePlotlyAsHtml(plt, category. = "label.categ", suffix. = "test")
#' }
#'
#' @export
#' @importFrom htmlwidgets saveWidget
SavePlotlyAsHtml <- function(plotly_obj, category. = category, suffix. = NULL) { # Save Plotly 3D scatterplot as an html file.
  OutputDir <- if (exists("OutDir")) OutDir else getwd()
  name.trunk <- kpp("umap.3D", category., suffix., idate(), "html")
  fname <- kpps(OutputDir, name.trunk)
  iprint("Plot saved as:", fname)
  htmlwidgets::saveWidget(plotly_obj, file = fname, selfcontained = TRUE, title = category.)
}


# _________________________________________________________________________________________________
#' @title Backup Dimensionality Reduction Data
#'
#' @description This function is mostly used internally.It stores a backup of specified
#' dimensionality reduction data (e.g., UMAP, tSNE, PCA)
#' within the Seurat object, from `obj@reductions$umap` to the `@misc$reductions.backup` slot. This
#' allows to store 2D and 3D UMAP visualizations in parallel and easily switch between them via
#' the `RecallReduction` function.
#'
#' @param obj Seurat object containing dimensionality reduction data; Default: `combined.obj`.
#' @param dim Number of dimensions to include in the backup; Default: 2.
#' @param reduction Type of dimensionality reduction to backup ('umap', 'tsne', 'pca'); Default: 'umap'.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   obj <- BackupReduction(obj = obj, dim = 2, reduction = "umap")
#' }
#' }
#'
#' @export
BackupReduction <- function(obj = combined.obj, dim = 2, reduction = "umap") { # Backup UMAP to `obj@misc$reductions.backup` from `obj@reductions$umap`.
  if (is.null(obj@misc$"reductions.backup")) obj@misc$"reductions.backup" <- list()
  dslot <- paste0(reduction, dim, "d")
  obj@misc$reductions.backup[[dslot]] <- obj@reductions[[reduction]]
  return(obj)
}



# _________________________________________________________________________________________________
#' @title SetupReductionsNtoKdimensions
#'
#' @description Function to calculate N-to-K dimensional umaps (default = 2:3); and back them up to
#' slots `obj@misc$reductions.backup` from @reductions$umap
#' @param obj A Seurat object. Default: combined.obj
#' @param nPCs A numeric value representing the number of principal components to use. Default: p$n.PC
#' @param dimensions A numeric vector specifying the dimensions to use for the dimensionality reductions. Default: 3:2
#' @param reduction_input The type of dimensionality reduction to use as input. Can be "pca", or
#' some correction results, like harmony pca. Default: 'pca'
#' @param reduction_output Te type of dimensionality reduction to perform.  Can be "umap", "tsne",
#' "pca", or some correctionn results, like harmony pca.  Default: 'umap'
#' @param ... Additional arguments to pass to the dimensionality reduction function.
#' @return The input Seurat object with computed dimensionality reductions and backups of these reductions.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   combined.obj <- SetupReductionsNtoKdimensions(obj = combined.obj, nPCs = 10, dimensions = 2:3, reduction = "umap")
#' }
#' }
#' @importFrom tictoc tic toc
#'
#' @export
SetupReductionsNtoKdimensions <- function(obj = combined.obj, nPCs = p$"n.PC", dimensions = 3:2,
                                          reduction_input = "pca",
                                          reduction_output = "umap", ...) {
  cat("Starting dimensional reduction setup\n")
  tictoc::tic()

  for (d in dimensions) {
    iprint("Calculating", d, "dimensional", reduction_output)

    # Assign the reduction based on the output type
    obj <- switch(reduction_output,
                  umap = RunUMAP(obj, dims = 1:nPCs, reduction = reduction_input, n.components = d, ...),
                  tsne = RunTSNE(obj, dims = 1:nPCs, reduction = reduction_input, n.components = d, ...),
                  pca = RunPCA(obj, dims = 1:nPCs, n.components = d, ...)
    )

    obj <- BackupReduction(obj = obj, dim = d, reduction = reduction_output)
  }

  tictoc::toc()
  return(obj)
}



# _________________________________________________________________________________________________
#' @title Recall Dimensionality Reduction from backup slot
#'
#' @description Restores dimensionality reduction data (e.g., UMAP, tSNE, PCA) from a backup
#' stored within `obj@misc$reductions.backup` to the active `obj@reductions` slot.
#'
#' @param obj Seurat object from which the backup will be restored; Default: `combined.obj`.
#' @param dim Number of dimensions of the reduction data to restore; Default: 2.
#' @param reduction Type of dimensionality reduction to be restored ('umap', 'tsne', 'pca'); Default: 'umap'.
#'
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   combined.obj <- RecallReduction(obj = combined.obj, dim = 2, reduction = "umap")
#'   qUMAP()
#'   combined.obj <- RecallReduction(obj = combined.obj, dim = 3, reduction = "umap")
#'   qUMAP()
#' }
#' }
#'
#' @export
RecallReduction <- function(obj = combined.obj, dim = 2, reduction = "umap") {
  dslot <- paste0(reduction, dim, "d")
  reduction.backup <- obj@misc$reductions.backup[[dslot]]
  msg <- paste(dim, "dimensional", reduction, "from obj@misc$reductions.backup")
  stopif(is.null(reduction.backup), message = paste0(msg, " is NOT FOUND"))
  iprint(msg, "is set active. ")
  stopifnot(dim == ncol(reduction.backup))
  obj@reductions[[reduction]] <- reduction.backup
  return(obj)
}



# _________________________________________________________________________________________________
#' @title .Annotate4Plotly3D
#'
#' @description Internal helper function. Create annotation labels for 3D plots.
#' Source https://plot.ly/r/text-and-annotations/#3d-annotations.
#' @param obj The Seurat object for which the 3D plot annotations will be generated. Default: combined.obj
#' @param plotting.data. The data frame containing plotting data.
#' @param annotation.category The category for which the annotation is generated.
#' @export
#' @importFrom dplyr group_by summarise
#' @importFrom Seurat FetchData

.Annotate4Plotly3D <- function(
    obj = combined.obj,
    plotting.data.,
    annotation.category) {
  stopifnot(
    "annotation.category is missing" = !is.null(annotation.category),
    "plotting.data. is missing" = !is.null(plotting.data.),
    "annotation.category is not in meta.data" = annotation.category %in% colnames(obj@meta.data)
  )

  plotting.data.$"annot" <- Seurat::FetchData(object = obj, vars = c(annotation.category))[, 1]
  auto_annot <-
    plotting.data. |>
    group_by(annot) |>
    summarise(
      showarrow = FALSE,
      xanchor = "left",
      xshift = 10,
      opacity = 0.7,
      "x" = mean(UMAP_1),
      "y" = mean(UMAP_2),
      "z" = mean(UMAP_3)
    )
  names(auto_annot)[1] <- "text"
  ls.ann.auto <- apply(auto_annot, 1, as.list)
  return(ls.ann.auto)
}

# _________________________________________________________________________________________________
#' @title Plot3D.ListOfGenes
#'
#' @description Plot and save list of 3D UMAP or tSNE plots using plotly.
#' @param obj Seurat object to be used for the plot. Default: combined.obj
#' @param annotate.by Variable to annotate the clusters by. Default: 'integrated_snn_res.0.7'
#' @param opacity Opacity for the plot points. Default: 0.5
#' @param cex Point size for the plot. Default: 1.25
#' @param default.assay Default assay to be used from the Seurat object. Default: second entry from c("integrated", "RNA")
#' @param ListOfGenes List of genes to be plotted. Default: c("BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4")
#' @param SubFolderName Name of the subfolder where the plots will be saved. Default: a subfolder named 'plot3D' concatenated with the list of genes.
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   CellTypeMarkers <- c("PGK1", "CTIP2" = "BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4", "TOP2A", "PTGDS", "EDNRB", "EGFR", "SCGN", "NR2F2", "EMX2", "GAD2", "DLX2", "SATB2")
#'   Plot3D.ListOfGenes(obj = combined.obj, ListOfGenes = CellTypeMarkers)
#' }
#' }
#' @export
Plot3D.ListOfGenes <- function(
    obj = combined.obj # Plot and save list of 3D UMAP ot tSNE plots using plotly.
    , annotate.by = "integrated_snn_res.0.7", opacity = 0.5, cex = 1.25, default.assay = c("integrated", "RNA")[2],
    ListOfGenes = c("BCL11B", "FEZF2", "EOMES", "DLX6-AS1", "HOPX", "DDIT4"),
    SubFolderName = ppp("plot3D", substitute_deparse(ListOfGenes))) {
  try(create_set_SubDir(SubFolderName))
  obj. <- obj
  rm("obj")
  stopifnot(annotate.by %in% c(colnames(obj.@meta.data), FALSE))

  DefaultAssay(object = obj.) <- default.assay
  MissingGenes <- setdiff(ListOfGenes, rownames(obj.))
  if (length(MissingGenes)) iprint("These genes are not found, and omitted:", MissingGenes, ". Try to change default assay.")
  ListOfGenes <- intersect(ListOfGenes, rownames(obj.))

  for (i in 1:length(ListOfGenes)) {
    g <- ListOfGenes[i]
    print(g)
    plot3D.umap.gene(obj = obj., gene = g, annotate.by = annotate.by, alpha = opacity, def.assay = default.assay, dotsize = cex)
  }
  try(oo())
  try(create_set_Original_OutDir(NewOutDir = ParentDir))
}


# _________________________________________________________________________________________________
#' @title Plot3D.ListOfCategories
#'
#' @description This function plots and saves a list of 3D UMAP or tSNE plots using plotly.
#' @param obj A Seurat object for which the plot is to be created. Default: 'combined.obj'.
#' @param annotate.by Character vector specifying the metadata column to be used for annotating
#' the plot. Default: 'integrated_snn_res.0.7'.
#' @param cex Numeric value specifying the point size on the plot. Default: 1.25.
#' @param default.assay Character vector specifying the assay to be used. Default: 'RNA'
#' (second element in the vector c("integrated", "RNA")).
#' @param ListOfCategories Character vector specifying the categories to be included in the plot.
#' Default categories are "v.project", "experiment", "Phase", "integrated_snn_res.0.7".
#' @param SubFolderName String specifying the name of the subfolder where the plots will be saved.
#' By default, it's created using the function ppp("plot3D", substitute_deparse(ListOfCategories)).
#' @examples
#' \dontrun{
#' if (interactive()) {
#'   categ3Dplots <- c("v.project", "experiment", "Phase", "integrated_snn_res.0.7", "Area", "Individual", "Type")
#'   Plot3D.ListOfCategories(obj = combined.obj, ListOfCategories = categ3Dplots)
#' }
#' }
#' @export
Plot3D.ListOfCategories <- function(
    obj = combined.obj # Plot and save list of 3D UMAP ot tSNE plots using plotly.
    , annotate.by = "integrated_snn_res.0.7", cex = 1.25, default.assay = c("integrated", "RNA")[2],
    ListOfCategories = c("v.project", "experiment", "Phase", "integrated_snn_res.0.7"),
    SubFolderName = ppp("plot3D", substitute_deparse(ListOfCategories))) {
  try(create_set_SubDir(SubFolderName))
  obj. <- obj
  rm("obj")
  stopifnot(annotate.by %in% colnames(obj.@meta.data))
  DefaultAssay(object = obj.) <- default.assay

  MissingCateg <- setdiff(ListOfCategories, colnames(obj.@meta.data))
  if (length(MissingCateg)) iprint("These metadata categories are not found, and omitted:", MissingCateg, ". See colnames(obj@meta.data).")
  ListOfCategories <- intersect(ListOfCategories, colnames(obj.@meta.data))

  for (i in 1:length(ListOfCategories)) {
    categ <- ListOfCategories[i]
    print(categ)
    plot3D.umap(obj = obj., category = categ, annotate.by = annotate.by, dotsize = cex)
  }
  try(oo())
  try(create_set_Original_OutDir(NewOutDir = ParentDir))
}


# _________________________________________________________________________________________________
# TEMPORARY ______________________________ ----
# _________________________________________________________________________________________________

# _________________________________________________________________________________________________
#' @title Display Correlation Values in Pairs Plot
#'
#' @description This function displays the correlation coefficient and significance level within
#' a scatterplot generated by the `pairs()` function. The default correlation method is Pearson,
#' but Kendall or Spearman methods can also be selected.
#'
#' @param x Numeric vector or the first half of the data pair.
#' @param y Numeric vector or the second half of the data pair.
#' @param digits Number of significant digits to display in the correlation coefficient.
#' Default: 2.
#' @param prefix A string prefix added before the correlation coefficient. Default: "".
#' @param cex.cor The character expansion factor for the correlation coefficient text.
#' This argument directly influences the text size. Default: 2.
#' @param method The method of correlation coefficient calculation. It can be "pearson" (default),
#' "kendall", or "spearman".
#'
#' @return This function does not return a value but modifies the current plot by adding the
#' correlation coefficient and its significance level.
#'
#' @examples
#' \dontrun{
#' pairs(mtcars[, 1:4], panel = panelCorPearson)
#' }
#' @importFrom graphics text par
#' @importFrom stats cor cor.test
#' @export

panelCorPearson <- function(x, y, digits = 2, prefix = "", cex.cor = 2, method = "pearson") {
  # Input validation
  stopifnot(
    is.numeric(x), is.numeric(y),
    is.numeric(digits) && digits > 0,
    is.character(prefix),
    is.numeric(cex.cor) && cex.cor > 0,
    method %in% c("pearson", "kendall", "spearman")
  )

  usr <- par("usr")
  on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y, method = method, use = "complete.obs"))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste(prefix, txt, sep = "")
  if (missing(cex.cor)) cex <- 0.8 / strwidth(txt)

  test <- cor.test(x, y, method = method)
  Signif <- symnum(test$p.value,
    corr = FALSE, na = FALSE,
    cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("***", "**", "*", ".", " ")
  )

  cex <- ifelse(missing(cex.cor), 0.8 / strwidth(txt), cex.cor)
  text(0.5, 0.5, txt, cex = cex * r)
  text(.8, .8, Signif, cex = cex, col = 2)
}



# _________________________________________________________________________________________________
#' @title suPlotVariableFeatures for Single Seurat Object
#'
#' @description Generates a Variable Feature Plot for a specified Seurat object, labels points with
#' the top 20 variable genes, and saves the plot to a PDF file.
#'
#' @param obj A single Seurat object.
#' @param NrVarGenes A vector containing the top 20 variable genes for the Seurat object.
#' @param repel A logical value indicating whether to repel the labels to avoid overlap. Default: `TRUE`.
#' @param plotWidth Numeric value specifying the width of the plot when saved. Default: 7.
#' @param plotHeight Numeric value specifying the height of the plot when saved. Default: 5.
#' @param save A logical value indicating whether to save the plot to a PDF file. Default: `TRUE`.
#' @param suffix A string suffix to append to the plot filename. Default: NULL.
#' @param assay The assay to use for the plot. Default: DefaultAssay(obj).
#' @param ... Additional arguments to pass to the Seurat::VariableFeaturePlot function.
#'
#' @examples
#' \dontrun{
#' suPlotVariableFeatures(combined.obj)
#' }
#' @export
suPlotVariableFeatures <- function(obj = combined.obj, NrVarGenes = 15,
                                   repel = TRUE, plotWidth = 7, plotHeight = 5, save = TRUE,
                                   # suffix = kpp("nVF", .getNrScaledFeatures(obj)),
                                   assay = DefaultAssay(obj),
                                   suffix = NULL,
                                   ...) {
  message(" > Running suPlotVariableFeatures()...")
  message(length(Cells(obj)), " cells | assay: ", assay, " | NrVarGenes: ", NrVarGenes)

  stopifnot(
    is(obj, "Seurat"), is.function(ppp), is.logical(repel),
    is.numeric(plotWidth), is.numeric(plotHeight)
  )

  obj.name <- substitute_deparse(obj)

  plot1 <- Seurat::VariableFeaturePlot(obj, assay = assay, ...) +
    theme(panel.background = element_rect(fill = "white")) +
    labs(
      title = "Variable Genes",
      subtitle = kppws(obj.name, suffix),
      caption = paste("Assay:", assay, "|", idate())
    )


  # Assuming LabelPoints is defined elsewhere and available for use.
  TopVarGenes <- VariableFeatures(obj, assay = assay)[1:NrVarGenes]
  labeledPlot <- LabelPoints(
    plot = plot1, points = TopVarGenes, repel = repel,
    xnudge = 0, ynudge = 0, max.overlaps = 15
  )

  print(labeledPlot)
  filename <- ppp("Var.genes", obj.name, suffix, idate(), "png")

  # if (save) ggplot2::ggsave(plot = labeledPlot, filename = filename, width = plotWidth, height = plotHeight)
  if (save) {
    qqSave(
      ggobj = labeledPlot,
      # title = plotname,
      fname = filename, ext = ext,
      w = plotWidth, h = plotHeight, also.pdf = FALSE
    )
  }
}


# Notes --------------------------------------------------------------------------------------------

# plotMetadataCategPie() is in Seurat.Utils.Metadata.R
vertesy/Seurat.utils documentation built on Dec. 4, 2024, 5:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
vertesy/Seurat.utils
Seurat.utils - utility functions for Seurat

R/Seurat.Utils.Visualization.R
In vertesy/Seurat.utils: Seurat.utils - utility functions for Seurat

R Package Documentation

Browse R Packages

We want your feedback!

vertesy/Seurat.utils Seurat.utils - utility functions for Seurat

R/Seurat.Utils.Visualization.R In vertesy/Seurat.utils: Seurat.utils - utility functions for Seurat

R Package Documentation

Browse R Packages

We want your feedback!

vertesy/Seurat.utils
Seurat.utils - utility functions for Seurat

R/Seurat.Utils.Visualization.R
In vertesy/Seurat.utils: Seurat.utils - utility functions for Seurat