R/Plotting_QC_Seq_10X.R

Defines functions Iterate_Barcode_Rank_Plot Barcode_Plot Seq_QC_Plot_Alignment_Combined Seq_QC_Plot_Basic_Combined Seq_QC_Plot_Antisense Seq_QC_Plot_Exonic Seq_QC_Plot_Intronic Seq_QC_Plot_Intergenic Seq_QC_Plot_Genome Seq_QC_Plot_Transcriptome Seq_QC_Plot_Reads_in_Cells Seq_QC_Plot_Saturation Seq_QC_Plot_Total_Genes Seq_QC_Plot_UMIs Seq_QC_Plot_Genes Seq_QC_Plot_Number_Cells Seq_QC_Plot_Reads_per_Cell

Documented in Barcode_Plot Iterate_Barcode_Rank_Plot Seq_QC_Plot_Alignment_Combined Seq_QC_Plot_Antisense Seq_QC_Plot_Basic_Combined Seq_QC_Plot_Exonic Seq_QC_Plot_Genes Seq_QC_Plot_Genome Seq_QC_Plot_Intergenic Seq_QC_Plot_Intronic Seq_QC_Plot_Number_Cells Seq_QC_Plot_Reads_in_Cells Seq_QC_Plot_Reads_per_Cell Seq_QC_Plot_Saturation Seq_QC_Plot_Total_Genes Seq_QC_Plot_Transcriptome Seq_QC_Plot_UMIs

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#################### 10X SEQ QC ####################
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


#' QC Plots Sequencing metrics
#'
#' Plot the mean number of reads per cell
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#'  8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Reads_per_Cell(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Reads_per_Cell <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Mean_Reads_per_Cell` column of `metrics_dataframe`, either as a
  # single pooled box (`plot_by = "sample_id"`) or as one box per value of the
  # `plot_by` column.

  # Fail fast when the requested grouping column does not exist
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Mean_Reads_per_Cell"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Mean_Reads_per_Cell"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Layers shared by both layouts.
  # NOTE(review): theme_ggprism_mod() is added after theme(); if it is a complete
  # theme it overrides the element tweaks made here -- confirm before relying on them.
  qc_plot <- qc_plot +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
          axis.text.y = element_text(size = 12),
          axis.title = element_text(face = "bold", size = 14),
          plot.title = element_text(face = "bold", size = 18, hjust = 0.5)) +
    ggtitle("Mean Reads per Cell per Sample") +
    ylab("Mean Reads per Cell") +
    xlab(x_label) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Factors are converted to character so that all comparisons are shown
    group_values <- metrics_dataframe[[plot_by]]
    if (is.factor(x = group_values)) {
      group_values <- as.character(x = group_values)
    }
    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the number of cells per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#'  8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Number_Cells(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Number_Cells <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Estimated_Number_of_Cells` column of `metrics_dataframe`, either
  # as a single pooled box (`plot_by = "sample_id"`) or as one box per value of
  # the `plot_by` column.

  # Fail fast when the requested grouping column does not exist
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Estimated_Number_of_Cells"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Estimated_Number_of_Cells"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Layers shared by both layouts.
  # NOTE(review): theme_ggprism_mod() is added after theme(); if it is a complete
  # theme it overrides the element tweaks made here -- confirm before relying on them.
  qc_plot <- qc_plot +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
          axis.text.y = element_text(size = 12),
          axis.title = element_text(face = "bold", size = 14),
          plot.title = element_text(face = "bold", size = 18, hjust = 0.5)) +
    ggtitle("Cells per Sample") +
    ylab("Cells") +
    xlab(x_label) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Factors are converted to character so that all comparisons are shown
    group_values <- metrics_dataframe[[plot_by]]
    if (is.factor(x = group_values)) {
      group_values <- as.character(x = group_values)
    }
    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the median genes per cell per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' 8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Genes(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Genes <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Median_Genes_per_Cell` column of `metrics_dataframe`, either as a
  # single pooled box (`plot_by = "sample_id"`) or as one box per value of the
  # `plot_by` column.

  # Fail fast when the requested grouping column does not exist.  The message
  # names the offending column, matching the sibling Seq_QC_Plot_* functions.
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Median_Genes_per_Cell"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Median_Genes_per_Cell"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Titles, axis labels and theme shared by both layouts
  qc_plot <- qc_plot +
    ggtitle("Median Genes per Cell") +
    ylab("Median Genes") +
    xlab(x_label) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Factors are converted to character so that all comparisons are shown
    group_values <- metrics_dataframe[[plot_by]]
    if (is.factor(x = group_values)) {
      group_values <- as.character(x = group_values)
    }
    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the median UMIs per cell per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' 8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_UMIs(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_UMIs <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Median_UMI_Counts_per_Cell` column of `metrics_dataframe`, either
  # as a single pooled box (`plot_by = "sample_id"`) or as one box per value of
  # the `plot_by` column.

  # Fail fast when the requested grouping column does not exist
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Median_UMI_Counts_per_Cell"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Median_UMI_Counts_per_Cell"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Titles, axis labels and theme shared by both layouts
  qc_plot <- qc_plot +
    ggtitle("Median UMIs per Cell") +
    ylab("Median UMIs") +
    xlab(x_label) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Factors are converted to character so that all comparisons are shown
    group_values <- metrics_dataframe[[plot_by]]
    if (is.factor(x = group_values)) {
      group_values <- as.character(x = group_values)
    }
    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the total genes detected per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' 8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Total_Genes(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Total_Genes <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Total_Genes_Detected` column of `metrics_dataframe`, either as a
  # single pooled box (`plot_by = "sample_id"`) or as one box per value of the
  # `plot_by` column.

  # Fail fast when the requested grouping column does not exist
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Total_Genes_Detected"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Total_Genes_Detected"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Titles, axis labels and theme shared by both layouts
  qc_plot <- qc_plot +
    ggtitle("Total Genes Detected per Sample") +
    ylab("Total Genes") +
    xlab(x_label) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Factors are converted to character so that all comparisons are shown
    group_values <- metrics_dataframe[[plot_by]]
    if (is.factor(x = group_values)) {
      group_values <- as.character(x = group_values)
    }
    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the sequencing saturation percentage per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' 8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Saturation(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Saturation <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Plots the `Sequencing_Saturation` column of `metrics_dataframe` on a percent
  # axis, either as a single pooled box (`plot_by = "sample_id"`) or as one box
  # per value of the `plot_by` column.

  # Fail fast when the requested grouping column does not exist
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }

  by_sample <- plot_by == "sample_id"

  # Grouping values are captured before the saturation column is rewritten
  # below, so significance testing always sees the values as supplied.
  # Factors are converted to character so that all comparisons are shown.
  group_values <- metrics_dataframe[[plot_by]]
  if (is.factor(x = group_values)) {
    group_values <- as.character(x = group_values)
  }

  # Colors are only used when plotting by group: fill in a default palette when
  # none is supplied, otherwise make sure there is at least one color per group.
  # `length_plotby` is interpolated by name in the error message below.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (!by_sample) {
    if (is.null(x = colors_use)) {
      colors_use <- if (length_plotby <= 8) {
        Dark2_Pal()
      } else {
        DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip any "%" sign and convert the saturation column to numeric.  `[[` is
  # used so the column is extracted as a vector for tibbles as well as for
  # base data.frames (`[ , "col"]` returns a one-column tibble for tibbles).
  metrics_dataframe[["Sequencing_Saturation"]] <- as.numeric(
    x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[["Sequencing_Saturation"]], fixed = TRUE)
  )

  # Layout-specific layers
  if (by_sample) {
    # All samples share one constant x position; `outlier.color = NA` hides the
    # boxplot's own outlier markers and points are drawn by geom_quasirandom().
    metrics_dataframe$samples_plotting <- "Samples"
    x_label <- ""
    pct_accuracy <- 0.01
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[["Sequencing_Saturation"]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom()
  } else {
    x_label <- plot_by
    pct_accuracy <- 1
    qc_plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[["Sequencing_Saturation"]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use)
  }

  # Titles, axis labels, percent-formatted y axis and theme shared by both
  # layouts; `scale = 1` because the values are already on a 0-100 scale.
  qc_plot <- qc_plot +
    ggtitle("Sequencing Saturation") +
    ylab("Sequencing Saturation Percent") +
    xlab(x_label) +
    scale_y_continuous(labels = label_percent(accuracy = pct_accuracy, scale = 1)) +
    theme_ggprism_mod()

  if (isTRUE(x = x_lab_rotate)) {
    qc_plot <- qc_plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    # ggpubr is an optional dependency, only needed for this branch
    if (!is_installed(pkg = "ggpubr")) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    groups <- unique(x = group_values)

    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # One character vector of length 2 per pairwise comparison
    comparisons <- combn(x = as.character(x = groups), m = 2, simplify = FALSE)
    qc_plot <- qc_plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  qc_plot
}


#' QC Plots Sequencing metrics
#'
#' Plot the fraction of reads in cells per sample
#'
#' @param metrics_dataframe data.frame containing Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Name of the column to use as grouping factor for the plot.  Default is `"sample_id"`,
#' which plots all samples as a single group with a single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' 8 or fewer groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`.  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Reads_in_Cells(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Reads_in_Cells <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Fraction_Reads_in_Cells"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Fraction of Reads in Cells per Sample") +
      ylab("Fraction of Reads in Cells") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Fraction of Reads in Cells per Sample") +
      ylab("Fraction of Reads in Cells") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads confidently mapped to transcriptome
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Transcriptome(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Transcriptome <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Reads_Mapped_Confidently_to_Transcriptome"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Transcriptome") +
      ylab("Percent of Reads ") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Transcriptome") +
      ylab("Percent of Reads") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads confidently mapped to genome
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Genome(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Genome <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Reads_Mapped_Confidently_to_Genome"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Genome") +
      ylab("Percent of Reads ") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Genome") +
      ylab("Percent of Reads") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads confidently mapped to intergenic regions
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Intergenic(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Intergenic <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Reads_Mapped_Confidently_to_Intergenic_Regions"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Intergenic Regions") +
      ylab("Percent of Reads ") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Intergenic Regions") +
      ylab("Percent of Reads") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads confidently mapped to intronic regions
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Intronic(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Intronic <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Reads_Mapped_Confidently_to_Intronic_Regions"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Intronic Regions") +
      ylab("Percent of Reads ") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Intronic Regions") +
      ylab("Percent of Reads") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads confidently mapped to Exonic regions
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Exonic(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Exonic <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Cell Ranger metric plotted on the y-axis
  metric_col <- "Reads_Mapped_Confidently_to_Exonic_Regions"

  # Check that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided
  # (colors are only used when plotting by a grouping column)
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip "%" symbols and convert the metric to numeric.
  # `[[` extracts the column as a vector for both data.frames and tibbles.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # All samples are drawn as one group on a constant x value
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Exonic Regions") +
      ylab("Percent of Reads ") +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(data = metrics_dataframe, mapping = aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = "y", stackdir = "center", dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Exonic Regions") +
      ylab("Percent of Reads") +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  # With `plot_by = "sample_id"` the x-axis holds the single value "Samples", so
  # pairwise comparisons between sample ids have no matching x positions.
  if (isTRUE(x = significance) && plot_by == "sample_id") {
    cli_warn(message = c("Significance values cannot be calculated when {.code plot_by} is {.val {plot_by}}.",
                         "i" = "All samples are plotted as a single group.  Supply a grouping column to {.code plot_by} to compare groups.")
    )
    significance <- FALSE
  }

  if (isTRUE(x = significance)) {
    ggpubr_check <- is_installed(pkg = "ggpubr")
    if (isFALSE(x = ggpubr_check)) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Use character group labels so that all pairwise comparisons are shown
    groups <- as.character(x = unique(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # List of every pair of groups, as expected by the `comparisons` argument
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Alignment)
#'
#' Plot the fraction of reads mapped Antisense to Gene
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom ggbeeswarm geom_quasirandom
#' @importFrom scales label_percent
#' @importFrom rlang is_installed
#' @importFrom utils combn
#'
#' @export
#'
#' @concept seq_qc_plotting_alignment
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Antisense(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Antisense <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  significance = FALSE,
  ...
) {
  # Column of the metrics data.frame that is plotted on the y-axis
  metric_col <- "Reads_Mapped_Antisense_to_Gene"

  # Validate that both the grouping column and the metric column are present
  if (!plot_by %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {plot_by}} is not a column in the provided {.code metrics_dataframe}.")
  }
  if (!metric_col %in% colnames(x = metrics_dataframe)) {
    cli_abort(message = "{.val {metric_col}} is not a column in the provided {.code metrics_dataframe}.")
  }

  # Create color palette if null and check valid if provided.
  # Colors are only used when plotting by a grouping variable.
  length_plotby <- length(x = unique(x = metrics_dataframe[[plot_by]]))

  if (plot_by != "sample_id") {
    if (is.null(x = colors_use)) {
      if (length_plotby <= 8) {
        colors_use <- Dark2_Pal()
      } else {
        colors_use <- DiscretePalette_scCustomize(num_colors = length_plotby, palette = "polychrome")
      }
    } else if (length(x = colors_use) < length_plotby) {
      cli_abort(message = c("Not enough colors provided.",
                            "i" = "The number of colors supplied: {.field {length(x = colors_use)}}, is less than the number of groups in {.val {plot_by}} column: {.field {length_plotby}}.")
      )
    }
  }

  # Strip the percent sign and convert the metric to numeric.
  # `[[` is used (rather than `[, col]`) so that tibbles are handled the same as data.frames.
  metrics_dataframe[[metric_col]] <- as.numeric(x = gsub(pattern = "%", replacement = "", x = metrics_dataframe[[metric_col]]))

  if (plot_by == "sample_id") {
    # Single pseudo-group so that all samples share one box with one point per sample
    metrics_dataframe$samples_plotting <- "Samples"

    plot <- ggplot(metrics_dataframe, aes(x = .data[["samples_plotting"]], y = .data[[metric_col]])) +
      geom_boxplot(fill = "white", outlier.color = NA) +
      geom_quasirandom() +
      ggtitle("Percent of Reads Confidently Mapped to Antisense to Gene") +
      ylab('Percent of Reads ') +
      xlab("") +
      scale_y_continuous(labels = label_percent(accuracy = 0.01, scale = 1)) +
      theme_ggprism_mod()
  } else {
    plot <- ggplot(metrics_dataframe, aes(x = .data[[plot_by]], y = .data[[metric_col]], fill = .data[[plot_by]])) +
      geom_boxplot(fill = "white") +
      geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = dot_size) +
      scale_fill_manual(values = colors_use) +
      ggtitle("Percent of Reads Confidently Mapped to Antisense to Gene") +
      ylab('Percent of Reads') +
      xlab(plot_by) +
      scale_y_continuous(labels = label_percent(accuracy = 1, scale = 1)) +
      theme_ggprism_mod()
  }

  if (isTRUE(x = x_lab_rotate)) {
    plot <- plot + theme_ggprism_mod(axis_text_angle = 45)
  }

  if (isTRUE(x = significance)) {
    if (isFALSE(x = is_installed(pkg = "ggpubr"))) {
      cli_abort(message = c(
        "Please install the {.val ggpubr} package to calculate/plot significance values.",
        "i" = "This can be accomplished with the following commands: ",
        "----------------------------------------",
        "{.field `install.packages({symbol$dquote_left}ggpubr{symbol$dquote_right})`}",
        "----------------------------------------"
      ))
    }

    # Group labels are taken as character so that every pairwise comparison is listed
    groups <- unique(x = as.character(x = metrics_dataframe[[plot_by]]))
    if (length(x = groups) < 2) {
      cli_abort(message = "Cannot calculate statistics when {.val {plot_by}} column contains less than 2 groups.")
    }

    # NOTE(review): when plot_by = "sample_id" the x-axis holds the single "Samples" group,
    # so comparisons between individual samples presumably cannot be drawn -- confirm intended behavior.

    # List of all pairwise group comparisons (one length-2 character vector per pair)
    comparisons <- combn(x = groups, m = 2, simplify = FALSE)
    plot <- plot + ggpubr::stat_compare_means(comparisons = comparisons, ...)
  }

  return(plot)
}


#' QC Plots Sequencing metrics (Layout)
#'
#' Plot a combined plot of the basic QC metrics from sequencing output.
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param patchwork_title Title to use for the patchworked plot output.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import ggplot2
#' @importFrom patchwork plot_layout plot_annotation
#' @importFrom rlang is_installed
#' @importFrom stringr str_wrap
#'
#' @export
#'
#' @concept seq_qc_plotting_layout
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Basic_Combined(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Basic_Combined <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  patchwork_title = "Sequencing QC Plots: Basic Cell Metrics",
  significance = FALSE,
  ...
) {
  # Angle applied to the x-axis text of every panel
  axis_angle <- if (isTRUE(x = x_lab_rotate)) 45 else 0

  # Build one panel: call the individual QC plotting function with the shared arguments
  # (forwarding `...` for significance testing), then wrap its title and shrink the theme
  # so that the panels fit together in the combined layout.
  build_panel <- function(plot_fun, ...) {
    panel <- plot_fun(metrics_dataframe = metrics_dataframe, plot_by = plot_by, colors_use = colors_use, significance = significance, dot_size = dot_size, ...)
    panel +
      labs(title = str_wrap(panel$labels$title, 18)) +
      theme_ggprism_mod(base_size = 10, axis_text_angle = axis_angle)
  }

  # Create Plots & modify for plotting together
  p1 <- build_panel(Seq_QC_Plot_Number_Cells, ...)
  p2 <- build_panel(Seq_QC_Plot_Reads_per_Cell, ...)
  p3 <- build_panel(Seq_QC_Plot_Genes, ...)
  p4 <- build_panel(Seq_QC_Plot_UMIs, ...)
  p5 <- build_panel(Seq_QC_Plot_Total_Genes, ...)
  p6 <- build_panel(Seq_QC_Plot_Saturation, ...)
  p7 <- build_panel(Seq_QC_Plot_Reads_in_Cells, ...)
  p8 <- build_panel(Seq_QC_Plot_Transcriptome, ...)

  # Assemble plots (2 rows x 4 columns) and unify legends
  plot <- (p1 | p3 | p5 | p7) /
    (p2 | p4 | p6 | p8)
  plot <- plot + plot_layout(guides = 'collect') + plot_annotation(title = patchwork_title, theme = theme(plot.title = element_text(hjust = 0.5, face = "bold", size = rel(1.5))))

  # Print plots
  suppressMessages(print(plot))
}


#' QC Plots Sequencing metrics (Alignment) (Layout)
#'
#' Plot a combined plot of the Alignment QC metrics from sequencing output.
#'
#' @param metrics_dataframe data.frame  contain Cell Ranger QC Metrics (see \code{\link{Read_Metrics_10X}}).
#' @param plot_by Grouping factor for the plot.  Default is to plot as single group with single point per sample.
#' @param colors_use colors to use for plot if plotting by group.  Defaults to RColorBrewer Dark2 palette if
#' less than 8 groups and `DiscretePalette_scCustomize(palette = "polychrome")` if more than 8.
#' @param dot_size size of the dots plotted if `plot_by` is not `sample_id`  Default is 1.
#' @param x_lab_rotate logical.  Whether to rotate the axes labels on the x-axis.  Default is FALSE.
#' @param patchwork_title Title to use for the patchworked plot output.
#' @param significance logical.  Whether to calculate and plot p-value comparisons when plotting by
#' grouping factor.  Default is FALSE.
#' @param ... Other variables to pass to `ggpubr::stat_compare_means` when doing significance testing.
#'
#' @return A ggplot object
#'
#' @import ggplot2
#' @importFrom patchwork plot_layout plot_annotation
#' @importFrom rlang is_installed
#' @importFrom stringr str_wrap
#'
#' @export
#'
#' @concept seq_qc_plotting_layout
#'
#' @examples
#' \dontrun{
#' Seq_QC_Plot_Alignment_Combined(metrics_dataframe = metrics)
#' }
#'

Seq_QC_Plot_Alignment_Combined <- function(
  metrics_dataframe,
  plot_by = "sample_id",
  colors_use = NULL,
  dot_size = 1,
  x_lab_rotate = FALSE,
  patchwork_title = "Sequencing QC Plots: Read Alignment Metrics",
  significance = FALSE,
  ...
) {
  # Angle applied to the x-axis text of every panel
  axis_angle <- if (isTRUE(x = x_lab_rotate)) 45 else 0

  # Build one panel: call the individual QC plotting function with the shared arguments
  # (forwarding `...` for significance testing to every panel, not only the first),
  # then wrap its title and shrink the theme so the panels fit in the combined layout.
  build_panel <- function(plot_fun, ...) {
    panel <- plot_fun(metrics_dataframe = metrics_dataframe, plot_by = plot_by, colors_use = colors_use, significance = significance, dot_size = dot_size, ...)
    panel +
      labs(title = str_wrap(panel$labels$title, 18)) +
      theme_ggprism_mod(base_size = 10, axis_text_angle = axis_angle)
  }

  # Create Plots & modify for plotting together
  p1 <- build_panel(Seq_QC_Plot_Genome, ...)
  p2 <- build_panel(Seq_QC_Plot_Intergenic, ...)
  p3 <- build_panel(Seq_QC_Plot_Transcriptome, ...)
  p4 <- build_panel(Seq_QC_Plot_Exonic, ...)
  p5 <- build_panel(Seq_QC_Plot_Intronic, ...)
  p6 <- build_panel(Seq_QC_Plot_Antisense, ...)

  # Assemble plots (2 rows x 3 columns) and unify legends
  plot <- (p1 | p3 | p5) /
    (p2 | p4 | p6)
  plot <- plot + plot_layout(guides = 'collect') + plot_annotation(title = patchwork_title, theme = theme(plot.title = element_text(hjust = 0.5, face = "bold", size = rel(1.5))))

  # Print plots
  suppressMessages(print(plot))
}


#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#################### Barcode Rank QC ####################
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


#' Create Barcode Rank Plot
#'
#' Plot UMI vs. Barcode Rank with inflection and knee.  Requires input from DropletUtils package.
#'
#' @param br_out DFrame output from \code{\link[DropletUtils]{barcodeRanks}}.
#' @param pt.size point size for plotting, default is 6.
#' @param plot_title Title for plot, default is "Barcode Ranks".
#' @param raster_dpi Pixel resolution for rasterized plots, passed to geom_scattermore().
#' Default is c(1024, 1024).
#' @param plateau numerical value at which to add vertical line designating estimated
#' empty droplet plateau (default is NULL).
#'
#' @return A ggplot object
#'
#' @import cli
#' @import ggplot2
#' @importFrom scattermore geom_scattermore
#' @importFrom cowplot theme_cowplot
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' mat <- Read10X_h5(filename = "raw_feature_bc_matrix.h5")
#'
#' br_results <- DropletUtils::barcodeRanks(mat)
#'
#' Barcode_Plot(br_out = br_results)
#' }
#'

Barcode_Plot <- function(
    br_out,
    pt.size = 6,
    plot_title = "Barcode Ranks",
    raster_dpi = c(1024, 1024),
    plateau = NULL
) {
  # Check br_out is correct
  if (!inherits(x = br_out, what = "DFrame")) {
    cli_abort(message = c("{.code br_out} must be object of class {.field DFrame}.",
                          "i" = "Ensure {.code br_out} is output of {.code {.field DropletUtils::barcodeRanks}}."))
  }

  # Both the metadata entries AND the data columns are required for plotting,
  # so abort when either set is incomplete.
  has_metadata <- all(c("knee", "inflection") %in% names(x = br_out@metadata))
  has_columns <- all(c("rank", "total", "fitted") %in% names(x = br_out@listData))
  if (!has_metadata || !has_columns) {
    cli_abort(message = c("{.code br_out} appears to be missing necessary information.",
                          "i" = "Ensure {.code br_out} is output of {.code {.field DropletUtils::barcodeRanks}} and no errors occurred when running code."))
  }

  # Extract the values used for plotting (exact-name extraction, no partial matching)
  knee <- br_out@metadata[["knee"]]
  inflection <- br_out@metadata[["inflection"]]
  plot_data <- data.frame(br_out@listData)

  plot <- ggplot(data = plot_data, aes(x = .data[["rank"]], y = .data[["total"]])) +
    geom_scattermore(pointsize = pt.size, pixels = raster_dpi) +
    scale_y_log10() +
    scale_x_log10() +
    theme_cowplot() +
    # Fitted curve; color is a fixed setting (outside aes) so the line is drawn in actual red
    geom_line(mapping = aes(x = .data[["rank"]], y = .data[["fitted"]]), color = "red") +
    # Horizontal reference lines and labels for knee and inflection points
    geom_hline(yintercept = knee, linetype = "dashed", color = "dodgerblue") +
    geom_hline(yintercept = inflection, linetype = "dashed", color = "forestgreen") +
    annotate("text", x = 1, y = knee, label = paste0("Knee (", knee, ")"), vjust = -0.5, hjust = 0) +
    annotate("text", x = 1, y = inflection, label = paste0("Inflection (", inflection, ")"), vjust = -0.5, hjust = 0) +
    ylab("UMIs") +
    xlab("Barcode Rank") +
    ggtitle(plot_title) +
    theme(plot.title = element_text(hjust = 0.5))

  # Add plateau if specified: vertical line labelled at the top of the plotted UMI range
  if (!is.null(x = plateau)) {
    plot <- plot +
      geom_vline(xintercept = plateau, linetype = "dashed", color = "dodgerblue") +
      annotate("text", x = plateau, y = max(plot_data[["total"]]), label = paste0("Plateau (", plateau, ")"), vjust = -0.5, hjust = -0.05)
  }

  # return plot
  return(plot)
}


#' Iterative Barcode Rank Plots
#'
#' Read data, calculate `DropletUtils::barcodeRanks`, create barcode rank plots, and output a single PDF file.
#'
#' @param dir_path_h5 path to parent directory (if `multi_directory = TRUE`) or directory containing
#' all h5 files (if `multi_directory = FALSE`).
#' @param multi_directory logical, whether or not all h5 files are in their own subdirectories or in a
#' single directory (default is TRUE; each in own subdirectory (e.g. output from Cell Ranger)).
#' @param h5_filename Either the file name of h5 file (if `multi_directory = TRUE`) or the shared
#' suffix (if `multi_directory = FALSE`)
#' @param cellranger_multi logical, whether the outputs to be read are from Cell Ranger `multi` as opposed
#' to Cell Ranger `count` (default is FALSE).  Only valid if `multi_directory = FALSE`.
#' @param parallel logical, should files be read in parallel (default is FALSE).
#' @param num_cores Number of cores to use in parallel if `parallel = TRUE`.
#' @param file_path file path to use for saving PDF output.
#' @param file_name Name of PDF output file.
#' @param pt.size point size for plotting, default is 6.
#' @param raster_dpi Pixel resolution for rasterized plots, passed to geom_scattermore().
#' Default is c(1024, 1024).
#' @param plateau numerical values at which to add vertical line designating estimated
#' empty droplet plateau (default is NULL).  Must be vector equal in length to number of samples.
#' @param ... Additional parameters passed to `Read10X_h5_Multi_Directory` or `Read10X_h5_GEO`.
#'
#' @return pdf document
#'
#' @import cli
#' @import ggplot2
#' @importFrom grDevices dev.off pdf
#' @importFrom pbapply pblapply pboptions
#' @importFrom utils txtProgressBar setTxtProgressBar
#'
#' @export
#'
#' @concept seq_qc_plotting_basic
#'
#' @examples
#' \dontrun{
#' Iterate_Barcode_Rank_Plot(dir_path_h5 = "H5_PATH/", multi_directory = TRUE,
#' h5_filename = "raw_feature_bc_matrix", parallel = TRUE, num_cores = 12, file_path = "OUTPUT_PATH",
#' file_name = "Barcode_Rank_Plots")
#' }
#'

Iterate_Barcode_Rank_Plot <- function(
    dir_path_h5,
    multi_directory = TRUE,
    h5_filename = "raw_feature_bc_matrix.h5",
    cellranger_multi = FALSE,
    parallel = FALSE,
    num_cores = NULL,
    file_path = NULL,
    file_name = NULL,
    pt.size = 6,
    raster_dpi = c(1024, 1024),
    plateau = NULL,
    ...
) {
  # DropletUtils is an optional dependency required for the barcode rank calculation
  if (!is_installed(pkg = "DropletUtils")) {
    cli_abort(message = c(
      "Please install the {.val DropletUtils} package to use {.code Iterate_Barcode_Rank_Plot}",
      "i" = "This can be accomplished with the following commands: ",
      "----------------------------------------",
      "{.field `install.packages({symbol$dquote_left}BiocManager{symbol$dquote_right})`}",
      "{.field `BiocManager::install({symbol$dquote_left}DropletUtils{symbol$dquote_right})`}",
      "----------------------------------------"
    ))
  }

  # Set file_path before path check if current dir specified as opposed to leaving set to NULL
  if (!is.null(x = file_path) && file_path == "") {
    file_path <- NULL
  }

  # Check file path is valid
  if (!is.null(x = file_path) && !dir.exists(paths = file_path)) {
    cli_abort(message = "Provided {.code file_path}: {symbol$dquote_left}{.field {file_path}}{symbol$dquote_right} does not exist.")
  }

  # Check if file name provided
  if (is.null(x = file_name)) {
    cli_abort(message = "No file name provided.  Please provide a file name using {.code file_name}.")
  }

  # Build output file location.  A path separator is inserted between directory and file name,
  # so `file_path` works both with and without a trailing slash.
  file_type <- ".pdf"
  output_file <- paste0(file_name, file_type)
  if (!is.null(x = file_path)) {
    output_file <- file.path(sub(pattern = "[/\\\\]+$", replacement = "", x = file_path), output_file)
  }

  # Read in data
  # NOTE(review): `cellranger_multi` is accepted but not used anywhere in this function --
  # confirm whether it should be forwarded to the reader.
  if (multi_directory) {
    all_mat <- Read10X_h5_Multi_Directory(base_path = dir_path_h5, h5_filename = h5_filename, parallel = parallel, num_cores = num_cores, ...)
  } else {
    all_mat <- Read10X_h5_GEO(data_dir = dir_path_h5, parallel = parallel, num_cores = num_cores, shared_suffix = h5_filename, ...)
  }

  sample_names <- names(x = all_mat)
  num_samples <- length(x = all_mat)

  if (num_samples == 0) {
    cli_abort(message = "No matrices were read from {.code dir_path_h5}; nothing to plot.")
  }

  # Validate plateau before the (expensive) barcode rank calculation
  if (!is.null(x = plateau) && length(x = plateau) != num_samples) {
    cli_abort(message = "The number of values for plateau ({.field {length(x = plateau)}}) must be equal to the number of samples ({.field {num_samples}}).")
  }

  cli_inform(message = "{.field Calculating Barcode Rank Statistics}")
  pboptions(char = "=")
  barcode_ranks_list <- pblapply(seq_len(length.out = num_samples), function(x) {
    DropletUtils::barcodeRanks(m = all_mat[[x]])
  })

  # Matrices are no longer needed once ranks are computed; free the memory
  rm(all_mat)
  gc()

  # Single PDF option
  cli_inform(message = "{.field Generating plots}")
  pboptions(char = "=")
  all_plots <- pblapply(seq_len(length.out = num_samples), function(j) {
    # `plateau[j]` is NULL when `plateau` is NULL, so no plateau line is added in that case
    Barcode_Plot(br_out = barcode_ranks_list[[j]], pt.size = pt.size, plot_title = sample_names[j], raster_dpi = raster_dpi, plateau = plateau[j])
  })

  cli_inform(message = "{.field Saving plots to file}")
  # Save plots
  pdf(output_file)
  # Make sure the PDF device is released even if printing one of the plots fails
  device_closed <- FALSE
  on.exit(expr = if (!device_closed) dev.off(), add = TRUE)

  pb <- txtProgressBar(min = 0, max = length(x = all_plots), style = 3, file = stderr())
  for (i in seq_along(along.with = all_plots)) {
    print(all_plots[[i]])
    setTxtProgressBar(pb = pb, value = i)
  }
  close(con = pb)

  device_closed <- TRUE
  dev.off()
}
# samuel-marsh/scCustomize documentation built on Dec. 20, 2024, 7:41 a.m.