R/eset_distribution.R

Defines functions eset_distribution

Documented in eset_distribution

#' Visualize Expression Set Distribution
#'
#' @description
#' Builds a per-sample boxplot and a per-sample density plot of the
#' expression values in an expression set. Useful for quality control and
#' for assessing data normalization. Only a random subset of the samples
#' (columns) is plotted.
#'
#' @details
#' Features are first filtered with `feature_manipulation()`; only rows whose
#' names it returns are kept, so `eset` needs row names. Columns are drawn
#' with [base::sample()], so call [base::set.seed()] beforehand if a
#' reproducible selection is required. The boxplot subtitle reports the
#' sample count, feature count and maximum of the full filtered `eset`, i.e.
#' before column sampling and before any log2 transformation.
#'
#' @param eset Expression matrix or data frame with genes in rows and samples
#'   in columns.
#' @param quantile Single positive number used as the divisor when choosing
#'   how many columns to plot: `round(ncol(eset) / quantile)` columns (at
#'   least one, at most all of them) are sampled at random. Default is 3
#'   (about 1/3 of the columns).
#' @param log Single logical indicating whether the sampled data are passed
#'   through `log2eset()` before plotting. Default is `TRUE`.
#' @param project Optional output directory path for saving files. If `NULL`,
#'   no files are saved. Default is `NULL`. Only the last path component is
#'   used inside the generated file names.
#'
#' @return Invisibly returns `NULL`. If `project` is provided, saves
#'   `1-<project>-boxplot.png` and `2-<project>-Densityplot.png` to disk.
#'
#' @export
#'
#' @examples
#' # Simulate data
#' set.seed(123)
#' sim_eset <- matrix(rnorm(1000 * 10, mean = 5, sd = 2), 1000, 10)
#' rownames(sim_eset) <- paste0("Gene", 1:1000)
#' colnames(sim_eset) <- paste0("Sample", 1:10)
#'
#' # Run distribution plot
#' result <- eset_distribution(sim_eset)
eset_distribution <- function(eset, quantile = 3, log = TRUE, project = NULL) {
  if (is.null(eset)) return(NULL)

  # ---- Argument validation ----
  if (!is.matrix(eset) && !is.data.frame(eset)) {
    cli::cli_abort("{.arg eset} must be a matrix or data frame")
  }
  if (nrow(eset) == 0 || ncol(eset) == 0) {
    cli::cli_abort("{.arg eset} must have at least one row and one column")
  }
  # Length and NA are checked before the comparison so that `||` never sees a
  # vector or a missing value (both would raise an unrelated base-R error).
  if (
    !is.numeric(quantile) || length(quantile) != 1L ||
      is.na(quantile) || quantile <= 0
  ) {
    cli::cli_abort("{.arg quantile} must be a positive number")
  }
  # Accept anything `if ()` would have accepted (e.g. 1/0), but fail with a
  # clear message on NA, NULL or vectors.
  use_log <- as.logical(log)
  if (length(use_log) != 1L || is.na(use_log)) {
    cli::cli_abort("{.arg log} must be {.code TRUE} or {.code FALSE}")
  }

  rlang::check_installed("reshape2")

  # ---- Feature filtering ----
  feas <- feature_manipulation(
    data = eset,
    feature = rownames(eset),
    is_matrix = TRUE
  )
  eset <- eset[rownames(eset) %in% feas, , drop = FALSE]
  if (nrow(eset) == 0) {
    # Without this guard the reshape step below fails with an obscure
    # "names attribute" error and max() warns about an empty input.
    cli::cli_abort(c(
      "No features remain in {.arg eset} after filtering.",
      "i" = "Check that {.arg eset} has row names and numeric values."
    ))
  }

  # ---- Random column subset ----
  n_samples <- ncol(eset)
  n_select <- max(1, round(n_samples / quantile))
  # quantile < 1 would request more columns than exist; cap at n_samples.
  index <- sample(seq_len(n_samples), min(n_select, n_samples))
  eset1 <- eset[, index, drop = FALSE]

  if (use_log) {
    eset1 <- log2eset(eset1)
  }

  # ---- Reshape to long format: one row per (sample, feature) ----
  eset1 <- as.data.frame(t(eset1))
  eset1$Sample.Name <- rownames(eset1)
  eset_long <- reshape2::melt(eset1, id.vars = "Sample.Name")
  colnames(eset_long)[2:3] <- c("Symbol", "Intensity")

  # ---- Boxplot ----
  # Subtitle statistics describe the full filtered matrix, not the subset.
  box_subtitle <- paste0(
    "Patient No. is: ", n_samples, "; ",
    "Number of features is: ", nrow(eset), ";  ",
    "Maximum is: ", round(max(eset, na.rm = TRUE), 2)
  )

  p <- ggplot2::ggplot(
    eset_long,
    ggplot2::aes(x = .data$Sample.Name, y = .data$Intensity)
  ) +
    ggplot2::geom_boxplot() +
    ggplot2::theme_bw() +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 23),
      axis.text.x = ggplot2::element_text(
        angle = 90, vjust = 0.5, hjust = 1.0
      )
    ) +
    ggplot2::ggtitle("Normalized signal intensity", box_subtitle) +
    ggplot2::ylab("Intensity") +
    ggplot2::xlab("Sample") +
    ggplot2::theme(
      legend.position = "none",
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank(),
      plot.subtitle = ggplot2::element_text(
        size = 18, hjust = 0.1, face = "italic", color = "black"
      )
    )

  # ---- Density plot ----
  # Only claim "Log2" in the labels when the transformation was applied.
  value_label <- if (use_log) "Log2 Expression" else "Expression"

  p2 <- ggplot2::ggplot(
    eset_long,
    ggplot2::aes(.data$Intensity, group = .data$Sample.Name)
  ) +
    ggplot2::geom_density() +
    ggplot2::theme_bw() +
    ggplot2::theme(
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank()
    ) +
    ggplot2::ggtitle(paste0("Histogram of ", value_label)) +
    ggplot2::ylab("Density") +
    ggplot2::xlab(value_label)

  # ---- Optional export ----
  if (!is.null(project)) {
    # NOTE(review): creat_folder() is assumed to create the directory and
    # return a list with a `folder_name` element - confirm against helper.
    out_dir <- creat_folder(project)$folder_name
    # Use only the last path component in the file name; embedding a path
    # such as "results/qc" would point ggsave at a non-existent subfolder.
    file_tag <- basename(as.character(project))

    ggplot2::ggsave(
      filename = paste0("1-", file_tag, "-boxplot.png"),
      plot = p,
      width = 15, height = 8,
      path = out_dir
    )

    ggplot2::ggsave(
      filename = paste0("2-", file_tag, "-Densityplot.png"),
      plot = p2,
      width = 9, height = 6,
      path = out_dir
    )
  }

  invisible(NULL)
}

Try the IOBR package in your browser

Any scripts or data that you put into this service are public.

IOBR documentation built on May 30, 2026, 5:07 p.m.