R/plots.R

Defines functions all_resolutions_plots plot_qploidy_standardization plot_baf_hist plot_baf

Documented in all_resolutions_plots plot_baf plot_baf_hist plot_qploidy_standardization

globalVariables(c("Chr", "baf", "z", "ratio", "median", "window", "prop_het"))

#' Plot BAF
#'
#' This function generates a BAF (B-allele frequency) plot for visualizing
#' genomic data. It allows customization of dot size, expected and estimated
#' peaks, centromere positions, and area colors.
#'
#' @param data_sample A data.frame containing BAF and genomic position
#' information. Must include columns `Chr`, `Position`, and `sample`.
#' @param area_single Numeric value defining the area around the expected peak
#' to be considered.
#' @param ploidy Integer or vector specifying the expected ploidy. If a vector,
#' it must match the number of chromosomes in `data_sample`.
#' @param dot.size Numeric value for the size of the dots in the plot. Default
#' is 1.
#' @param add_estimated_peaks Logical. If TRUE, adds lines for estimated peaks.
#' Default is FALSE.
#' @param add_expected_peaks Logical. If TRUE, adds lines for expected peaks.
#' Default is FALSE.
#' @param colors Logical. If TRUE, adds area colors to the plot. Default is
#' FALSE.
#' @param centromeres Named vector defining centromere positions for each
#' chromosome. Names must match chromosome IDs in `data_sample`.
#' @param add_centromeres Logical. If TRUE, adds vertical lines at centromere
#' positions. Default is FALSE.
#' @param font_size Numeric value for the font size of plot labels. Default is
#' 12.
#' @return A ggplot object representing the BAF plot.
#'
#' @import ggplot2
#' @importFrom dplyr filter
#' @importFrom dplyr %>%
#' @export
plot_baf <- function(data_sample,
                     area_single,
                     ploidy,
                     dot.size = 1,
                     add_estimated_peaks = FALSE,
                     add_expected_peaks = FALSE,
                     centromeres = NULL,
                     add_centromeres = FALSE,
                     colors = FALSE,
                     font_size = 12) {
  if (add_centromeres) {
    # centromeres <- c("1 = 49130338, 5 = 49834357")
    if (length(centromeres) == 1 && any(grepl(",", centromeres))) {
      centromeres <- gsub("\ ", "", centromeres)
      centromeres <- unlist(strsplit(centromeres, ","))
      centromeres <- sapply(centromeres, function(x) strsplit(x, "="))
      centromeres_df <- data.frame(
        Chr = sapply(centromeres, "[[", 1),
        value = sapply(centromeres, "[[", 2)
      )
    } else {
      centromeres_df <- data.frame(Chr = names(centromeres), value = centromeres)
    }
  }

  if (add_estimated_peaks || add_expected_peaks || colors) {
    if (length(ploidy) == 1) {
      ploidy <- rep(ploidy, length(unique(data_sample$Chr)))
    } else if (length(ploidy) > 1 &&
      length(ploidy) < length(unique(data_sample$Chr))) {
      stop("Provide a ploidy for each chromosome.")
    }

    data_sample2 <- rets2 <- data.frame()
    for (z in seq_along(unique(data_sample$Chr))) {
      ymin <- seq(0, 1, 1 / ploidy[z]) - (area_single / (ploidy[z] * 2))
      ymax <- seq(0, 1, 1 / ploidy[z]) + (area_single / (ploidy[z] * 2))

      ymin[which(ymin < 0)] <- 0
      ymax[which(ymax > 1)] <- 1
      rets <- data.frame(ymin, ymax,
        xmax = Inf, xmin = -Inf,
        Chr = sort(unique(data_sample$Chr))[z]
      )

      data_chr <- data_sample[which(data_sample$Chr ==
        sort(unique(data_sample$Chr))[z]), ]
      idx_tot <- FALSE
      idx <- list()
      for (i in seq_len(nrow(rets))) {
        idx <- data_chr$sample >= rets$ymin[i] & data_chr$sample <= rets$ymax[i]
        idx_tot <- idx_tot | idx
      }

      data_chr$color <- NA
      data_chr$color[which(!idx_tot)] <- "red"
      data_chr$color[which(idx_tot)] <- "black"

      rets2 <- rbind(rets2, rets)
      data_sample2 <- rbind(data_sample2, data_chr)
    }
  } else {
    data_sample2 <- data_sample
  }

  p_baf <- data_sample2 %>% ggplot(aes(x = Position, y = sample)) +
    {
      if (colors) {
        geom_point(aes(color = color), alpha = 0.7, size = dot.size)
      } else {
        geom_point(alpha = 0.7, size = dot.size)
      }
    } +
    scale_color_manual(values = c("red", "black")) +
    facet_grid(~Chr, scales = "free_x") +
    theme_bw() +
    {
      if (add_expected_peaks) {
        geom_rect(
          data = rets2, inherit.aes = FALSE,
          aes(
            ymin = ymin, ymax = ymax,
            xmax = xmax, xmin = xmin,
            alpha = 0.001
          ),
          color = "red"
        )
      }
    } +
    ylab("BAF") +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      legend.position = "none", text = element_text(size = font_size)
    ) +
    {
      if (add_centromeres) {
        geom_vline(
          data = centromeres_df,
          aes(xintercept = as.numeric(value)),
          color = "blue",
          linewidth = 0.8
        )
      }
    }
  return(p_baf)
}

#' Plot BAF Histogram
#'
#' This function generates a histogram of BAF (B-allele frequency) values. It
#' supports options for adding estimated and expected peaks, area colors, and
#' filtering homozygous calls.
#'
#' @param data_sample A data.frame containing BAF and genomic position
#' information. Must include columns `Chr`, `Position`, and `sample`.
#' @param area_single Numeric value defining the area around the expected peak
#' to be considered.
#' @param ploidy Integer or vector specifying the expected ploidy. If a vector,
#' it must match the number of chromosomes in `data_sample`.
#' @param colors Logical. If TRUE, adds area colors to the histogram. Default
#' is FALSE.
#' @param add_estimated_peaks Logical. If TRUE, adds lines for estimated peaks.
#' Default is TRUE.
#' @param add_expected_peaks Logical. If TRUE, adds lines for expected peaks.
#' Default is FALSE.
#' @param BAF_hist_overall Logical. If TRUE, plots the BAF histogram for the
#' entire genome. Default is FALSE.
#' @param ratio Logical. If TRUE, plots the raw ratio instead of BAF. Default
#' is FALSE.
#' @param rm_homozygous Logical. If TRUE, removes homozygous calls from the
#' histogram. Default is FALSE.
#' @param font_size Numeric value for the font size of plot labels. Default is
#' 12.
#' @return A ggplot object representing the BAF histogram.
#'
#' @import ggplot2
#' @importFrom dplyr filter
#' @export
plot_baf_hist <- function(data_sample,
                          area_single,
                          ploidy,
                          colors = FALSE,
                          add_estimated_peaks = TRUE,
                          add_expected_peaks = FALSE,
                          BAF_hist_overall = FALSE,
                          ratio = FALSE,
                          rm_homozygous = FALSE,
                          font_size = 12) {
  if ((add_estimated_peaks || add_expected_peaks || colors) &&
    !all(is.na(ploidy))) {
    if (length(ploidy) == 1) {
      ploidy <- rep(ploidy, length(unique(data_sample$Chr)))
    } else if (length(ploidy) != 1 &&
      length(ploidy) != length(unique(data_sample$Chr))) {
      stop("Provide a ploidy for each chromosome.")
    }

    if (BAF_hist_overall) {
      Chrs <- "all"
    } else {
      Chrs <- sort(unique(data_sample$Chr))
    }
    data_sample2 <- modes.df3 <- data.frame()
    for (z in seq_along(Chrs)) {
      ymin <- seq(0, 1, 1 / ploidy[z]) - (area_single / (ploidy[z] * 2))
      ymax <- seq(0, 1, 1 / ploidy[z]) + (area_single / (ploidy[z] * 2))

      ymin[which(ymin < 0)] <- 0
      ymax[which(ymax > 1)] <- 1
      rets <- data.frame(ymin, ymax, xmax = Inf, xmin = -Inf, Chr = Chrs[z])

      if (all(Chrs == "all")) {
        split_data <- data_sample
      } else {
        split_data <- data_sample[which(data_sample$Chr ==
          sort(unique(data_sample$Chr))[z]), ]
      }

      if (ratio) split_data$sample <- split_data$ratio

      idx_tot <- FALSE
      modes.df2 <- data.frame()
      for (i in seq_len(nrow(rets))) {
        idx <- split_data$sample >= rets$ymin[i] & split_data$sample <= rets$ymax[i]
        idx_tot <- idx_tot | idx

        estimated <- mode(split_data$sample[which(split_data$sample >
          rets$ymin[i] &
          split_data$sample <
            rets$ymax[i])])
        expected <- seq(0, 1, 1 / ploidy[z])[i]
        modes.df <- cbind(
          Chr = Chrs[z],
          pivot_longer(data.frame(estimated, expected),
            cols = 1:2
          )
        )
        modes.df2 <- rbind(modes.df2, modes.df)
      }

      modes.df2$alpha <- modes.df2$name
      modes.df2$alpha <- gsub("expected", 1, modes.df2$alpha)
      modes.df2$alpha <- as.numeric(gsub("estimated", 0.5, modes.df2$alpha))

      split_data$color <- NA
      split_data$color[which(!idx_tot)] <- "outside area"
      split_data$color[which(idx_tot)] <- "inside area"

      modes.df3 <- rbind(modes.df2, modes.df3)
      data_sample2 <- rbind(data_sample2, split_data)
    }
  } else {
    data_sample2 <- data_sample
    add_estimated_peaks <- add_expected_peaks <- colors <- FALSE
  }

  if (rm_homozygous) data_sample2 <- data_sample2 %>% filter(sample != 0 & sample != 1)
  if (ratio) data_sample2$sample <- data_sample2$ratio

  if (!BAF_hist_overall) {
    p_hist <- data_sample2 %>% ggplot(aes(x = sample)) +
      {
        if (colors) geom_histogram(aes(fill = color)) else geom_histogram()
      } +
      # scale_x_continuous(breaks = round(seq(0, 1, 1/ploidy),2)) +
      {
        if (add_estimated_peaks) {
          geom_vline(
            data = modes.df3[which(modes.df3$name == "estimated"), ],
            aes(
              xintercept = value,
              color = name,
              linetype = name,
              alpha = alpha
            ),
            linewidth = 0.8
          )
        }
      } +
      {
        if (add_expected_peaks) {
          geom_vline(
            data = modes.df3[which(modes.df3$name == "expected"), ],
            aes(
              xintercept = value,
              color = name,
              linetype = name,
              alpha = alpha
            ),
            linewidth = 0.8
          )
        }
      } +
      {
        if (colors) scale_fill_manual(values = c("red", "black"))
      } +
      {
        if (add_expected_peaks || add_estimated_peaks) {
          scale_color_manual(values = c("blue", "purple"))
        }
      } +
      scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
      scale_alpha(range = c(0.7, 1), guide = "none") +
      facet_grid(~Chr, scales = "free_x") +
      theme_bw() +
      xlab("BAF") +
      theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        legend.position = "bottom", text = element_text(size = font_size)
      ) +
      {
        if (add_expected_peaks || add_estimated_peaks) labs(color = "Peaks")
      } +
      {
        if (colors) labs(fill = "Area")
      }
  } else {
    if (rm_homozygous) data_sample <- data_sample %>% filter(sample != 0 & sample != 1)
    if (ratio) data_sample$sample <- data_sample$ratio

    p_hist_all <- data_sample %>% ggplot(aes(x = sample)) +
      geom_histogram() +
      {
        if (colors) geom_histogram(aes(fill = color)) else geom_histogram()
      } +
      # scale_x_continuous(breaks = round(seq(0, 1, 1/ploidy),2)) +
      {
        if (add_estimated_peaks) {
          geom_vline(
            data = modes.df3[which(modes.df3$name == "estimated"), ],
            aes(
              xintercept = value,
              color = name,
              linetype = name,
              alpha = alpha
            ),
            linewidth = 0.8
          )
        }
      } +
      {
        if (add_expected_peaks) {
          geom_vline(
            data = modes.df3[which(modes.df3$name == "expected"), ],
            aes(
              xintercept = value,
              color = name,
              linetype = name,
              alpha = alpha
            ),
            linewidth = 0.8
          )
        }
      } +
      {
        if (colors) scale_fill_manual(values = c("red", "black"))
      } +
      {
        if (add_expected_peaks || add_estimated_peaks) {
          scale_color_manual(values = c("blue", "purple"))
        }
      } +
      scale_linetype_manual(values = c("dashed", "solid"), guide = "none") +
      scale_alpha(range = c(0.7, 1), guide = "none") +
      theme_bw() +
      {
        if (ratio) xlab("ratio") else xlab("BAF")
      } +
      theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        legend.position = "bottom", text = element_text(size = font_size)
      ) +
      {
        if (add_expected_peaks || add_estimated_peaks) labs(color = "Peaks")
      } +
      {
        if (colors) labs(fill = "Area")
      }

    p_hist <- p_hist_all
  }

  return(p_hist)
}

#' Plot Method for Qploidy Standardization
#'
#' This function generates various plots for visualizing the results of Qploidy
#' standardization. It supports multiple plot types, including BAF, z-score,
#' and histograms.
#'
#' @param x An object of class `qploidy_standardization`.
#' @param sample Character string indicating the sample ID to plot.
#' @param chr Character or numeric vector specifying the chromosomes to plot.
#' Default is NULL (plots all chromosomes).
#' @param type Character vector defining the plot types. Options include:
#'   - "all": Generates all available plot types.
#'   - "het": Plots heterozygous locus counts across genomic windows.
#'   - "BAF": Plots B-allele frequency (BAF) for each chromosome.
#'   - "zscore": Plots z-scores for each chromosome.
#'   - "BAF_hist": Plots BAF histograms for each chromosome.
#'   - "BAF_hist_overall": Plots a BAF histogram for the entire genome.
#'   - "Ratio_hist_overall": Plots a histogram of raw ratios for the entire
#'   genome.
#'   - "ratio": Plots raw ratios for each chromosome.
#'   Default is "all".
#' @param area_single Numeric value defining the area around the expected peak
#' to be considered. Default is 0.75.
#' @param ploidy Integer specifying the expected ploidy. Default is 4.
#' @param dot.size Numeric value for the size of the dots in the plots. Default
#' is 1.
#' @param font_size Numeric value for the font size of plot labels. Default is
#' 12.
#' @param add_estimated_peaks Logical. If TRUE, adds lines for estimated peaks.
#' Default is FALSE.
#' @param add_expected_peaks Logical. If TRUE, adds lines for expected peaks.
#' Default is FALSE.
#' @param centromeres Named vector defining centromere positions for each
#' chromosome. Names must match chromosome IDs in `x`.
#' @param add_centromeres Logical. If TRUE, adds vertical lines at centromere
#' positions. Default is FALSE.
#' @param colors Logical. If TRUE, adds area colors to the plots. Default is
#' FALSE.
#' @param window_size Numeric value defining the genomic position window for
#' heterozygous locus counts. Default is 2000000.
#' @param het_interval Numeric value defining the interval to consider as
#' heterozygous. Default is 0.1.
#' @param rm_homozygous Logical. If TRUE, removes homozygous calls from BAF
#' histogram plots. Default is FALSE.
#' @param ... Additional plot parameters.
#' @return A ggarrange object containing the requested plots.
#'
#' @details The function supports the following plot types:
#'
#' - **all**: Generates all available plot types.
#' - **het**: Plots the proportion of heterozygous loci across genomic windows,
#' useful for identifying regions with high or low heterozygosity.
#' - **BAF**: Plots the B-allele frequency (BAF) for each chromosome, showing
#' the distribution of allele frequencies.
#' - **zscore**: Plots z-scores for each chromosome, which can help identify
#' outliers or regions with unusual data distributions.
#' - **BAF_hist**: Plots histograms of BAF values for each chromosome,
#' providing a summary of allele frequency distributions.
#' - **BAF_hist_overall**: Plots a single histogram of BAF values for the
#' entire genome, summarizing allele frequency distributions genome-wide.
#' - **Ratio_hist_overall**: Plots a histogram of raw ratios for the entire
#' genome, useful for visualizing overall ratio distributions.
#' - **ratio**: Plots raw ratios for each chromosome, showing the distribution
#' of observed ratios.
#'
#'
#' @importFrom ggpubr ggarrange annotate_figure text_grob
#' @import ggplot2
#' @importFrom dplyr filter group_by summarise
#' @importFrom tidyr pivot_wider
#' @export
plot_qploidy_standardization <- function(x,
                                         sample = NULL,
                                         chr = NULL,
                                         type = c(
                                           "all", "het", "BAF", "zscore",
                                           "BAF_hist", "ratio",
                                           "BAF_hist_overall",
                                           "Ratio_hist_overall"
                                         ),
                                         area_single = 0.75,
                                         ploidy = 4,
                                         dot.size = 1,
                                         font_size = 12,
                                         add_estimated_peaks = FALSE,
                                         add_expected_peaks = FALSE,
                                         centromeres = NULL,
                                         add_centromeres = FALSE,
                                         colors = FALSE,
                                         window_size = 2000000,
                                         het_interval = 0.1,
                                         rm_homozygous = FALSE, ...) {
  if (!inherits(x, "qploidy_standardization")) {
    stop("Object is not of class qploidy_standardization")
  }

  if (is.null(sample)) stop("Define sample ID")

  data_sample <- x$data[which(x$data$SampleName == sample), ]

  if (is.null(chr)) {
    chr <- sort(unique(data_sample$Chr))
  } else if (is.numeric(chr)) {
    chr <- sort(unique(data_sample$Chr))[chr]
  }

  if(!all(chr %in% unique(data_sample$Chr))) {
    stop("Chromosome not found in data.")
  }

  data_sample <- data_sample %>%
    filter(Chr %in% chr) %>%
    select(MarkerName, SampleName, Chr, Position, baf, z, ratio)

  data_sample$Position <- as.numeric(data_sample$Position)

  if (nrow(data_sample) == 0) stop("Sample or chromosome not found.")

  baf_sample <- data_sample %>% pivot_wider(
    names_from = SampleName,
    values_from = baf
  )
  zscore_sample <- data_sample %>% pivot_wider(
    names_from = SampleName,
    values_from = z
  )

  colnames(baf_sample)[ncol(baf_sample)] <- "sample"

  baf_point <- baf_hist <- p_z <- raw_ratio <- het_rate <- baf_hist_overall <-
    ratio_hist_overall <- NULL

  if (any(type == "all" | type == "BAF")) {
    baf_point <- plot_baf(baf_sample,
      area_single,
      ploidy,
      dot.size = dot.size,
      add_estimated_peaks,
      add_expected_peaks,
      centromeres,
      add_centromeres,
      colors,
      font_size = font_size
    )
  }

  if (add_centromeres) {
    # centromeres <- c("1 = 49130338, 5 = 49834357")
    if (length(centromeres) == 1 && any(grepl(",", centromeres))) {
      centromeres <- gsub("\ ", "", centromeres)
      centromeres <- unlist(strsplit(centromeres, ","))
      centromeres <- sapply(centromeres, function(x) strsplit(x, "="))
      centromeres_df <- data.frame(
        Chr = sapply(centromeres, "[[", 1),
        value = sapply(centromeres, "[[", 2)
      )
    } else {
      centromeres_df <- data.frame(Chr = names(centromeres), value = centromeres)
    }

    for (i in seq_along(centromeres)) {
      idx <- which(baf_sample$Chr == centromeres_df$Chr[i] &
        baf_sample$Position < centromeres_df$value[i])
      baf_sample$Chr[idx] <- paste0(baf_sample$Chr[idx], ".1")
      idx <- which(baf_sample$Chr == centromeres_df$Chr[i] &
        baf_sample$Position >= centromeres_df$value[i])
      baf_sample$Chr[idx] <- paste0(baf_sample$Chr[idx], ".2")
      if (any(type == "zscore")) {
        idx <- which(zscore_sample$Chr == centromeres_df$Chr[i] &
          zscore_sample$Position < centromeres_df$value[i])
        zscore_sample$Chr[idx] <- paste0(zscore_sample$Chr[idx], ".1")
        idx <- which(zscore_sample$Chr == centromeres_df$Chr[i] &
          zscore_sample$Position >= centromeres_df$value[i])
        zscore_sample$Chr[idx] <- paste0(zscore_sample$Chr[idx], ".2")
      }
    }
  }

  if (any(type == "all" | type == "BAF_hist")) {
    baf_hist <- plot_baf_hist(
      data_sample = baf_sample,
      area_single,
      ploidy,
      colors,
      add_estimated_peaks,
      add_expected_peaks,
      BAF_hist_overall = FALSE,
      rm_homozygous = rm_homozygous,
      font_size = font_size
    )
  }

  if (any(type == "all" | type == "BAF_hist_overall")) {
    baf_hist_overall <- plot_baf_hist(
      data_sample = baf_sample,
      area_single,
      ploidy,
      colors,
      add_estimated_peaks,
      add_expected_peaks,
      BAF_hist_overall = TRUE,
      rm_homozygous = rm_homozygous,
      font_size = font_size
    )
  }

  if (any(type == "all" | type == "Ratio_hist_overall")) {
    ratio_hist_overall <- plot_baf_hist(
      data_sample = baf_sample,
      area_single,
      ploidy,
      colors,
      add_estimated_peaks,
      add_expected_peaks,
      BAF_hist_overall = TRUE,
      ratio = TRUE,
      rm_homozygous = rm_homozygous,
      font_size = font_size
    )
  }

  if (any(type == "zscore")) {
    colnames(zscore_sample)[ncol(zscore_sample)] <- "z"
    p_z <- zscore_sample %>%
      ggplot(aes(x = Position, y = z)) +
      facet_grid(. ~ Chr, scales = "free") +
      geom_smooth(method = "gam") +
      theme_bw() +
      theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        text = element_text(size = font_size)
      ) +
      geom_hline(yintercept = median(zscore_sample$z), linetype = "dashed")
  }

  if (any(type == "ratio")) {
    raw_ratio <- data_sample %>% ggplot(aes(x = Position, y = ratio)) +
      geom_point(alpha = 0.7, size = dot.size) +
      facet_grid(. ~ Chr, scales = "free") +
      theme_bw() +
      theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        text = element_text(size = font_size)
      )
  }

  if (any(type == "het")) {
    data_sample$het_rate <- NA
    data_sample$het_rate[which(data_sample$ratio <= 0 + het_interval |
      data_sample$ratio >= 1 - het_interval)] <- 0
    data_sample$het_rate[which(data_sample$ratio > 0 + het_interval &
      data_sample$ratio < 1 - het_interval)] <- 1

    data_sample_lst <- split.data.frame(data_sample, data_sample$Chr)

    for (j in seq_along(data_sample_lst)) {
      data_sample_lst[[j]]$window <- 1
      if (length(unique(data_sample_lst[[j]]$Position)) == 1) next()
      intervals_start <- c(seq(
        1, max(data_sample_lst[[j]]$Position),
        window_size
      ))
      intervals_end <- c(seq(
        1, max(data_sample_lst[[j]]$Position),
        window_size
      ) - 1, max(data_sample_lst[[j]]$Position))
      intervals_end <- intervals_end[-1]
      for (i in seq_along(intervals_start)) {
        data_sample_lst[[j]]$window[which(data_sample_lst[[j]]$Position >=
          intervals_start[i] &
          data_sample_lst[[j]]$Position <=
            intervals_end[i])] <- intervals_start[i]
      }
    }

    data_sample <- do.call(rbind, data_sample_lst)

    het_rate <- data_sample %>%
      group_by(Chr, window) %>%
      summarise(prop_het = sum(het_rate, na.rm = TRUE) /
        length(which(het_rate == 1 | het_rate == 0))) %>%
      ggplot(aes(x = window, y = prop_het, color = prop_het)) +
      geom_line() +
      scale_color_gradient(low = "black", high = "red") +
      facet_grid(. ~ Chr, scales = "free") +
      theme_bw() +
      ylab("proportion of heterozygous loci") +
      xlab("Position") +
      theme(
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        legend.position = "none", text = element_text(size = font_size)
      )
  }

  p_all <- list(
    het_rate, raw_ratio, baf_point, baf_hist, baf_hist_overall,
    ratio_hist_overall, p_z
  )

  rm <- which(sapply(p_all, is.null))
  if (length(rm) != 0) p_all <- p_all[-rm]

  p_result <- ggarrange(plotlist = p_all, ncol = 1)

  p_result <- annotate_figure(p_result, top = text_grob(sample,
    face = "bold",
    size = 14
  ))

  return(p_result)
}

#' Plot graphics for ploidy visual inspection for each resolution
#'
#' This function generates and saves plots for visual inspection of ploidy at
#' different resolutions: chromosome, chromosome-arm, and sample levels. It is
#' designed for parallelization purposes and supports customization of
#' centromere positions and chromosome selection.
#'
#' @param data_standardized An object of class `qploidy_standardization`
#' containing standardized data for ploidy analysis.
#' @param sample A character string specifying the sample name to be analyzed.
#' @param ploidy A numeric value indicating the expected ploidy of the sample.
#' This parameter is required.
#' @param centromeres A named vector with centromere positions (in base pairs)
#' for each chromosome. The names must match the chromosome IDs in the dataset.
#' This is used for chromosome-arm level resolution.
#' @param file_name A character string defining the output file path and name
#' prefix for the saved plots. The function appends resolution-specific
#' suffixes to this prefix. If NULL, plots are not saved to files.
#' @param chr A vector specifying the chromosomes to include in the analysis.
#' If NULL, all chromosomes are included.
#' @param types_chromosome A character vector defining the plot types for
#' chromosome-level resolution. Options include:
#'   - "het": Plots heterozygous locus counts.
#'   - "BAF": Plots B-allele frequency (BAF).
#'   - "zscore": Plots z-scores.
#'   - "BAF_hist": Plots BAF histograms for each chromosome.
#'   - "ratio": Plots raw ratios for each chromosome.
#'   Default is c("Ratio_hist", "BAF_hist", "zscore").
#' @param types_chromosome_arm A character vector defining the plot types for
#' chromosome-arm level resolution. Options include:
#'   - "het": Plots heterozygous locus counts.
#'   - "BAF": Plots B-allele frequency (BAF).
#'   - "zscore": Plots z-scores.
#'   - "BAF_hist": Plots BAF histograms for each chromosome arm.
#'   - "ratio": Plots raw ratios for each chromosome arm.
#'   Default is c("Ratio_hist", "BAF_hist", "zscore").
#' @param types_sample A character vector defining the plot types for
#' sample-level resolution. Options include:
#'   - "Ratio_hist_overall": Plots a histogram of raw ratios for the entire
#'   genome.
#'   - "BAF_hist_overall": Plots a BAF histogram for the entire genome.
#'   Default is c("Ratio_hist_overall", "BAF_hist_overall").
#' @return A list containing the generated plots for each resolution:
#' - `chromosome`: Plot for chromosome-level resolution.
#' - `chromosome_arm`: Plot for chromosome-arm level resolution (if centromeres
#' are provided).
#' - `sample`: Plot for sample-level resolution.
#' @details The function generates three types of plots:
#'
#' - **Chromosome-level resolution**: Plots raw ratio, BAF histograms,
#' z-scores, heterozygous locus counts, and BAF for each chromosome.
#' - **Chromosome-arm level resolution**: Similar to chromosome-level but
#' splits data by chromosome arms using centromere positions.
#' - **Sample-level resolution**: Combines all markers in the sample to
#' generate overall raw ratio and BAF histograms.
#'
#' The plots are saved as PNG files with the following suffixes:
#' - `_res:chromosome.png`
#' - `_res:chromosome_arm.png`
#' - `_res:sample.png`
#'
#' If `file_name` is NULL, the plots are not saved to files but are returned in
#' the output list.
#'
#' @import ggplot2
#' @export
all_resolutions_plots <- function(
    data_standardized,
    sample,
    ploidy,
    centromeres,
    types_chromosome = c("Ratio_hist", "BAF_hist", "zscore"),
    types_chromosome_arm = c("Ratio_hist", "BAF_hist", "zscore"),
    types_sample = c("Ratio_hist_overall", "BAF_hist_overall"),
    file_name = NULL,
    chr = NULL) {
  # Input checks
  if (!inherits(data_standardized, "qploidy_standardization")) {
    stop("The 'data_standardized' parameter must be of class 'qploidy_standardization'.")
  }

  if (!is.character(sample) || length(sample) != 1) {
    stop("The 'sample' parameter must be a single character string.")
  }

  if (!is.numeric(ploidy)) {
    stop("The 'ploidy' parameter must be a numeric value.")
  }

  if (!is.null(chr) && !is.vector(chr)) {
    stop("The 'chr' parameter must be a vector or NULL.")
  }

  p_list <- list()
  # Raw ratio and BAF histograms (chromosome level resolution)
  p <- plot_qploidy_standardization(
    x = data_standardized,
    sample = sample,
    type = types_chromosome,
    chr = chr,
    add_expected_peaks = TRUE,
    ploidy = ploidy
  )
  p_list[[1]] <- p
  if (!is.null(file_name)) ggsave(p, filename = paste0(file_name, "_res:chromosome.png"))

  # Raw ratio and BAF histograms combining all markers in the sample (chromosome-arm level resolution)
  if (!is.null(centromeres)) {
    if (!is.vector(centromeres) || is.null(names(centromeres))) {
      stop("The 'centromeres' parameter must be a named vector with chromosome IDs as names.")
    }

    ploidy_cent <- ploidy
    if(length(ploidy) > 1 && length(ploidy) < length(chr)) ploidy_cent <- rep(ploidy, 2)

    p <- plot_qploidy_standardization(
      x = data_standardized,
      sample = sample,
      type = types_chromosome_arm,
      chr = chr,
      ploidy = ploidy_cent,
      add_expected_peaks = TRUE,
      add_centromeres = TRUE,
      centromeres = centromeres
    )

    p_list[[2]] <- p
    if (!is.null(file_name)) ggsave(p, filename = paste0(file_name, "_res:chromosome_arm.png"))
  }

  # Raw ratio and BAF histograms combining all markers in the sample (sample level resolution)
  p <- plot_qploidy_standardization(
    x = data_standardized,
    sample = sample,
    type = types_sample,
    chr = chr,
    ploidy = ploidy,
    add_expected_peaks = TRUE
  )

  p_list[[3]] <- p
  if (!is.null(file_name)) ggsave(p, filename = paste0(file_name, "_res:sample.png"))

  names(p_list) <- c("chromosome", "chromosome_arm", "sample")
  return(p_list)
}

Try the Qploidy package in your browser

Any scripts or data that you put into this service are public.

Qploidy documentation built on June 8, 2025, 10 a.m.