gbuehler-MiniChip: MiniChip

Documented in plotTracks

#' @title plotTracks
#'
#' @description Plot tracks for a given genomic region of interest (ROI).
#'
#' @details plotTracks is used to take the ChIP, repeat, and gene annotation data using \code{calcTracks} and plot tracks 
#' for a given region of interest (ROI).
#'
#' @param trackData A list of data frames generated with calcTracks.
#' @param bigwigNames Character vector containing the names to describe the \code{bigwigFiles} you are using (for example: "H3K9me3_replicate1"). 
#' @param color_map Named vector indicating the plotting color for each group of tracks.(for example: c("H3K9me3" = "red")).
#'
#' @return A plot of genomic tracks of the data in \code{bigwigFiles} and the repeat and gene annotation.
#'
#' @examples
#' #see vignette
#'
#' @importFrom dplyr group_by summarise filter pull
#' @importFrom ggplot2 ggplot geom_area facet_wrap scale_fill_manual scale_x_continuous scale_y_continuous annotate
#' @importFrom ggplot2 labs theme element_blank element_text element_rect margin arrow geom_text aes geom_segment
#' @importFrom cowplot theme_cowplot
#' @importFrom patchwork wrap_plots plot_layout
#' @importFrom stats na.omit
#' @importFrom S4Vectors mcols
#'
#' @export
plotTracks <- function(trackData, bigwigNames, color_map){ 
  # Input validation
  
  # Check if trackData is provided and is a list
  if (missing(trackData)) {
    stop("Error: 'trackData' argument is missing. Please provide data generated by calcTracks().")
  }
  if (!is.list(trackData)) {
    stop("Error: 'trackData' must be a list object generated by calcTracks().")
  }
  
  # Check if required components exist in trackData
  required_components <- c("combined_coverage", "reps_df", "genes_df", "exons_df")
  missing_components <- setdiff(required_components, names(trackData))
  if (length(missing_components) > 0) {
    stop(paste("Error: trackData is missing the following required components:", 
               paste(missing_components, collapse=", "), 
               ". Please ensure you're using data generated by calcTracks()."))
  }
  
  # Check if combined_coverage contains necessary columns
  required_columns <- c("index", "score", "group", "file_name", "seqnames")
  missing_columns <- setdiff(required_columns, colnames(trackData$combined_coverage))
  if (length(missing_columns) > 0) {
    stop(paste("Error: combined_coverage is missing required columns:", 
               paste(missing_columns, collapse=", ")))
  }
  
  # Check if bigwigNames is provided and valid
  if (missing(bigwigNames)) {
    stop("Error: 'bigwigNames' argument is missing. Please provide a character vector of names for your bigwig files.")
  }
  if (!is.character(bigwigNames)) {
    stop("Error: 'bigwigNames' must be a character vector.")
  }
  
  # Check if the bigwigNames match the file_name values in combined_coverage
  track_files <- unique(trackData$combined_coverage$file_name)
  if (!all(track_files %in% bigwigNames) || !all(bigwigNames %in% track_files)) {
    warning(paste("Warning: Mismatch between 'bigwigNames' and file names in trackData. ",
                  "This may cause incorrect labeling in plots. ",
                  "bigwigNames should exactly match the file names used in calcTracks()."))
  }
  
  # Check if color_map is provided and valid
  if (missing(color_map)) {
    stop("Error: 'color_map' argument is missing. Please provide a named vector of colors for each group.")
  }
  if (!is.vector(color_map) || is.null(names(color_map))) {
    stop("Error: 'color_map' must be a named vector, e.g., c('H3K9me3' = 'red').")
  }
  
  # Check if all groups in the data have corresponding colors
  groups_in_data <- unique(trackData$combined_coverage$group)
  missing_colors <- setdiff(groups_in_data, names(color_map))
  if (length(missing_colors) > 0) {
    stop(paste("Error: Missing colors for the following groups:", 
               paste(missing_colors, collapse=", "), 
               ". Please provide colors for all groups in your data."))
  }
  
  # Check if reps_df has required columns
  if (nrow(trackData$reps_df) > 0) {
    rep_required_cols <- c("start", "end", "ypos", "repeat_name")
    missing_rep_cols <- setdiff(rep_required_cols, colnames(trackData$reps_df))
    if (length(missing_rep_cols) > 0) {
      stop(paste("Error: reps_df is missing required columns:", 
                 paste(missing_rep_cols, collapse=", ")))
    }
  }
  
  # Check if genes_df has required columns
  if (nrow(trackData$genes_df) > 0) {
    gene_required_cols <- c("start", "end", "ypos", "gene_name")
    missing_gene_cols <- setdiff(gene_required_cols, colnames(trackData$genes_df))
    if (length(missing_gene_cols) > 0) {
      stop(paste("Error: genes_df is missing required columns:", 
                 paste(missing_gene_cols, collapse=", ")))
    }
  }
  
  # Check if exons_df has required columns
  if (nrow(trackData$exons_df) > 0) {
    exon_required_cols <- c("start", "end", "ypos")
    missing_exon_cols <- setdiff(exon_required_cols, colnames(trackData$exons_df))
    if (length(missing_exon_cols) > 0) {
      stop(paste("Error: exons_df is missing required columns:", 
                 paste(missing_exon_cols, collapse=", ")))
    }
  }
  
  # Check for empty data
  if (nrow(trackData$combined_coverage) == 0) {
    stop("Error: No coverage data found. Please check your input regions and bigwig files.")
  }
  
  # Check for NA values in critical columns
  if (any(is.na(trackData$combined_coverage$index)) || 
      any(is.na(trackData$combined_coverage$score)) || 
      any(is.na(trackData$combined_coverage$group))) {
    warning("Warning: NA values found in critical columns (index, score, or group). This may affect plot appearance.")
  }
  
  # Extract data
  combined_coverage <- trackData$combined_coverage
  reps_df <- trackData$reps_df
  genes_df <- trackData$genes_df
  exons_df <- trackData$exons_df
  
  # Determine x-axis limits (shared across all plots)
  x_limits <- range(combined_coverage$index)
  
  # Calculate maximum score per group
  group_max_scores <- combined_coverage %>%
    group_by(group) %>%
    summarise(max_score = max(na.omit(score)))
  
  #empty list for plots
  group_plots <- list()
  
  # Reorder the factor levels of the file_name column
  combined_coverage$file_name <- factor(combined_coverage$file_name, levels = bigwigNames)
  
  # Loop through each group to create individual group plots
  for (grp in unique(combined_coverage$group)) {
    # Filter data for the current group
    group_data <- combined_coverage %>% dplyr::filter(group == grp)
    
    # Get the maximum score for the group
    max_score <- group_max_scores %>% dplyr::filter(group == grp) %>% pull(max_score)
    
    # Initialize an empty vector for grpcolor
    grpcolor <- c()
    
    for (g in grp) {
      grpcolor <- c(grpcolor, rep(color_map[g], length(unique(group_data$file_name))))  
    }
    
    
    # Create a coverage plot for the group
    group_plot <- ggplot(group_data) +
      geom_area(aes(x = index, y = score, fill = group)) +
      facet_wrap(~ file_name, ncol = 1) +  # Facet for individual tracks
      scale_fill_manual(values = grpcolor) +
      scale_x_continuous(limits = x_limits) +
      scale_y_continuous(limits = c(0, max_score)) +  # Scale to group max
      labs(x = "Genomic Position", y = paste0("Cpm in ", grp)) +
      
      
      # Adjust themes
      theme_cowplot() +
      theme(
        panel.grid = element_blank(),  # Remove grid lines
        strip.text = element_text(size = 12, face = "bold"),
        legend.position = "none",
        panel.spacing = unit(0.5, "lines"),
        axis.text.y = element_text(size = 10),  
        axis.title.y = element_text(size = 12, angle = 90, vjust = 0.5),
        axis.text.x = element_blank(),  # Hide x-axis text
        axis.ticks.x = element_blank(),  # Hide x-axis ticks
        axis.title.x =  element_blank(),
        axis.line.x = element_blank(),
        strip.background = element_rect(fill = NA, colour = NA),
        plot.margin = margin(t = 10, b = 0, l = 10, r = 10)
      ) 
    
    # Add the group's plot to the list
    group_plots[[grp]] <- group_plot
  }
  
  # Create the repeat annotation plot (shared across all groups)
  annotation_plot <- ggplot() +
    scale_x_continuous(limits = x_limits) +
    scale_y_continuous(limits = c(-8,8)) +
    labs(x = unique(combined_coverage$seqnames), y = NULL) +
    theme_cowplot() +
    theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.text.x = element_text(size = 12),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      plot.margin = margin(t = 2, b = 10, l = 10, r = 10)
    )
  
  # Add repeats if data exists
  if (nrow(reps_df) > 0) {
    annotation_plot <- annotation_plot +
      geom_segment(data = reps_df, 
                   aes(x = start, xend = end, y = ypos, yend = ypos),
                   arrow = arrow(length = unit(0.1, "inches")),
                   lineend="butt", linejoin="mitre",
                   color = "black", size = 8) +
      geom_text(data = reps_df,
                aes(x = (start + end) / 2, y = ypos, label = repeat_name),
                color = "white", size = 4.5, hjust = 0.5)
  }
  
  # Create the gene annotation plot (shared across all groups)
  gene_annotation_plot <- ggplot() +
    scale_x_continuous(limits = x_limits) +
    scale_y_continuous(limits = c(-8,9)) +
    labs(x = unique(combined_coverage$seqnames), y = NULL) +
    
    # Add scale bar (200bp) 
    annotate("segment",
             x = max(x_limits) - 200, xend = max(x_limits),
             y = 7, yend =  7,
             size = 0.8, color = "black") +
    annotate("text",
             x = max(x_limits) - 100, y = 6,
             label = "200bp", size = 3, hjust = 0.5) +
    
    theme_cowplot() +
    theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.text.x = element_text(size = 12),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      plot.margin = margin(t = 2, b = 10, l = 10, r = 10)
    )
  
  # Add genes if data exists
  if (nrow(genes_df) > 0) {
    gene_annotation_plot <- gene_annotation_plot +
      geom_segment(data = genes_df, 
                   aes(x = start, xend = end, y = ypos, yend = ypos),
                   arrow = arrow(length = unit(0.05, "inches")),
                   lineend="butt", linejoin="mitre",
                   color = "black", size = 1) +
      geom_text(data = genes_df,
                aes(x = (start + end) / 2, y = ypos + 3, label = gene_name),
                color = "black", size = 4.5, hjust = 0.5)
  }
  
  # Add exons if data exists
  if (nrow(exons_df) > 0) {
    gene_annotation_plot <- gene_annotation_plot +
      geom_segment(data = exons_df, 
                   aes(x = start, xend = end, y = ypos, yend = ypos),
                   arrow = arrow(length = unit(0.1, "inches")),
                   lineend="butt", linejoin="mitre",
                   color = "black", size = 6)
  }
  
  # Check if there are any plots to combine
  if (length(group_plots) == 0) {
    stop("Error: No group plots could be created. Please check your data.")
  }
  
  # Combine all group plots and the annotation plot using patchwork
  tryCatch({
    final_plot <- wrap_plots(group_plots, ncol = 1) / annotation_plot + gene_annotation_plot +
      plot_layout(heights = c(rep(5, length(group_plots)), 2, 2))  # Adjust heights for better spacing
    
    # Display the final plot
    return(final_plot)
  }, error = function(e) {
    stop(paste("Error creating the final plot:", e$message, 
               "This might be due to incompatible data structures or missing required packages."))
  })
}