R/plot_coverage.R

#' @title Plot coverage
#'
#' @description Plots a heatmap of coverage from a table generated with RADSex subset, RADSex signif, or RADSex loci.
#'
#' @param input_file_path Path to a coverage table generated by RADSex.
#'
#' @param popmap_file_path Path to a population map file to colorize males and females names (default NULL).
#'
#' @param width Width of the output file in inches (default 10).
#'
#' @param height Height of the output file in inches (default 8).
#'
#' @param dpi Resolution of the output file (default 300).
#'
#' @param title Plot title (default NULL).
#'
#' @param min.coverage Minimum coverage value to consider a sequence present in an individual: coverage lower than min.coverage will be set to 0 (default 0).
#'
#' @param max.coverage Maximum sequence coverage allowed in an individual: coverage higher than max.coverage will be set to max.coverage (default 150).
#'
#' @param distance.method Method to use to compute the distance matrix.
#' Possible values: "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski". See \code{\link[stats]{dist}} for details (default "euclidean").
#'
#' @param clustering.method Method to use in the clustering step.
#' Possible values: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
#' See \code{\link[stats]{hclust}} for details (default "ward.D").
#'
#' @param males.color If a popmap is specified, sets the color of male individual names on the horizontal axis (default "dodgerblue3").
#'
#' @param females.color If a popmap is specified, sets the color of female individual names on the horizontal axis (default "red3").
#'
#' @param coverage.palette Color palette for coverage. The value should be a vector of length 5 corresponding to the following intervals/values:
#' [0, 1 : mean, mean : 3rd quartile, 3rd quartile : (max - 1), max] (default c("white", "royalblue2", "black", "gold2", "red3"))
#'
#' @param individual.names If TRUE, shows individual names on the x-axis (default TRUE).
#'
#' @param sequence.names If TRUE, shows sequence names on the y-axis (default FALSE).
#'
#' @param individual.dendrogram If TRUE, shows individual clustering dendrogram on the x-axis (default TRUE).
#'
#' @param sequence.dendrogram If TRUE, shows sequence clustering dendrogram on the y-axis (default TRUE).
#'
#' @examples
#' plot_coverage("coverage_table.tsv", popmap_file_path = "popmap.tsv",
#'               width = 12, height = 10,
#'               title = "Individuals and sequences clustering based on coverage",
#'               min.coverage = 5,
#'               clustering.method = "ward.D2", distance.method = "binary",
#'               males.color = "blue", females.color = "red",
#'               coverage.palette = c("white", "green", "black", "red", "red3"),
#'               individual.names = TRUE, sequence.names = TRUE,
#'               individual.dendrogram = TRUE, sequence.dendrogram = TRUE)


plot_coverage <- function(input_file_path, output_file_path = NULL, popmap_file_path = NULL,
                          width = 10, height = 8, dpi = 300,
                          title = NULL,
                          min.coverage = 0, max.coverage = 150,
                          distance.method = "euclidean", clustering.method = "ward.D",
                          males.color = "dodgerblue3", females.color = "red3",
                          coverage.palette = c("white", "royalblue2", "black", "gold2", "red3"),
                          individual.names = TRUE, sequence.names = FALSE,
                          individual.dendrogram = TRUE, sequence.dendrogram = TRUE) {

    # Check if input file exists
    if (!file.exists(input_file_path)) {
        stop(paste0("The specified input file (", input_file_path, ") does not exist."))
    }

    # Load coverage data
    raw_data <- load_coverage_table(input_file_path)

    # Load popmap if specified
    if (!is.null(popmap_file_path)) {
        popmap <- load_popmap(popmap_file_path)
    } else {
        popmap <- NULL
    }

    # Cluster data
    data <- coverage_clustering(raw_data,
                                min.coverage = min.coverage,
                                max.coverage = max.coverage,
                                distance.method = distance.method,
                                clustering.method = clustering.method)

    # Generate the plot
    heatmap <- coverage_heatmap(data,
                                popmap = popmap,
                                title = title,
                                males.color = males.color,
                                females.color = females.color,
                                coverage.palette = coverage.palette,
                                individual.names = individual.names,
                                sequence.names = sequence.names,
                                individual.dendrogram = individual.dendrogram,
                                sequence.dendrogram = sequence.dendrogram)

    # Save the plot to output file if specified, otherwise display the plot
    if (!is.null(output_file_path)) {
        cowplot::ggsave(output_file_path, plot = heatmap, width = width, height = height, dpi = dpi)
    } else {
        grid::grid.newpage()
        grid::grid.draw(heatmap)
    }
    return(heatmap)
}
RomainFeron/radsex-vis documentation built on May 23, 2019, 2:48 p.m.