R/batch_volcanos.R

Defines functions produce_single_volcano_batch validate_volcano_infos complete_volcano_infos batch_volcano

Documented in batch_volcano

#' Produce volcano plots in batch
#'
#' The goal of this function is to take a csv file or a \code{data.frame}
#' describing all the volcano plots to produce and launch them in batches.
#'
#' The table may contain the following columns, in no specific order:
#'   * id_plot: unique identifier for each volcano plot. Mandatory.
#'   * id_de: unique identifier for each DE analysis results. Mandatory.
#'   Default: \code{TRUE}.
#'   * y_axis: Column in the DE results to use for the y-axis. "padj" or
#'   "pvalue" Default: "padj"
#'   * p_threshold: Maximal pvalue or padj to be considered significative.
#'   Default: 0.05
#'   * fc_threshold: Minimal fold-change to be considered significative.
#'   Default: 1.5
#'   * title: The title of the volcano. Default: NA
#'   * show_signif_counts: show the number of up- and down-regulated genes?
#'   * show_signif_lines: show lines at the threshold for significance? "none",
#'   "vertical","horizontal" or "both". Default: "vertical"
#'   * show_signif_color: Show color for significant genes? Default: TRUE
#'   * col_up: Color of the up-regulated genes. Default: "#E73426"
#'   * col_down: Color of the down-regulated genes. Default: "#0020F5"
#'   * size: The point size. Default: 3
#'
#' @param volcano_infos A csv file or a \code{data.frame} describing the
#' volcanos to produce.
#' @param de_results A \code{list} of DE results where the name of each DE
#' corresponds to the id_de value. Or the \code{outdir} directory where
#' the csv files of the \code{batch_de} function where saved.
#' @param add_labels A vector of the symbols to show on the volcano plot. If
#' NULL, no symbols will be shown. Default: NULL
#' @param outdir The directory where to save the volcano plots in pdf format.
#' If \code{NULL}, the results won't be saved as pdf. The files will be saved
#' as <outdir>/<id_plot>.pdf. Default: \code{NULL}.
#' @param r_objects The directory where to save volcano plots in rds format. If
#' \code{NULL}, the volcano plots won't be saved as rds files. The files will
#' be saved as <r_objects>/<id_plot>.rds. Default: \code{NULL}.
#' @param force Should the files be re-created if they already exists? Default:
#' \code{FALSE}.
#' @param cores Number of cores for the volcano creation. Default: 1
#'
#' @return Invisibly returns a \code{list} of all the volcano plots.
#'
#' @examples
#' \dontrun{
#' # First launch batch_de
#' de_infos <- get_demo_de_infos_file()
#' txi <- get_demo_txi()
#' design <- get_demo_design()
#' de_list <- batch_de(de_infos, txi, design)
#' # Then do the volcano_batch
#' volcano_infos <- get_demo_volcano_infos_file()
#' volcano_list <- batch_volcano(volcano_infos, de_list)
#' }
#'
#' @importFrom readr read_csv
#' @importFrom purrr map
#'
#' @importFrom parallel mclapply
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr full_join
#' @importFrom dplyr select
#' @importFrom dplyr everything
#' @importFrom readr write_csv
#'
#' @export
batch_volcano <- function(volcano_infos, de_results, add_labels = NULL,
                          outdir = NULL, r_objects = NULL, force = FALSE,
                          cores = 1) {

    # 1. Data validation
    ## volcano_infos
    stopifnot(is(volcano_infos, "data.frame") | is(volcano_infos, "character"))
    if (is.character(volcano_infos)) {
        stopifnot(file.exists(volcano_infos))
        volcano_infos <- readr::read_csv(volcano_infos, show_col_types = FALSE)
    }
    stopifnot(nrow(volcano_infos) > 0)
    stopifnot("id_plot" %in% colnames(volcano_infos))
    stopifnot("id_de" %in% colnames(volcano_infos))
    stopifnot(!any(is.na(volcano_infos$id_plot)))
    stopifnot(!any(is.na(volcano_infos$id_de)))
    stopifnot(!any(duplicated(volcano_infos$id_plot)))

    ## Directories
    if (!is.null(outdir)) {
        stopifnot(is(outdir, "character"))
        if (!dir.exists(outdir)) {
            dir.create(outdir)
        }
    }
    if (!is.null(r_objects)) {
        stopifnot(is(r_objects, "character"))
        if (!dir.exists(r_objects)) {
            dir.create(r_objects)
        }
    }

    ## add_labels
    if (!is.null(add_labels)) {
        stopifnot(is(add_labels, "character"))
        stopifnot(all(nchar(add_labels) > 0))
    }

    ## force
    stopifnot(is(force, "logical"))
    stopifnot(!is.na(force))

    ## cores
    stopifnot(is(cores, "numeric"))
    stopifnot(cores == round(cores))
    stopifnot(cores > 0)

    ## de_results
    stopifnot(is(de_results, "list") | is(de_results, "character"))
    if (is.character(de_results)) {
        unique_de_ids <- unique(volcano_infos$id_de)
        de_files <- paste0(de_results, "/", unique_de_ids, ".csv")
        names(de_files) <- unique_de_ids
        stopifnot(all(file.exists(de_files)))
        de_results <- purrr::map(de_files, readr::read_csv,
                                 show_col_types = FALSE)
    }

    # Complete volcano infos
    volcano_infos <- complete_volcano_infos(volcano_infos)
    stopifnot(length(validate_volcano_infos(volcano_infos, de_results)) == 0)

    # Produce the volcano plots
    res <- list()
    volcano_analysis <- function(i) {
        current_id <- volcano_infos$id_plot[i]
        output_pdf <- paste0(outdir, "/", current_id, ".pdf")
        output_rds <- paste0(r_objects, "/", current_id, ".rds")

        if (!force & !is.null(r_objects) & file.exists(output_rds)) {
            current_volcano <- readRDS(output_rds)
        } else {
            current_infos <- volcano_infos[i,,drop=FALSE]
            current_volcano <- produce_single_volcano_batch(current_infos,
                                                            de_results,
                                                            add_labels)
        }
        if (!is.null(outdir)) {
            if (!file.exists(output_pdf) | force) {
                pdf(output_pdf)
                print(current_volcano$p)
                dev.off()
            }
        }
        if (!is.null(r_objects)) {
            if (!file.exists(output_rds) | force) {
                saveRDS(current_volcano$p, output_rds)
            }
        }
        res[[current_id]] <- current_volcano$p
    }
    res_volcano <- parallel::mclapply(1:nrow(volcano_infos), volcano_analysis, mc.cores = cores)
    names(res_volcano) <- volcano_infos$id_plot
    invisible(res_volcano)
}

complete_volcano_infos <- function(volcano_infos) {
    # Fill missing
    if (!"y_axis" %in% colnames(volcano_infos))
        volcano_infos[["y_axis"]] <- "padj"
    if (!"p_threshold" %in% colnames(volcano_infos))
        volcano_infos[["p_threshold"]] <- 0.05
    if (!"fc_threshold" %in% colnames(volcano_infos))
        volcano_infos[["fc_threshold"]] <- 1.5
    if (!"show_signif_counts" %in% colnames(volcano_infos))
        volcano_infos[["show_signif_counts"]] <- TRUE
    if (!"show_signif_lines" %in% colnames(volcano_infos))
        volcano_infos[["show_signif_lines"]] <- "vertical"
    if (!"show_signif_color" %in% colnames(volcano_infos))
        volcano_infos[["show_signif_color"]] <- TRUE
    if (!"col_up" %in% colnames(volcano_infos))
        volcano_infos[["col_up"]] <- "#E73426"
    if (!"col_down" %in% colnames(volcano_infos))
        volcano_infos[["col_down"]] <- "#0020F5"
    if (!"size" %in% colnames(volcano_infos))
        volcano_infos$size <- 3
    if (!"title" %in% colnames(volcano_infos))
        volcano_infos$title <- NA
    volcano_infos
}

validate_volcano_infos <- function(volcano_infos, design, txi) {
    errors <- list()
    for (i in seq_along(volcano_infos$id_de)) {
        current_id  <- volcano_infos$id_de[i]

        # y_axis checks
        current_y_axis <- volcano_infos$y_axis[i]

        if (!is(current_y_axis, "character")) {
            msg <- "y_axis value should be in character format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            if (!current_y_axis %in% c("pvalue", "padj")) {
                msg <- 'Invalid y_axis value. Should be "pvalue" or "padj".'
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # p_threshold checks
        current_p_threshold <- volcano_infos$p_threshold[i]
        if (!is(current_p_threshold, "numeric")) {
            msg <- "p_threshold value should be in numeric format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            if (current_p_threshold < 0 | current_p_threshold > 1) {
                msg <- "p_threshold value should be between 0 and 1"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # fc_threshold checks
        current_fc_threshold <- volcano_infos$fc_threshold[i]
        if (!is(current_fc_threshold, "numeric")) {
            msg <- "fc_threshold value should be in numeric format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            if (current_fc_threshold <= 0) {
                msg <- "fc_threshold value should be greater or equal to 0"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # title checks
        current_title <- volcano_infos$title[i]
        if (!is.na(current_title)) {
            if (!is(current_title, "character")) {
                msg <- "title must be a character"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # show_signif_counts checks
        current_show_signif_counts <- volcano_infos$show_signif_counts[i]
        if (!is(current_show_signif_counts, "logical")) {
            msg <- "show_signif_counts value should be in logical format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        }

        # show_signif_lines checks
        current_show_signif_lines <- volcano_infos$show_signif_lines[i]
        if (!is(current_show_signif_lines, "character")) {
            msg <- "show_signif_lines value should be in character format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            expected_values <- c("none", "both", "vertical", "horizontal")
            if (!current_show_signif_lines %in% expected_values) {
                msg <- "show_signif_lines value show be one of: "
                msg <- paste0(msg, '"none", "both", "vertical", "horizontal"')
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # show_signif_color checks
        current_show_signif_color <- volcano_infos$show_signif_color[i]
        if (!is(current_show_signif_color, "logical")) {
            msg <- "show_signif_color value should be in logical format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        }

        # color checks
        ## col_up checks
        current_col_up <- volcano_infos$col_up[i]
        if (!is(current_col_up, "character")) {
            msg <- "col_up value should be in character format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            if (!is_color(current_col_up)) {
                msg <- "col_up value is not a valid color"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        ## col_down checks
        current_col_down <- volcano_infos$col_down[i]
        if (!is(current_col_down, "character")) {
            msg <- "col_down value should be in character format"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        } else {
            if (!is_color(current_col_down)) {
                msg <- "col_down value is not a valid color"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            }
        }

        # size checks
        current_size <- volcano_infos$size[i]

        if (!is.na(current_size)) {
            if (!is(current_size, "numeric")) {
                msg <- "size must be a numeric value"
                errors[[current_id]] <- c(errors[[current_id]], msg)
            } else {
                if (!identical(current_size, round(current_size))) {
                    msg <- "size must be an integer"
                    errors[[current_id]] <- c(errors[[current_id]], msg)
                }
                if (current_size <= 0) {
                    msg <- "size must be greater than 0"
                    errors[[current_id]] <- c(errors[[current_id]], msg)
                }
            }
        } else {
            msg <- "size must not be NA"
            errors[[current_id]] <- c(errors[[current_id]], msg)
        }
    }

    if (length(errors) > 0) {
        for (i in seq_along(errors)) {
            message(paste0(names(errors)[i], ":"))
            for (j in seq_along(errors[[i]])) {
                message(paste0("    ", errors[[i]][j]))
            }
        }
    }
    errors
}

produce_single_volcano_batch <- function(current_volcano_info, de_results, add_labels) {
    cvi <- current_volcano_info
    de_res <- de_results[[cvi$id_de]]

    # TODO: add_labels
    produce_volcano(de_res,
                    fc_threshold = cvi$fc_threshold,
                    p_threshold = cvi$p_threshold,
                    y_axis = cvi$y_axis,
                    show_signif_counts = cvi$show_signif_counts,
                    show_signif_lines = cvi$show_signif_lines,
                    show_signif_color = cvi$show_signif_color,
                    col_up = cvi$col_up,
                    col_down = cvi$col_down,
                    size = cvi$size,
                    title = cvi$title,
                    graph = FALSE)

    #add faceting, modify

}
CharlesJB/rnaseq documentation built on Oct. 17, 2023, 5:37 p.m.