R/test_multiple.R

Defines functions test_multiple determine_ncores

Documented in test_multiple

#' Run the HRD Tests on Multiple Files
#' 
#' This is a convenience function to run_test() on multiple files.
#' @param snv_file Character vector of paths to SNV files
#' @param output_file Path to an output file where a table of the results will be stored.
#' @param genome ID of the genome being used (default: 'hg19')
#' @param number_of_cores Specifies number of cores to run across. If NULL, then determines number of cores automatically. (default: NULL)
#' @param previous_output Path to output file previously generated by test_multiple. If provided, will only append new cases and not update existing ones. (default: NULL)
#' @importFrom snow setDefaultClusterOptions
#' @import snowfall
#' @import parallel
#' @import futile.logger
#' @export

test_multiple <- function(loh_files, output_file, genome='hg19', number_of_cores=NULL, previous_output=NULL, log_path=NULL) {
    flog.threshold(TRACE)
    if (!is.null(log_path)) {
        tryCatch({
            flog.appender(appender.file(log_path), name = 'HRDtools')
        }, error = function(e) {
            flog.appender(appender.console(), name = 'HRDtools')
        })
    } else {
        flog.appender(appender.console(), name = 'HRDtools')
    }

    if (!is.null(previous_output)) {
        if (file.exists(previous_output)) {
            flog.info("Will append output to %s", previous_output)
            previous_df <- read.table(previous_output, sep='\t', header=F, quote="", stringsAsFactors=F)
            previous_samples <- previous_df[, 1]
            flog.info('%s samples previously processed.', length(previous_samples))
            loh_files <- loh_files[! loh_files %in% previous_samples]

            if (length(loh_files) == 0) {
                flog.info('No new samples to analyze.')
                return(1)
            } else {
                flog.info('Analyzing %s new samples.', length(loh_files))
            }
        } else {
            flog.warn("File %s does not exist. Building HRD scores from scratch.", previous_output)
            previous_output <- NULL
        }
    }

    number_of_cores <- as.numeric(number_of_cores)
    if (number_of_cores > 1) {
        if (is.null(number_of_cores)) {
            number_of_cores <- determine_ncores(length(loh_files))
        } else {
            number_of_cores <- as.numeric(number_of_cores)
        }

        flog.info('Opening %s cores...', number_of_cores)
        sfInit(parallel=T, cpus=number_of_cores)

        flog.info('Loading libraries on clusters')
        sfLibrary(hrdtools)
        sfLibrary(plyr)
        sfLibrary(GenomicRanges)

        flog.info('Running calculations')
        results = sfSapply(loh_files, function(loh_file) {
            results <- run_test(loh_file, silent=TRUE)

            return(c(loh_file, as.numeric(results$loh),
                     as.numeric(results$tai),
                     as.numeric(results$lst),
                     as.numeric(results$loh) + as.numeric(results$lst) + as.numeric(results$tai)))
        })
        sfStop()
    } else {
        results = sapply(loh_files, function(loh_file) {
            flog.info(loh_file)
            results <- run_test(loh_file, silent=TRUE)
 
            return(c(loh_file, as.numeric(results$loh),
                     as.numeric(results$tai),
                     as.numeric(results$lst),
                     as.numeric(results$loh) + as.numeric(results$lst) + as.numeric(results$tai)))
        })
    }

    results <- t(results)

    if (!is.null(previous_output)) {
        results <- rbind(previous_df, as.data.frame(results))
        flog.info('Output appended to previous HRD scores.')
    }

    write.table(results, file=output_file, quote=FALSE, sep='\t', row.names=FALSE, col.names=FALSE)
}


#' Determines the number of cores automatically
#'
#' This function detects number of cores, N. It returns either the number of samples or N-1, whichever is less.
#' @param number_of_samples Number of samples to process
#' @import parallel

determine_ncores <- function(number_of_samples) {
    maxCores <- detectCores() - 1
    return(min(maxCores, number_of_samples))
}
eyzhao/hrdtools documentation built on May 21, 2019, 3:09 a.m.