#' Run the HRD Tests on Multiple Files
#'
#' This is a convenience function to run run_test() on multiple files.
#' @param loh_files Character vector of paths to LOH/SNV input files.
#' @param output_file Path to an output file where a table of the results will be stored.
#' @param genome ID of the genome being used (default: 'hg19').
#'   NOTE(review): currently unused in this function body -- confirm whether it
#'   should be forwarded to run_test().
#' @param number_of_cores Number of cores to run across. If NULL, the number of
#'   cores is determined automatically. (default: NULL)
#' @param previous_output Path to output file previously generated by
#'   test_multiple. If provided, only new cases are appended; existing ones are
#'   not updated. (default: NULL)
#' @param log_path Optional path to a log file. Falls back to console logging
#'   if the file appender cannot be created. (default: NULL)
#' @importFrom snow setDefaultClusterOptions
#' @import snowfall
#' @import parallel
#' @import futile.logger
#' @export
test_multiple <- function(loh_files, output_file, genome = 'hg19',
                          number_of_cores = NULL, previous_output = NULL,
                          log_path = NULL) {
  flog.threshold(TRACE)
  # Route logging to a file when requested; fall back to the console if the
  # file appender cannot be created (e.g. unwritable path).
  if (!is.null(log_path)) {
    tryCatch({
      flog.appender(appender.file(log_path), name = 'HRDtools')
    }, error = function(e) {
      flog.appender(appender.console(), name = 'HRDtools')
    })
  } else {
    flog.appender(appender.console(), name = 'HRDtools')
  }

  previous_df <- NULL
  if (!is.null(previous_output)) {
    if (file.exists(previous_output)) {
      flog.info("Will append output to %s", previous_output)
      previous_df <- read.table(previous_output, sep = '\t', header = FALSE,
                                quote = "", stringsAsFactors = FALSE)
      # Column 1 of a previous output table holds the input file path for each
      # already-scored sample; skip those files this run.
      previous_samples <- previous_df[, 1]
      flog.info('%s samples previously processed.', length(previous_samples))
      loh_files <- loh_files[!loh_files %in% previous_samples]
      if (length(loh_files) == 0) {
        flog.info('No new samples to analyze.')
        return(1)
      }
      flog.info('Analyzing %s new samples.', length(loh_files))
    } else {
      flog.warn("File %s does not exist. Building HRD scores from scratch.",
                previous_output)
      previous_output <- NULL
    }
  }

  # BUG FIX: resolve NULL before any numeric coercion/comparison. Previously
  # as.numeric(NULL) produced numeric(0) and `if (numeric(0) > 1)` errored
  # with "argument is of length zero", so the automatic-core branch was
  # unreachable and the default number_of_cores=NULL always crashed.
  if (is.null(number_of_cores)) {
    number_of_cores <- determine_ncores(length(loh_files))
  } else {
    number_of_cores <- as.numeric(number_of_cores)
  }

  # One worker per file; returns c(file, loh, tai, lst, loh + tai + lst).
  score_file <- function(loh_file) {
    res <- run_test(loh_file, silent = TRUE)
    c(loh_file, as.numeric(res$loh),
      as.numeric(res$tai),
      as.numeric(res$lst),
      as.numeric(res$loh) + as.numeric(res$lst) + as.numeric(res$tai))
  }

  if (number_of_cores > 1) {
    flog.info('Opening %s cores...', number_of_cores)
    sfInit(parallel = TRUE, cpus = number_of_cores)
    flog.info('Loading libraries on clusters')
    sfLibrary(hrdtools)
    sfLibrary(plyr)
    sfLibrary(GenomicRanges)
    flog.info('Running calculations')
    results <- sfSapply(loh_files, score_file)
    sfStop()
  } else {
    results <- sapply(loh_files, function(loh_file) {
      flog.info(loh_file)
      score_file(loh_file)
    })
  }

  # sapply/sfSapply return one column per file; transpose to one row per file.
  results <- t(results)
  if (!is.null(previous_output)) {
    results <- rbind(previous_df, as.data.frame(results))
    flog.info('Output appended to previous HRD scores.')
  }
  write.table(results, file = output_file, quote = FALSE, sep = '\t',
              row.names = FALSE, col.names = FALSE)
}
#' Determines the number of cores automatically
#'
#' This function detects the number of available cores, N. It returns either
#' the number of samples or N - 1, whichever is less, but never fewer than one
#' core for a positive sample count.
#' @param number_of_samples Number of samples to process.
#' @return Number of cores to use: min(N - 1, number_of_samples), floored at 1
#'   worker when N - 1 would be 0 or unknown.
#' @import parallel
determine_ncores <- function(number_of_samples) {
  # detectCores() is documented to return NA when the core count cannot be
  # determined, and N - 1 is 0 on a single-core machine; either value would
  # previously propagate through min() and request NA/0 workers.
  max_cores <- detectCores() - 1
  if (is.na(max_cores) || max_cores < 1) {
    max_cores <- 1
  }
  min(max_cores, number_of_samples)
}
# NOTE(review): the two trailing lines here were scraped web-page boilerplate
# ("Add the following code to your website..." / "Embedding Snippets"), not R
# code; they have been commented out so the file parses.