#' Check that the software and databases JAMSalpha relies on are available
#'
#' JAMSalpha function. Looks up the external executables needed by the
#' requested applications on the PATH, searches `opt$dbdir` for the kraken2,
#' CheckM, BLAST and host bowtie2 resources, and records what was found in
#' `opt`. Problems are logged and flagged through `opt$abort`; the function
#' itself does not stop execution.
#'
#' @param opt List of options. Must be supplied explicitly. Fields read here:
#'   `dbdir`, `totmembytes`, `analysis`, `contigs`, `host` and
#'   `testdependencies`.
#' @param applications_to_check Character vector naming the groups of
#'   resources to check. Recognised values are `"base"`, `"reads"`,
#'   `"assembly"`, `"kraken"`, `"host"`, `"annotation"`, `"16S"` and
#'   `"blast"`.
#' @return `opt` with `abort` set (`TRUE` when a required resource is missing
#'   or when `opt$testdependencies` is `TRUE`) and, depending on the
#'   applications checked, `krakendb`, `JAMS_Kdb_Version`, `CheckMdb`,
#'   `blastanalyses`, `blastpath`, `host`, `hostspecies`, `relindexfiles`,
#'   `indexpath` and `host_accession_number` filled in.
#' @examples
#' \dontrun{
#' opt <- check_resources(opt = opt, applications_to_check = c("base", "kraken"))
#' }
#' @export
check_resources <- function(opt = opt, applications_to_check = c("base", "reads", "assembly", "kraken", "host", "annotation", "16S", "blast")) {
  opt$abort <- FALSE

  # External (non-R) software required by each application group.
  deplist <- list(
    base = c("pigz", "wget"),
    reads = c("sratoolkit", "trimmomatic"),
    host = c("bowtie2"),
    assembly = c("megahit", "spades", "samtools", "checkm", "metabat2"),
    annotation = c("prokka", "convert2bed", "bedtools"),
    kraken = "kraken2"
  )
  # Applications without an entry in deplist (e.g. "16S") contribute nothing.
  dependencies_to_check <- unname(unlist(deplist[applications_to_check]))

  # Some packages are detected through an executable with a different name.
  executables <- c(sratoolkit = "fastq-dump", spades = "spades.py")

  # Check for non-R software dependencies
  missingdep <- FALSE
  for (dep in dependencies_to_check) {
    cmd <- if (dep %in% names(executables)) executables[[dep]] else dep
    # Sys.which() returns "" when the executable is not on the PATH, so any
    # lookup failure counts as missing (not only exit status 1 of `which`).
    if (!nzchar(Sys.which(cmd))) {
      flog.info(paste0("You are missing ", dep))
      missingdep <- TRUE
    }
  }
  if (missingdep) {
    flog.info("Please install missing dependencies before using JAMSalpha. Aborting.")
    opt$abort <- TRUE
  }

  if ("kraken" %in% applications_to_check) {
    # Check for kraken2 database
    flog.info("Checking for presence of kraken2 database")
    # `pattern` is a regular expression, not a glob: anchor on the extension.
    k2dbfiles <- list.files(opt$dbdir, recursive = TRUE, full.names = TRUE, include.dirs = FALSE, pattern = "\\.k2d$")
    if (length(k2dbfiles) != 3) {
      flog.info(paste0("A kraken2 database was not found in ", opt$dbdir, ". Check your database structure. Aborting."))
      opt$abort <- TRUE
    }
    # The database directory is the folder holding the first .k2d file; an
    # empty string (which never exists as a directory) when none was found.
    opt$krakendb <- if (length(k2dbfiles) > 0) dirname(k2dbfiles[1]) else ""

    if (!dir.exists(opt$krakendb)) {
      flog.info("JAMS kraken db directory not found. Aborting.")
      opt$abort <- TRUE
    } else {
      flog.info(paste("JAMS kraken2 database found at", opt$krakendb))
      # file.size() yields NA when hash.k2d is absent.
      krakendbsize <- file.size(file.path(opt$krakendb, "hash.k2d"))
      if (is.na(krakendbsize)) {
        flog.info("Kraken2 database supplied seems to be corrupt or not valid. Check your database structure. Aborting.")
        opt$abort <- TRUE
      } else if (isTRUE(krakendbsize > opt$totmembytes)) {
        # isTRUE() keeps this from erroring when opt$totmembytes is unset.
        flog.info("It seems there is not enough RAM memory to use the kraken2 database supplied. Try again on a system with more RAM. Aborting.")
        opt$abort <- TRUE
      } else {
        versionfile <- file.path(opt$krakendb, "JAMSKdb.ver")
        if (file.exists(versionfile)) {
          opt$JAMS_Kdb_Version <- as.character(read.table(versionfile, header = FALSE, quote = NULL)[1, 1])
        } else {
          opt$JAMS_Kdb_Version <- "UNDETERMINED"
        }
        flog.info(paste("The JAMS kraken2 database supplied is version", opt$JAMS_Kdb_Version, "and has", round((krakendbsize / 1000000000), 1), "Gb."))
      }
    }

    # NOTE(review): the CheckM lookup only runs when "kraken" is among the
    # applications being checked - confirm this coupling is intended.
    flog.info("Checking for presence of CheckM database")
    checkmfiles <- list.files(opt$dbdir, recursive = TRUE, full.names = TRUE, include.dirs = FALSE, pattern = "taxon_marker_sets.tsv")
    if (length(checkmfiles) > 0) {
      opt$CheckMdb <- dirname(checkmfiles[1])
    } else {
      flog.warn("CheckM database not found. Will not evaluate genome completeness using CheckM")
      # Ensure NULL if db is absent
      opt$CheckMdb <- NULL
    }
  }

  if ("blast" %in% applications_to_check) {
    # Check for blast databases: protein (.phr) or nucleotide (.nhr) headers.
    blastfiles <- list.files(opt$dbdir, recursive = TRUE, full.names = TRUE, include.dirs = TRUE, pattern = "\\.[pn]hr$")
    if (length(blastfiles) > 0) {
      # Each analysis is named after the folder holding its database files;
      # all analysis folders are expected to share one parent directory.
      opt$blastanalyses <- basename(dirname(blastfiles))
      opt$blastpath <- dirname(dirname(blastfiles[1]))
      flog.info(paste("Found blast databases for the following blast analyses:", paste0(opt$blastanalyses, collapse = ", ")))
      flog.info(paste("Blast databases are in:", opt$blastpath))
    } else {
      opt$blastanalyses <- NULL
      flog.info("No BLAST databases found in the JAMSdb folder supplied.")
    }
  }

  if ("host" %in% applications_to_check) {
    # Set host species to none if isolates or if using contigs.
    if (isTRUE(opt$analysis == "isolate")) {
      opt$host <- "none"
    }
    if (!is.null(opt$contigs)) {
      opt$host <- "none"
    }

    # Check for host bowtie indices
    if (opt$host %in% c("mouse", "human")) {
      flog.info("Checking for the presence of pre-compiled host genome bowtie2 indices.")
      # Set host prefix
      opt$hostspecies <- switch(opt$host, mouse = "Mmusculus", human = "Hsapiens")
      indexfiles <- list.files(opt$dbdir, recursive = TRUE, full.names = TRUE, include.dirs = FALSE, pattern = "\\.bt2$")
      opt$relindexfiles <- grep(opt$hostspecies, indexfiles, value = TRUE)
      # A complete index is taken to be exactly 6 .bt2 files.
      if (length(opt$relindexfiles) == 6) {
        opt$indexpath <- dirname(opt$relindexfiles[1])
        flog.info(paste0("Pre-built bowtie2 indices for the ", opt$host, " genome were found at ", opt$indexpath))
      } else {
        flog.info(paste0("Pre-built bowtie2 indices for the ", opt$host, " genome were not found. Check your database structure. Aborting."))
        opt$abort <- TRUE
      }
    } else if (opt$host == "none") {
      flog.info("No need to eliminate sequencing reads coming from a host species.")
    } else {
      flog.info("Host species supplied is not human, mouse or none.")
      if (file.exists(opt$host)) {
        flog.info("Host species bowtie index has been supplied as a path.")
        opt$relindexfiles <- list.files(opt$host, recursive = TRUE, full.names = TRUE, include.dirs = FALSE, pattern = "\\.bt2$")
        # Need 6 index files
        if (length(opt$relindexfiles) == 6) {
          opt$indexpath <- dirname(opt$relindexfiles[1])
          # The host name is the index file name up to its first dot.
          indexfile1 <- basename(opt$relindexfiles[1])
          opt$host <- unlist(strsplit(indexfile1, split = "\\."))[1]
          opt$hostspecies <- opt$host
          flog.info(paste0("Pre-built bowtie2 indices for the ", opt$host, " genome were found at ", opt$indexpath))
        } else {
          flog.info(paste0("Pre-built bowtie2 indices for the host genome were not found or look weird. Check the path supplied. Aborting."))
          opt$abort <- TRUE
        }
      } else {
        flog.info("Host species genome has been supplied as an NCBI species taxid or organism name. Checking Tax ID and best genome to download.")
        flog.info("Trying as Tax ID first.")
        host_assemblies <- get_genomes_NCBI(organisms = NULL, outputdir = NULL, species_taxid = opt$host, nobs = TRUE, fileformat = "fasta", simulate = TRUE)
        host_assemblies <- subset(host_assemblies, genome_rep == "Full")
        if (nrow(host_assemblies) == 0) {
          flog.info("Not Tax ID. Trying as organism_name.")
          host_assemblies <- get_genomes_NCBI(organisms = NULL, outputdir = NULL, organism_name = opt$host, nobs = TRUE, fileformat = "fasta", simulate = TRUE)
        }
        # Only consider full genomes
        host_assemblies <- subset(host_assemblies, genome_rep == "Full")
        if (nrow(host_assemblies) > 0) {
          flog.info("Deteriming best host genome assembly to download.")
          # Give priority to reference and then representative genomes, if available.
          category_priority <- c("reference genome", "representative genome")
          category_present <- category_priority %in% host_assemblies$refseq_category
          if (any(category_present)) {
            best_refseq_category <- category_priority[which(category_present)[1]]
            host_assemblies <- subset(host_assemblies, refseq_category == best_refseq_category)
          }
          # Go with the most recent
          host_assemblies <- host_assemblies[order(host_assemblies$seq_rel_date, decreasing = TRUE), ][1, ]
          opt$host_accession_number <- host_assemblies$assembly_accession
          flog.info(paste("Will build index with", host_assemblies$organism_name, "genome under accession number", opt$host_accession_number, "of", host_assemblies$seq_rel_date))
          opt$host <- paste(host_assemblies$organism_name, opt$host_accession_number, sep = "_")
          opt$hostspecies <- host_assemblies$organism_name
        } else {
          flog.info("Could not find the Tax ID or organism name to download. Aborting now.")
          opt$abort <- TRUE
        }
      }
    }
  }

  # When only testing dependencies, flag abort regardless of the outcome of
  # the checks above. An unset opt$testdependencies counts as FALSE.
  if (isTRUE(as.logical(opt$testdependencies))) {
    opt$abort <- TRUE
  }

  opt
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.