R/blast_baits.R

Defines functions blast_baits

Documented in blast_baits

#' Blast baits against a database
#' 
#' Finds the number of matches for each bait generated by the do_baits function. Uses BLAST to match the baits against
#' either the same database, or a different one (if using the argument db). The blastn parameters can be adjusted using
#' the blastn_args argument. The following parameters are already set and should not be changed: -db, -query, -out and -outfmt.
#' 
#' @param x the output of the do_baits function
#' @param db OPTIONAL: A FASTA file against which the baits should be blased. If left empty, the database from which
#' 			the baits were created is used.
#' @param blastn_args Additional parameters that can be passed to the blastn call.
#' 
#' @return The same output as do_baits, but with an additional column in the baits showing the number of matches.
#' 
#' @export
#' 
blast_baits <- function(x, db = NULL, blastn_args = '') {

	if (any(names(x) != c("baits", "excluded.baits", "input.summary")) | is.null(attributes(x)$database))
		stop('Could not recognise x as the output of do_baits().', call. = FALSE)

	# create repository files to work with blastn
	aux <- tempfile()
	baits_file <- paste0(tempfile(), '.fasta')
	results_file <- paste0(tempfile(), '_blast_output.txt')

	# convert baits to single table to export to cpp
	baits <- data.table::rbindlist(x$baits)

	if (any(baits$bait_seq_size < 60))
		stop('blast_baits can only work with baits with 60 or more base pairs', call. = FALSE)

	baits <- baits[,c('bait_no', 'bait_seq')]

	# write baits to baits file in fasta format
	writeFasta(baits = baits, fasta_path = baits_file)

	# if no db, use the same as was used to create the baits
	db <- ifelse(is.null(db), attributes(x)$database, db)

	# transform db into a blast database
	make_blast_db(db)

	# blast the baits
	system(paste(find_executable('blastn'), "-db", db, "-query", baits_file, 
        "-out", results_file, "-outfmt \"10\"", blastn_args))

	# read the results
	blast_results <- importBlastResults(path = results_file)

	# merge them back with the do_baits output
	output <- lapply(x$baits, function(seq) {
		# link <- match(seq$bait_no, recipient$bait_no)
		seq$n_matches <- NULL
		merge(x = seq, y = blast_results, by = 'bait_no', all.x = TRUE, all.y = FALSE)
	})

	x$baits <- output

	return(x)
}


#' Adapted from rBLAST's .findExecutable function
#' 
#' @param exe a command to be found in the system
#' 
#' @return the full path to the command
#' 
#' @keywords internal
#' 
find_executable <- function (exe) {
    path <- Sys.which(exe)

    if (all(path == "")) {
        stop("Executable for ", paste(exe, collapse = " or "), 
            " not found! Please make sure that the software is correctly installed and, if necessary, path variables are set.", 
            call. = FALSE)
    }
    
    output <- path[which(path != "")[1]]
    return(output)
}


#' copied from rBLAST's makeblastdb function
#' 
#' @param file a FASTA file to be converted to a blast database
#' 
#' @return Nothing. Creates the database files in the same location as the input file
#' 
#' @keywords internal
#' 
make_blast_db <- function (file) {
    system(paste(find_executable("makeblastdb"), "-in", file, 
        "-dbtype nucl"))
}
BelenJM/supeRbaits documentation built on Jan. 28, 2022, 1:44 a.m.