#' Run the Bedtools program
#'
#' @description Runs the bedtools program
#'
#' @param command Bedtools command to run, at present can only choose from 'coverage' or 'bamtobed'or "getfasta", required
#' @param input List of aligned files in bam format, required
#' @param reference GTF/GFF3 file for calculating the depth of coverage intervals
#' @param out.dir Name of the directory from the Bedtools output
#' @param sample.names List of sample names, required
#' @param counts Only report the count of overlaps, default set to FALSE
#' @param name Use the name field and coordinates for the FASTA header
#' @param strand Force strandedness
#' @param parallel Run in parallel, default set to FALSE
#' @param cores Number of cores/threads to use for parallel processing, default set to 4
#' @param execute Whether to execute the commands or not, default set to TRUE
#' @param bedtools Path to the bedtools program, required
#' @param version Returns the version number
#'
#' @examples
#' \dontrun{
#' # Version
#' bedtools.path <- "/software/bedtools2/bin/bedtools"
#' bedtools.version <- run_bedtools(bedtools = bedtools.path,
#' version = TRUE)
#'
#' # Bedtools coverage
#' command = "coverage"
#' outputDirectory <- "coverage"
#' sam.files <- list.files(path = alignments.path, pattern = "sam$",
#' full.names = TRUE,
#' recursive = TRUE)
#' sorted.bam.files <- gsub(sam.files,
#' pattern = ".sam",
#' replacement = "_sorted.bam")
#' sample_names <- unlist(lapply(strsplit(list.files(path = reads_path,
#' pattern = "*_R1_001.fastq$",
#' full.names = FALSE),"_"), `[[`, 1))
#' gtfFile <- "mirBase/hsa.ensembl.gff3"
#'
#' bedtools.cmds <- run_bedtools(command = command,
#' input = sorted.bam.files,
#' reference = gtfFile,
#' out.dir = outputDirectory,
#' sample.names = sample.names,
#' counts = TRUE,
#' bedtools = bedtools.path)
#' bedtools.cmds
#' }
#'
#' @return A list with the bedtools commands
#'
#' @export
#'
run_bedtools <- function(command = NULL,
input = NULL,
reference = NULL,
out.dir = NULL,
sample.names = NULL,
counts = NULL,
name = NULL,
strand = NULL,
parallel = FALSE,
cores = 4,
execute = TRUE,
bedtools = NULL,
version = FALSE){
# Check bedtools program can be found
sprintf("type -P %s &>//dev//null && echo 'Found' || echo 'Not Found'", bedtools)
# Version
if (isTRUE(version)){
bedtools.run <- sprintf('%s --version',
bedtools)
result <- system(bedtools.run, intern = TRUE)
return(result)
}
# Create the sample directories for the per sample bedtools results
lapply(paste(out.dir,sample.names, sep = "/"), function(cmd) dir.create(cmd, showWarnings = FALSE, recursive = TRUE))
# Set the additional arguments
args <- ""
# Counts
if (!is.null(counts)){
args <- paste(args,"-counts", sep = " ")
}
# Name
if (!is.null(name)){
args <- paste(args,"-name+", sep = " ")
}
# Strand
if (!is.null(strand)){
args <- paste(args,"-s", sep = " ")
}
if (command == "coverage"){
# Set the names and paths for the bedtools counts files
out.files <- paste(out.dir,sample.names,paste(sample.names,"txt",sep = "."),sep = "/")
bedtools.run <- sprintf('%s %s %s -a %s -b %s > %s',
bedtools,command,args,reference,input,out.files)
}
if (command == "bamtobed"){
# Set the names and paths for the bedtools bamtobed files
out.files <- paste(out.dir,sample.names,paste(sample.names,"bed",sep = "."),sep = "/")
bedtools.run <- sprintf('%s %s %s -i %s > %s',
bedtools,command,args,input,out.files)
}
if (command == "getfasta"){
# Set the names and paths for the bedtools bamtobed files
out.file <- paste(out.dir,paste(basename(reference),"fasta",sep ="."),sep = "/")
bedtools.run <- sprintf('%s %s %s -fi %s -bed %s > %s',
bedtools,command,args,input,reference,out.file)
}
if (isTRUE(execute)){
if (isTRUE(parallel)){
cluster <- makeCluster(cores)
parLapply(cluster, bedtools.run, function (cmd) system(cmd))
stopCluster(cluster)
}else{
lapply(bedtools.run, function (cmd) system(cmd))
}
}
return(bedtools.run)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.