#' SAM files filtered and converted to BAM format
#'
#' Take a list of SAM files, unknown, mitochondrial, and "random" chromosomes, and save each to BAM
#' Needs Rsamtools.
#' @param filenames Character - List of SAM files
#' @param fileFilteredSuffix String -
#' @param fileBamSuffix String -
#' @param outDest String -
#' @param filterVerbose Logical -
#' @details # Filter out mitochondrial, unmapped, and "random" chromosome reads from sam files generated by mapping with STAR.
#' Convert the sam file to bam, sort the bam, and create an index file for it..
#' This script operates on sam files generated by mapping with STAR.
#' Not bam, because Erin found that "random" chromosome reads weren't getting removed from bams along with the mito and unknown ones.
#' But maybe I should try that again now that we're doing three separate commands; maybe that was the problem somehow.
#' TIME:
#' On yasuimac/Smintheus:
#' 3.5 G sam file - 4m/5.5m filtering, 5.5m/6.6m asBam()
#' 5 G sam file - 5.8m filtering, 7m asBam()
#' 18 G sam file - Xm/26.6m filtering, Xm/33.5m asBam()
#' Here are the equivalent commands at the command line:
#' Filter sam file (took maybe 3-4m for a 4.6G file, and cut it down to 3.6G):
#' sed '/chrM/d;/random/d;/chrUn/d' samplename_mapped_Aligned.out.sam > samplename_mapped_Aligned.out.filter.sam
#' Convert to bam:
#' samtools view -b samplename_mapped_Aligned.out.filter.sam > samplename_mapped_Aligned.out.filter.bam
#' Sort the bam file (it will create a bunch of temporary files while it's working, and then merge them):
#' samtools sort samplename_mapped_Aligned.out.filter.bam samplename_mapped_Aligned.out.filter.sorted -@ 4
#' Index the bam file:
#' samtools index samplename_mapped_Aligned.out.filter.sorted.bam
#' @examples
#' fn = list.files(pattern='.sam')
#' sam_to_bam(fn, outDest='/path/to/filtered/sams')
#' @author Emma Myers
#' @export
sam_to_bam = function(filenames, fileFilteredSuffix=".filtered.sam", fileBamSuffix=".filtered",
outDest="./", filterVerbose=FALSE) {
# Check that destination directory exists
outDest = dir_check(outDest)
for (f in filenames) {
writeLines("\n")
# Make sure file exists and has extension .sam
if ( ! file_checks(f, extension = ".sam", verbose = TRUE) ) { next }
flush.console()
writeLines(paste("Processing file:", f))
flush.console()
###########################################################################
# Remove mitochondrial, unmapped, and random chromosomes with sam_filter().
###########################################################################
# Define name of filtered file and check that it doesn't already exist
fileFiltered = sub(".sam", fileFilteredSuffix, basename(f))
fileFilteredFull = paste(outDest, fileFiltered, sep="")
if ( file_checks(fileFilteredFull, shouldExist=FALSE, verbose=TRUE) ){
writeLines("Filtering...", sep="")
flush.console()
# Announce output file name
writeLines(paste("file will be saved as ", fileFilteredFull), sep="")
flush.console()
# Call and time sam_filter()
tStartFilter = proc.time()[3]
sam_filter(f, fileFilteredFull, verbose = filterVerbose)
tElapsedFilter = proc.time()[3] - tStartFilter
# Announce you're done
writeLines(paste("...done (", round(tElapsedFilter/60, digits=2), "m).", sep=""))
flush.console()
} else { writeLines("Not filtering.") }
###############################################################################
# Convert to sam to bam, sort, and generate index file. All donw with asBam().
###############################################################################
# Define name of bam file and check that it doesn't already exist
# asBam tacks ".bam" onto the end, so our "full" filename doesn't actually include it
fileBamFull = paste(outDest, sub(fileFilteredSuffix, fileBamSuffix, basename(fileFiltered)), sep="")
if ( file_checks(paste(fileBamFull, ".bam", sep=""), shouldExist=FALSE, verbose=TRUE) ) {
writeLines("Converting to bam, sorting, and generating index file...", sep="")
flush.console()
# Announce output file names
flush.console(); writeLines(paste("file will be saved as ", fileBamFull, sep=""), sep="")
flush.console()
# Call and time asBam()
tStartBam = proc.time()[3]
# asBam() automatically adds .bam to the end of the file, so get rid of that if it's there
Rsamtools::asBam(fileFilteredFull, destination = fileBamFull)
tElapsedBam = proc.time()[3] - tStartBam
# Announce you're done
writeLines(paste("...done (", round(tElapsedBam/60, digits=2), "m).", sep=""))
flush.console()
} else { writeLines("Not converting to bam, sorting, and generating index file.") }
writeLines("Done with file. Don\'t be concerned by any warning messages concerning merging files; asBam() does that and it\'s fine.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.