### -----------------------------------------------------------------
### sortIndexBam: sort and index a bam file
### Exported!!
sortIndexBam <- function(inputs, outputs,
maxMem="4096M", removeBam=TRUE,
mc.cores=getThreads(), binary="samtools"){
if(length(inputs) != length(outputs)){
stop("The number of input bam names differ from output bam names!")
}
for(i in 1:length(inputs)){
args <- c("sort",
paste("-m", maxMem),
paste("-@", mc.cores),
paste("-o", outputs[i]),
inputs[i])
system2(command=binary, args=args)
if(removeBam){
file.remove(inputs[i])
}
args <- c("index", outputs[i])
system2(command=binary, args=args)
}
invisible(outputs)
}
### -----------------------------------------------------------------
### adds the number of reads in reads1 as a comment to the header of bamFile
### Exported!
addInputReadCounts <- function(bamFiles, reads1s, binary="samtools"){
if(length(bamFiles) != length(reads1s)){
stop("The number of input bam names differ from input fastq names!")
}
for(i in 1:length(bamFiles)){
bamFile <- bamFiles[i]
reads1 <- reads1s[i]
# count the reads in the reads1 file
nReads <- countReadsInFastq(reads1)
# use sammtools view -H bamFile > header.txt
headerFile <- tempfile(fileext=".header")
args <- c("view -H", bamFile, ">", headerFile)
system2(command=binary, args=args)
# append header.txt with a line CO <TAB>INPUTREADCOUNT:NNNN
cat("@CO\tINPUTREADCOUNT:", format(nReads, scientific=FALSE), "\n",
sep="", file=headerFile, append=TRUE)
# use "samtools reheader header.txt bamFile > out.bam" to change the header
tempBam <- tempfile(fileext=".bam")
args <- c("reheader", headerFile, bamFile, ">", tempBam)
system2(command=binary, args=args)
file.remove(headerFile)
# Move the tempBam back
file.copy(tempBam, bamFile, overwrite=TRUE)
file.remove(tempBam)
# recreate the index file if exists
baiFile <- paste0(bamFile, ".bai")
if(file.exists(baiFile)){
indexBam(bamFile)
}
}
invisible(bamFiles)
}
### -----------------------------------------------------------------
### bam2bigwig: convert a bam file into bw file with coverage along the genome
### We load all the mapped reads: no matter it's paired/proper paired or not
### Exported!
bam2bigwig <- function(bamFiles,
bigwigFiles=sub("\\.bam$", ".bw", bamFiles,
ignore.case=TRUE)
){
if(length(bamFiles) != length(bigwigFiles)){
stop("The number of input bam names differs from output bigwig names!")
}
for(i in 1:length(bamFiles)){
job <- my.jobStart(paste("start", basename(bamFiles)))
gal1 <- readGAlignments(bamFiles[i])
cov1 <- coverage(gal1)
export.bw(cov1, bigwigFiles[i])
my.writeElapsed(job, status="finished writing bigwig")
rm(gal1, cov1)
}
invisible(bigwigFiles)
}
### -----------------------------------------------------------------
### getBamMultiMatching: get the numbers of multi-hits for bam file
### The input bam files have to be sorted and indexed.
### Exported!
getBamMultiMatching <- function(bamFile,
pairedMode=c("paired", "first", "second")){
pairedMode <- match.arg(pairedMode)
baiFile <- paste0(bamFile, ".bai")
if(!file.exists(baiFile)){
stop("The input bam file has to be sorted and indexed!")
}
job <- my.jobStart(paste("bam multimatch", basename(bamFile)))
param <- ScanBamParam(what="qname")
bamFlag(param) <- scanBamFlag(isUnmappedQuery=FALSE)
## test paired-end or not
paired <- testPairedEndBam(bamFile)
if(paired){
bamFlag(param) <- switch(pairedMode,
paired=scanBamFlag(isFirstMateRead=TRUE,
isProperPair=TRUE,
isUnmappedQuery=FALSE),
first=scanBamFlag(isFirstMateRead=TRUE,
isUnmappedQuery=FALSE),
second=scanBamFlag(isSecondMateRead=TRUE,
isUnmappedQuery=FALSE))
}
bamReads <- scanBam(bamFile, param=param)[[1]]$qname
result <- table(bamReads)
result2 <- table(result)
nReads <- getBamInputReadCount(bamFile)
if(!is.na(nReads)){
nReadsUnmapped <- nReads - length(unique(bamReads))
result2 <- c("0"=nReadsUnmapped, result2)
my.writeElapsed(job, "nreads from header loaded")
}
my.writeElapsed(job, "done")
return(result2)
}
### -----------------------------------------------------------------
### getBamInputReadCount: get the number of input reads
### Exported!
getBamInputReadCount <- function(bamFiles){
ans <- c()
for(i in 1:length(bamFiles)){
bamFile <- bamFiles[i]
header <- scanBamHeader(bamFile)[[1]]
## test whether there is any comment line
if(sum(names(header$text) == "@CO") == 0){
ans <- c(ans, NA)
}
## get the comment lines
commentLinesIndex <- which(names(header$text) == "@CO")
for(i in 1:length(commentLinesIndex)){
commentLine <- header$text[[commentLinesIndex[i]]]
searchResults <- grepl("^INPUTREADCOUNT:", commentLine)
numOfCount <- sum(searchResults)
if(numOfCount == 0){
next
}else if(numOfCount == 1){
nReads <- as.integer(sub("^INPUTREADCOUNT:", "", commentLine[which(searchResults)]))
ans <- c(ans, nReads)
}else{
stop("more than one INPUTREADCOUNT found in ", bamFile)
}
}
}
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.