Nothing
#' Bulk alignment function
#'
#' @param lalista list of samples
#' @param nodes logic cores
#' @param readsPath sample folders
#' @param GenomeIndex genome index
#' @param outBam output folder
#' @param threads processes
#' @param outFormat BAM or SAM
#' @param phredScore quality score
#' @param maxExtractedSubreads number of subreads
#' @param consensusVote consensus
#' @param mismatchMax mismatch
#' @param uniqueOnly no multimapping
#' @param maxMultiMapped multimapping
#' @param indelLength indel
#' @param fragmentMinLength fragment minumum length
#' @param fragmentMaxLength fragment maximum length
#' @param matesOrientation mate orientation
#' @param readOrderConserved read order
#' @param coordinatesSorting sorting
#' @param allJunctions junctions
#' @param tempfolder temporary folder
BulkAlignment <- function(lalista, nodes,
readsPath,
GenomeIndex,
outBam,
threads,
outFormat,
phredScore,
maxExtractedSubreads,
consensusVote,
mismatchMax,
uniqueOnly,
maxMultiMapped,
indelLength,
fragmentMinLength,
fragmentMaxLength,
matesOrientation,
readOrderConserved,
coordinatesSorting,
allJunctions,
tempfolder
) {
## nested function
## unmapped extraction
## filtering BAM from unmapped
UnmappedExtraction <- function(
temporarySource,
outputBam,
prefix,
sampleBasename,
subsetting = 1000000
) {
fromBamToFastq <- function(alignFile, Setting, OutPut) {
SubsetImported <- Rsamtools::scanBam(
file = alignFile,
param = Setting
)
SubsetShortReads <- ShortRead::ShortReadQ(sread = SubsetImported[[1]]$seq,
quality = SubsetImported[[1]]$qual,
id = Biostrings::BStringSet(SubsetImported[[1]]$qname))
ShortRead::writeFastq(object = SubsetShortReads,
file = OutPut,
mode = "a", compress = TRUE)
}
##### R1 unmapped, mate unmapped
## parameters
fieldR1unmap2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isFirstMateRead = TRUE,
isUnmappedQuery = TRUE,
hasUnmappedMate = TRUE),
reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSING
#1. setting chunk number of BAM
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
cycleBAM <- 0
while(nrec <- length(Rsamtools::scanBam(docBam)[[1]][[1]])) {
cat("records:", nrec, "\n")
cycleBAM <- cycleBAM+1
print(cycleBAM)
}
close(docBam)
#2. inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR1unmap2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
)
)
#3. iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R1up <- 1
while (R1up<=cycleBAM) {
print(R1up)
fromBamToFastq(alignFile = docBam,
Setting = fieldR1unmap2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR1unmap2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
))
R1up <- R1up+1
}
close(docBam)
## category 2: unmapped R2 (mate unmapped)
## PARAMETERS
fieldR2unmap2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isSecondMateRead = TRUE,
isUnmappedQuery = TRUE,
hasUnmappedMate = TRUE),
reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSing
#2.inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR2unmap2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
)
)
#3.iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R2up <- 1
while (R2up<=cycleBAM) {
print(R2up)
fromBamToFastq(alignFile = docBam,
Setting = fieldR2unmap2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR2unmap2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
)
)
R2up <- R2up+1
}
close(docBam)
## "SPURIE": R1 unmapped e R2 mapped
## unmapped R1
## PARAMETers
fieldR1mateUnmapped2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isUnmappedQuery = TRUE,
isFirstMateRead = TRUE,
hasUnmappedMate = FALSE
), reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSING
#2.inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR1mateUnmapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
)
)
#3.iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R1mu <- 1
while (R1mu<=cycleBAM) {
print(R1mu)
fromBamToFastq(alignFile = docBam,
Setting = fieldR1mateUnmapped2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR1mateUnmapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
))
R1mu <- R1mu+1
}
## mapped R2, mate R1 unmapped
## PARAMETers
fieldR2mateMapped2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isUnmappedQuery = FALSE,
isSecondMateRead = TRUE,
hasUnmappedMate = TRUE
), reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSing
#2.inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR2mateMapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
)
)
#3.iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R2mm <- 1
while (R2mm<=cycleBAM) {
print(R2mm)
fromBamToFastq(alignFile = docBam,
Setting = fieldR2mateMapped2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR2mateMapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
))
R2mm <- R2mm+1
}
## "SPURIE": R1 mapped e R2 unmapped
## mapped R1
fieldR1mateMapped2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isUnmappedQuery = FALSE,
isFirstMateRead = TRUE,
hasUnmappedMate = TRUE
), reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSing
#2.inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR1mateMapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
)
)
#3.iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R1mm <- 1
while (R1mm<=cycleBAM) {
print(R1mm)
fromBamToFastq(alignFile = docBam,
Setting = fieldR1mateMapped2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR1mateMapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")
))
R1mm <- R1mm+1
}
## unmapped R2, mate R1 mapped
fieldR2mateUnmapped2 <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isUnmappedQuery = TRUE,
isSecondMateRead = TRUE,
hasUnmappedMate = FALSE
), reverseComplement = TRUE,
what = c("qname", "seq", "qual")
)
## PROCESSing
#2.inizialization
file.create(
file.path(
temporarySource, paste(
prefix, "FastqR2mateUnmapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
)
)
#3.iterate,import,class,write
docBam <- open(Rsamtools::BamFile(
file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
yieldSize = subsetting))
R2mu <- 1
while (R2mu<=cycleBAM) {
print(R2mu)
fromBamToFastq(alignFile = docBam,
Setting = fieldR1mateMapped2,
OutPut = file.path(
temporarySource, paste(
prefix, "FastqR2mateUnmapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")
))
R2mu <- R2mu+1
}
##### UNION
## UNION R1
erreUno <- c(
file.path(
temporarySource, paste(
prefix, "FastqR1unmap2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")),
file.path(
temporarySource, paste(
prefix, "FastqR1mateUnmapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = "")),
file.path(
temporarySource, paste(
prefix, "FastqR1mateMapped2_", sampleBasename, "_Unmapped_R1.fq.gz", sep = ""))
)
Rfastp::catfastq(
output =
file.path(
outputBam, paste(
prefix, "R1_", sampleBasename, "_unmapped.fastq.gz", sep = "")),
inputFiles = erreUno,
append = FALSE,
paired = FALSE
)
## UNION R2
erreDue <- c(
file.path(
temporarySource, paste(
prefix, "FastqR2unmap2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")),
file.path(
temporarySource, paste(
prefix, "FastqR2mateMapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = "")),
file.path(
temporarySource, paste(
prefix, "FastqR2mateUnmapped2_", sampleBasename, "_Unmapped_R2.fq.gz", sep = ""))
)
Rfastp::catfastq(
output = file.path(
outputBam, paste(
prefix, "R2_", sampleBasename, "_unmapped.fastq.gz", sep = "")),
inputFiles = erreDue,
append = FALSE,
paired = FALSE)
## FILTER BAM FROM UNMAPPED
## filter parameters
parameters <- Rsamtools::ScanBamParam(
flag = Rsamtools::scanBamFlag(
isPaired = TRUE,
isUnmappedQuery = FALSE,
hasUnmappedMate = FALSE
),
what = Rsamtools::scanBamWhat())
## sorting bam
Rsamtools::sortBam(
file = file.path(temporarySource,
paste(prefix, sampleBasename, "_supplementary", sep = "")),
destination = file.path(temporarySource, paste(
prefix, sampleBasename, "_sorted", sep = "")))
## index bam
Rsamtools::indexBam(files = file.path(temporarySource, paste(
prefix, sampleBasename, "_sorted", ".bam", sep = "")))
## filter e write bam
Rsamtools::filterBam(
file = file.path(temporarySource, paste(
prefix, sampleBasename, "_sorted", ".bam", sep = "")),
destination =
file.path(outputBam, paste(
prefix, sampleBasename, ".bam", sep = "")),
param = parameters,
indexDestination = FALSE)
}
## parallelization
parallel::parLapply(cl = clust <- parallel::makeCluster(spec = nodes, type = "PSOCK"),
X = lalista,
fun = function(x,
readsPath,
GenomeIndex,
outBam,
threads,
outFormat,
phredScore,
maxExtractedSubreads,
consensusVote,
mismatchMax,
uniqueOnly,
maxMultiMapped,
indelLength,
fragmentMinLength,
fragmentMaxLength,
matesOrientation,
readOrderConserved,
coordinatesSorting,
allJunctions,
tempfolder
) {
## first alignment
Rsubread::subjunc(
readfile1 = file.path(readsPath,
list.files(
path = readsPath,
pattern =
paste(x, "_1.fastq$|_1.fq$|_1.fastq.gz$|_1.fq.gz$",
sep = "")
)
),
readfile2 = file.path(readsPath,
list.files(
path = readsPath,
pattern =
paste(x, "_2.fastq$|_2.fq$|_2.fastq.gz$|_2.fq.gz$",
sep = "")
)
),
index = GenomeIndex,
output_file = file.path(tempfolder,
paste("Alignment", x, "_supplementary", sep = "")
),
nthreads = threads,
output_format = outFormat,
phredOffset = phredScore,
nsubreads = maxExtractedSubreads,
TH1 = consensusVote,
TH2 = consensusVote,
maxMismatches = mismatchMax,
unique = uniqueOnly,
nBestLocations = maxMultiMapped,
indels = indelLength,
maxFragLength = fragmentMinLength,
minFragLength = fragmentMaxLength,
PE_orientation = matesOrientation,
keepReadOrder = readOrderConserved,
sortReadsByCoordinates = coordinatesSorting,
reportAllJunctions = allJunctions,
)## end of subjunc1
## extracting unmapped
UnmappedExtraction(temporarySource = tempfolder,
outputBam = outBam,
prefix = "Alignment",
sampleBasename = x)
## rescue indel.vcf
fileRescue <- c(".indel.vcf", ".junction.bed", ".summary")
file.rename(
from = file.path(tempfolder,
paste("Alignment", x, "_supplementary", fileRescue, sep = "")),
to = file.path(outBam,
paste("Alignment", x, fileRescue, sep = ""))
)
## eliciting cleaning RAM memory
base::gc(reset = TRUE)
}, ## end of the function body
readsPath,
GenomeIndex,
outBam,
threads,
outFormat,
phredScore,
maxExtractedSubreads,
consensusVote,
mismatchMax,
uniqueOnly,
maxMultiMapped,
indelLength,
fragmentMinLength,
fragmentMaxLength,
matesOrientation,
readOrderConserved,
coordinatesSorting,
allJunctions,
tempfolder
) ## end of parlapply
parallel::stopCluster(cl = clust) ## end of node communications
unlink(tempfolder, recursive = TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.