Nothing
#' @importFrom ShortRead FastqStreamer yield writeFastq
#' @importFrom Biostrings DNAStringSet width BStringSet
#' @importClassesFrom ShortRead FastqStreamer ShortReadQ SFastqQuality SRFilterResult
#' @importClassesFrom Biostrings DNAStringSet BStringSet
#' @importFrom IRanges narrow
############################################## Main functions for stage 0
Stage_0_Main_fun = function(SA_prefix, S0_fastq1, S0_fastq2, S0_LinkerA, S0_LinkerB,
S0_MinReadLength, S0_MaxReadLength, S0_LinkerOccurence, S0_image, S0_fastqStream,
S0_Totfastqreads, S0_AnalysisDir) {
# Take time:
Analysis.time.start = Sys.time()
#----------------
# create names to save the resulted fastq:
#----------------
if (!dir.exists(S0_AnalysisDir))
dir.create(S0_AnalysisDir)
FastqWriteList = Create_fastq_list_fun(S0_AnalysisDir = S0_AnalysisDir, SA_prefix = SA_prefix)
# image dir:
S0_image_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_stage_0_image.jpg",
sep = ""))
#----------------
# break and filter fastq files:
#----------------
cat("Filtering fastq files...\n")
Parse_fastqfiles_main_fun(S0_fastq1 = S0_fastq1, S0_fastq2 = S0_fastq2, S0_LinkerA = S0_LinkerA,
S0_LinkerB = S0_LinkerB, S0_MinReadLength = S0_MinReadLength, S0_MaxReadLength = S0_MaxReadLength,
S0_LinkerOccurence = S0_LinkerOccurence, S0_fastqStream = S0_fastqStream,
S0_Totfastqreads = S0_Totfastqreads, FastqWriteList = FastqWriteList, S0_image = S0_image,
SA_prefix = SA_prefix, S0_image_dir = S0_image_dir)
# print:
futile.logger::flog.info("=====================================", name = "SA_LogFile",
capture = FALSE)
futile.logger::flog.info("Stage 0 is done!", name = "SA_LogFile", capture = FALSE)
futile.logger::flog.info(paste("Analysis results for stage 0 are in:\n", S0_AnalysisDir),
name = "SA_LogFile", capture = FALSE)
# save time:
Analysis.time.end = Sys.time()
Total.Time = Analysis.time.end - Analysis.time.start
LogFile = paste("Total stage 0 time:", Total.Time, " ", units(Total.Time))
futile.logger::flog.info(LogFile, name = "SA_LogFile", capture = FALSE)
}
# done
#-------------
#-------------
# function for creating the fastq files for the ouput to be saved:
Create_fastq_list_fun = function(S0_AnalysisDir, SA_prefix) {
#-------------
# those with A/A or B/B linkers:(used in subsequent stages)
#-------------
fastq1_usable_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_usable_1.fastq.gz",
sep = ""))
if (file.exists(fastq1_usable_dir))
unlink(x = fastq1_usable_dir, recursive = TRUE, force = TRUE)
fastq2_usable_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_usable_2.fastq.gz",
sep = ""))
if (file.exists(fastq2_usable_dir))
unlink(x = fastq2_usable_dir, recursive = TRUE, force = TRUE)
#-------------
# those with A/B or B/A or one/two parts with no linkers
#-------------
fastq1_chimeric_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_chimeric_1.fastq.gz",
sep = ""))
if (file.exists(fastq1_chimeric_dir))
unlink(x = fastq1_chimeric_dir, recursive = TRUE, force = TRUE)
fastq2_chimeric_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_chimeric_2.fastq.gz",
sep = ""))
if (file.exists(fastq2_chimeric_dir))
unlink(x = fastq2_chimeric_dir, recursive = TRUE, force = TRUE)
#-------------
# those with lack of linkers in each read sequence. (always discarded,except if
# S0_KeepEmptyReads)
#-------------
fastq1_ambiguous_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_ambiguous_1.fastq.gz",
sep = ""))
if (file.exists(fastq1_ambiguous_dir))
unlink(x = fastq1_ambiguous_dir, recursive = TRUE, force = TRUE)
fastq2_ambiguous_dir = file.path(S0_AnalysisDir, paste(SA_prefix, "_ambiguous_2.fastq.gz",
sep = ""))
if (file.exists(fastq2_ambiguous_dir))
unlink(x = fastq2_ambiguous_dir, recursive = TRUE, force = TRUE)
#-------------
# add to list:
#-------------
FastqWriteList = list()
FastqWriteList[[1]] = list(object = NA, file = fastq1_usable_dir)
FastqWriteList[[2]] = list(object = NA, file = fastq2_usable_dir)
FastqWriteList[[3]] = list(object = NA, file = fastq1_chimeric_dir)
FastqWriteList[[4]] = list(object = NA, file = fastq2_chimeric_dir)
FastqWriteList[[5]] = list(object = NA, file = fastq1_ambiguous_dir)
FastqWriteList[[6]] = list(object = NA, file = fastq2_ambiguous_dir)
return(FastqWriteList)
}
# done
#-------------
#-------------
# function for cutting linkers from the yield fastq files
Parse_fastqfiles_main_fun = function(S0_fastq1, S0_fastq2, S0_LinkerA, S0_LinkerB,
S0_MinReadLength, S0_MaxReadLength, S0_LinkerOccurence, S0_fastqStream, S0_Totfastqreads,
FastqWriteList, S0_image, SA_prefix, S0_image_dir) {
#----------------
# Stream the fastq files:
#----------------
Streamfastq1 = ShortRead::FastqStreamer(con = S0_fastq1, n = S0_fastqStream)
Streamfastq2 = ShortRead::FastqStreamer(con = S0_fastq2, n = S0_fastqStream)
# the first of loop you need to create the files, then change to append
WrittingMode = "w"
# count yield/chimeric/usable/ambiguous/N reads:
TotfastqLinesRead = 0
Totchimeric = 0
Totusable = 0
Totambi = 0
TotNs = 0
# the following are for printing progress to the console:
options(scipen = 999, digits = 2) #for printing
cat1 = "||lines scanned:"
cat2 = "||usable reads:"
cat3 = "||chimeric reads:"
cat4 = "||ambiguous reads:"
cat5 = "||N reads:"
#----------------
# loop though the files:
#----------------
repeat {
#----------------
# read the fastq files:
#----------------
# read yield for fastq1
fastq1yield = ShortRead::yield(Streamfastq1)
# read yield for fastq2
fastq2yield = ShortRead::yield(Streamfastq2)
# break if empty, all is read
Curfastqyieldsize = length(fastq1yield)
if (Curfastqyieldsize == 0)
break
TotfastqLinesRead = TotfastqLinesRead + Curfastqyieldsize
#----------------
# check if fastq files have the same ids and remove /1,/2.
#----------------
SortedReads = Check_If_sorted_fun(fastq1yield = fastq1yield, fastq2yield = fastq2yield)
if (!SortedReads) {
stop("S0_fastq1 and S0_fastq2 files are not sorted by ID, or their IDs before /1 and /2 are not identical.",
call. = FALSE)
}
#----------------
# Find fastq linker and split (c++11):
#----------------
FilteringResults = FilterFastqYield_fun_Rcpp(Curfastqyieldsize, as.character(fastq1yield@sread),
Biostrings::width(fastq1yield@sread), as.character(fastq2yield@sread),
Biostrings::width(fastq2yield@sread), S0_LinkerA, S0_LinkerB, S0_LinkerOccurence,
S0_MinReadLength, S0_MaxReadLength)
#----------------
# check the whole Yield is NNs and skip:
#----------------
if (FilteringResults$TotNsYield == Curfastqyieldsize) {
TotNs = TotNs + FilteringResults$TotNsYield
# print:
cat(cat1, TotfastqLinesRead, "(", TotfastqLinesRead/S0_Totfastqreads *
100, "%)|| ", cat2, Totusable, "(", Totusable/TotfastqLinesRead *
100, "%)|| ", cat3, Totchimeric, "(", Totchimeric/TotfastqLinesRead *
100, "%)|| ", cat4, Totambi, "(", Totambi/TotfastqLinesRead * 100,
"%)|| ", cat5, TotNs, "(", TotNs/TotfastqLinesRead * 100, "%)||\r",
sep = "")
# go to next
next
}
#----------------
# narrow results
#----------------
fastq1yield@sread = IRanges::narrow(fastq1yield@sread, start = 1, end = FilteringResults$NarrowingPos[,
1])
fastq1yield@quality = IRanges::narrow(fastq1yield@quality, start = 1,
end = FilteringResults$NarrowingPos[, 1])
fastq2yield@sread = IRanges::narrow(fastq2yield@sread, start = 1, end = FilteringResults$NarrowingPos[,
2])
fastq2yield@quality = IRanges::narrow(fastq2yield@quality, start = 1,
end = FilteringResults$NarrowingPos[, 2])
#----------------
# add counts:
#----------------
Totusable = Totusable + FilteringResults$TotusableYield
Totchimeric = Totchimeric + FilteringResults$TotchimericYield
Totambi = Totambi + FilteringResults$TotambiYield
TotNs = TotNs + FilteringResults$TotNsYield
#----------------
# split chimeric, usable and ambiguous for the fastq files:
#----------------
# take Ids:
Usable_id = which(FilteringResults$FilterClasses == 1)
Chim_id = which(FilteringResults$FilterClasses == 2)
Amb_id = which(FilteringResults$FilterClasses == 0)
# split:
FastqWriteList[[1]]$object = fastq1yield[Usable_id]
FastqWriteList[[2]]$object = fastq2yield[Usable_id]
FastqWriteList[[3]]$object = fastq1yield[Chim_id]
FastqWriteList[[4]]$object = fastq2yield[Chim_id]
FastqWriteList[[5]]$object = fastq1yield[Amb_id]
FastqWriteList[[6]]$object = fastq2yield[Amb_id]
#----------------
# save data: fastq1yield_usable, fastq2yield_usable, fastq1yield_chimeric,
# fastq2yield_chimeric
#----------------
for (i in seq_len(6)) {
ShortRead::writeFastq(object = FastqWriteList[[i]]$object, file = FastqWriteList[[i]]$file,
mode = WrittingMode, compress = TRUE)
}
# change mode to append now:
WrittingMode = "a"
# print:
cat(cat1, TotfastqLinesRead, "(", TotfastqLinesRead/S0_Totfastqreads * 100,
"%)|| ", cat2, Totusable, "(", Totusable/TotfastqLinesRead * 100, "%)|| ",
cat3, Totchimeric, "(", Totchimeric/TotfastqLinesRead * 100, "%)|| ",
cat4, Totambi, "(", Totambi/TotfastqLinesRead * 100, "%)|| ", cat5, TotNs,
"(", TotNs/TotfastqLinesRead * 100, "%)||\r", sep = "")
}
# close connections:
close(Streamfastq1)
close(Streamfastq2)
#----------------
# write in log file:
#----------------
TotfastqLinesRead100 = TotfastqLinesRead/S0_Totfastqreads * 100
Totusable100 = Totusable/TotfastqLinesRead * 100
Totchimeric100 = Totchimeric/TotfastqLinesRead * 100
Totambi100 = Totambi/TotfastqLinesRead * 100
TotNs100 = TotNs/TotfastqLinesRead * 100
LogFile = list()
LogFile[[1]] = paste("Total lines processed:", TotfastqLinesRead, "(", TotfastqLinesRead100,
"%)")
LogFile[[2]] = paste("Total usable reads:", Totusable, "(", Totusable100, "%)")
LogFile[[3]] = paste("Total chimeric reads:", Totchimeric, "(", Totchimeric100,
"%)")
LogFile[[4]] = paste("Total ambiguous reads:", Totambi, "(", Totambi100, "%)")
LogFile[[5]] = paste("Total NNs reads:", TotNs, "(", TotNs100, "%)")
for (lf in seq_len(5)) futile.logger::flog.info(LogFile[[lf]], name = "SA_LogFile",
capture = FALSE)
#----------------
# plot image:
#----------------
if (S0_image) {
Get_image_S0_fun(Totusable100 = Totusable100, Totchimeric100 = Totchimeric100,
Totambi100 = Totambi100, TotNs100 = TotNs100, S0_image_dir = S0_image_dir)
}
}
# done
#-------------
#-------------
# function for checking if the fastq yields are sorted/have same id
Check_If_sorted_fun = function(fastq1yield, fastq2yield) {
# get widths of IDs:
fastq1_idwidth = Biostrings::width(fastq1yield@id)
fastq2_idwidth = Biostrings::width(fastq2yield@id)
# remove the /1 and /2 from the fastq files:
id1 = IRanges::narrow(fastq1yield@id, start = 1, end = fastq1_idwidth - 1)
id1 = as.character(id1)
id2 = IRanges::narrow(fastq2yield@id, start = 1, end = fastq2_idwidth - 1)
id2 = as.character(id2)
# return:
return(identical(id1, id2))
}
# done
#-------------
#-------------
# function for plotting for stage 0
Get_image_S0_fun = function(Totusable100, Totchimeric100, Totambi100, TotNs100, S0_image_dir) {
# Rcheck:
Value = NULL
Kind = NULL
#-------------
# create data:
#-------------
S0_imagedata = data.frame(Kind = c(paste("Usable (", round(Totusable100, digits = 1),
"%)", sep = ""), paste("Chimeric (", round(Totchimeric100, digits = 1), "%)",
sep = ""), paste("Ambiguous (", round(Totambi100, digits = 1), "%)", sep = ""),
paste("NNs (", round(TotNs100, digits = 1), "%)", sep = "")), Value = c(round(Totusable100),
round(Totchimeric100), round(Totambi100), round(TotNs100)))
#-------------
# plot the split:
#-------------
S0_image = ggplot2::ggplot(S0_imagedata, ggplot2::aes(x = "", y = Value, fill = factor(Kind))) +
ggplot2::geom_bar(width = 1, stat = "identity") + ggplot2::coord_polar(theta = "y") +
ggplot2::theme(axis.title = ggplot2::element_blank(), plot.title = ggplot2::element_text(size = 20,
color = "black"), legend.title = ggplot2::element_blank(), legend.text = ggplot2::element_text(size = 17),
axis.text = ggplot2::element_blank(), legend.position = "bottom", legend.direction = "vertical",
axis.ticks = ggplot2::element_blank()) + ggplot2::ggtitle("Pie chart for fastq files") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) + ggplot2::scale_fill_brewer(palette = "Dark2")
# save:
ggplot2::ggsave(plot = S0_image, file = S0_image_dir, scale = 2)
}
# done
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.