#' Display the distribution of nucleotide and amino acid.
#'
#' @docType methods
#' @name PeptideCount
#' @rdname PeptideCount
#'
#' @param fastqs paths to fastq files.
#' @param proj The project name.
#' @param outdir The output directory.
#' @param peplen Integer, specifying the length of peptide.
#' @param distrBase Boolean, specifying whether test the distribution at base level.
#' @param distrAA Boolean, specifying whether test the distribution at AA level.
#'
#' @return Count table.
#'
#' @author Wubing Zhang
#' @importFrom ggseqlogo ggseqlogo
#' @export
PeptideCount <- function(fastqs, proj = "Peptide", outdir = "./",
peplen = 12, distrBase = FALSE, distrAA = TRUE){
requireNamespace("ggseqlogo")
stat_reads = c()
peptide_list = list()
for(f in fastqs){
prefix = gsub(".*/|\\..*", "", f)
message(Sys.time(), " Process ", prefix, " ...")
reads = extractReads(f)
nRaw = length(reads)
# remove reads with "N" or have different length
reads = reads[!grepl("N", reads)]
nN = nRaw-length(reads)
nL = sum(nchar(reads)>peplen*3)
nS = sum(nchar(reads)<peplen*3)
reads = reads[nchar(reads)==peplen*3]
if(length(reads)<10) next
# Test the distribution of nucleotide in each position
if(distrBase){
message(Sys.time(), " Test the nucleotide distribution ...")
distr_pos = tableSeqs(reads, peplen*3)
distr_pos = distr_pos[, sort(colnames(distr_pos))]
# p1 = dodgeBarView(distr_pos, xlab = "Position", ylab = "Read count",
# filename = paste0(outdir, "/", proj, "_dodgeBar_Nucleotide_", prefix, ".pdf"),
# width = 8, height = 3, dpi = 200)
# p2 = polylineView(distr_pos, xlab = "Position", ylab = "Read count",
# filename = paste0(outdir, "/", proj, "_polyline_Nucleotide_", prefix, ".pdf"),
# width = 8, height = 3, dpi = 200)
ratioNT = t(distr_pos / rowSums(distr_pos))
p = ggseqlogo(ratioNT, method='custom', seq_type='dna')
ggsave(paste0(outdir, "/", proj, "_SeqLogo_Nucleotide_", prefix, ".pdf"),
p, width = 9, height = 3, dpi = 200)
}
# Amino acid
message(Sys.time(), " Translate the sequences ...")
peptides = NucleoTranslate(reads)
idx = grepl("X", peptides)
nStop = sum(idx)
peptides = peptides[!idx]
peptide_list[[f]] = peptides
message("The number of peptides with stop codon: ", sum(idx))
stat_reads = rbind(stat_reads, c(nRaw, nN, nL, nS, nStop, length(unique(reads)), length(unique(peptides))))
rownames(stat_reads)[nrow(stat_reads)] = prefix
if(length(peptides)<10) next
if(distrAA){
message("The distribution of AA ... ")
distr_pos = tableSeqs(peptides)
distr_pos = distr_pos[, sort(colnames(distr_pos))]
# p1 = dodgeBarView(distr_pos, xlab = "Position", ylab = "Read count",
# filename = paste0(outdir, "/", proj, "_dodgeBar_AminoAcid_", prefix, ".pdf"),
# width = 8, height = 3, dpi = 200)
# p2 = polylineView(distr_pos, xlab = "Position", ylab = "Read count",
# filename = paste0(outdir, "/", proj, "_polyline_AminoAcid_", prefix, ".pdf"),
# width = 8, height = 3, dpi = 200)
ratioNT = t(distr_pos / rowSums(distr_pos))
p = ggseqlogo(ratioNT, method='custom', seq_type='dna')
ggsave(paste0(outdir, "/", proj, "_SeqLogo_AminoAcid_", prefix, ".pdf"),
p, width = 8, height = 10, dpi = 200)
}
}
# saveRDS(peptide_list, paste0(outdir, "/", proj, "_Peptides_list.rds"))
## Write out read level statistics ##
message(Sys.time(), " Write out statistics ...")
colnames(stat_reads) = c("TotalReads", "NReads", "LongerReads",
"ShorterReads", "ReadsWithStop", "UniqueReads", "UniquePeptides")
write.table(stat_reads, paste0(outdir, "/", proj, "_Reads_Statistic.txt"), sep = "\t", quote = FALSE)
## Merge count tables ##
message(Sys.time(), " Merge count tables ...")
rawcount = NULL
for(f in names(peptide_list)){
tmpcount = as.data.frame(table(peptide_list[[f]]), stringsAsFactors = FALSE)
colnames(tmpcount) = c("Peptide", f)
if(is.null(rawcount)){
rawcount = tmpcount
}else{
rawcount = merge(rawcount, tmpcount, all = TRUE, sort = FALSE, by = "Peptide")
}
}
rownames(rawcount) = rawcount$Peptide
rawcount = rawcount[,-1, drop = FALSE]
rawcount[is.na(rawcount)] = 0
colnames(rawcount) = gsub(".*\\/|.fq.*", "", colnames(rawcount))
# write.table(rawcount, paste0(outdir, "/Summary_count.txt"),
# row.names = FALSE, sep = "\t", quote = FALSE)
# return count table #
return(rawcount)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.