#' Phage library QC
#'
#' @docType methods
#' @name LibQC
#' @rdname LibQC
#'
#' @param lib_fastq The path to library fastq.
#' @param outdir The output directory.
#' @param len The length of peptides.
#' @importFrom MAGeCKFlute MapRatesView
#' @export
LibQC <- function(lib_fastq, outdir = ".", len = 12){
message(Sys.time(), " Extract reads from ", lib_fastq, " ...")
reads = extractReads(lib_fastq)
nreads = length(reads)
## Remove reads with "N" or have different length
idx = grepl("N", reads) | nchar(reads)!=len*3
reads = reads[!idx]
## Translate the DNA sequence to Amino acid sequence
message(Sys.time(), " Translate the DNA sequence to Amino acid sequence ...")
peptides = NucleoTranslate(reads)
## Remove peptide with stop codon
message(Sys.time(), " Remove reads with stop codon ", lib_fastq, " ...")
idx = grepl("X", peptides)
peptides = peptides[!idx]
reads = reads[!idx]
## Ratio of useless peptide
gg = data.frame(Label = c("Library"), Reads = nreads,
RemainPeptide = length(peptides))
p = MAGeCKFlute::MapRatesView(gg, Mapped = "RemainPeptide")
p = p + scale_fill_manual(values = c("#9BC7E9", "#1C6DAB"),
label = c("Total peptide", "Remain peptide"))
p = p + theme(axis.text.x = element_text(angle = 0, hjust = 1, vjust = 1))
p = p + theme(legend.position = "bottom")
ggsave(file.path(outdir, "MapRatesView_PhageLibrary.pdf"), p,
width = 5, height = 4, dpi = 200)
## Save library as table
message(Sys.time(), " Save library as data frame ", lib_fastq, " ...")
lib = data.frame(id = reads, sequence = reads, peptide = peptides)
lib = lib[!duplicated(lib$id), ]
lib$id = 1:nrow(lib)
write.table(lib, paste0(outdir, "/PhageDisplay_Library.txt"), sep = "\t",
row.names = FALSE, col.names = FALSE, quote = FALSE)
## Distribution of nucleotide on each position
message(Sys.time(), " Plot the nucleotide distribution of ", lib_fastq, " ...")
suffix = "Library"
distr_pos = tableSeqs(reads)
distr_pos = distr_pos[, sort(colnames(distr_pos))]
saveRDS(distr_pos, paste0(outdir, "/Distr_Nucleotide_", suffix, ".rds"))
p2 = polylineView(distr_pos, xlab = "Position", ylab = "Read count")
p2 = p2 + theme(legend.position = "bottom")
ggsave(paste0(outdir, "/Polyline_Nucleotide_", suffix, ".pdf"), p2,
width = 8, height = 3, dpi = 200)
## Sequence logo
ratioNT = t(distr_pos / rowSums(distr_pos))
p = ggseqlogo(ratioNT, method='custom', seq_type='dna')
ggsave(paste0(outdir, "/SeqLogo_Nucleotide_", suffix, ".pdf"),
p, width = 9, height = 3, dpi = 200)
## Distribution of amino acid
message(Sys.time(), " Plot the AA distribution of ", lib_fastq, " ...")
distr_pos = tableSeqs(peptides)
distr_pos = distr_pos[, sort(colnames(distr_pos))]
saveRDS(distr_pos, paste0(outdir, "/Distr_AminoAcid_", suffix, ".rds"))
p2 = polylineView(distr_pos, xlab = "Position", ylab = "Read count")
p2 = p2 + theme(legend.position = "bottom")
p2 = p2 + guides(col = guide_legend(nrow = 2, byrow = TRUE))
ggsave(paste0(outdir, "/Polyline_AminoAcid_", suffix, ".pdf"), p2,
width = 8, height = 3, dpi = 200)
## Sequence logo
ratioNT = t(distr_pos / rowSums(distr_pos))
p = ggseqlogo(ratioNT, method='custom', seq_type='aa')
ggsave(paste0(outdir, "/SeqLogo_AminoAcid_", suffix, ".pdf"),
p, width = 8, height = 10, dpi = 200)
message(Sys.time(), " Library QC is done ...")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.