inst/extdata/mergeR3.R

setwd("~/Jobs/Project/XunBaihui/_Data/6_PhageDisplay/20190124/kmerRes/")
rm(list = ls())
library(PhageR)
library(rBLAST)
source('~/Jobs/Archive/Codes/PhageR/R/countPeptide.R')

pepdir = "../Peptides_list.rds"
for(kmer in 4:12){
  countPeptide(pepdir, kmer = kmer)
  rawcount = data.frame(peptide = c(NA))
  countTables = list.files(pattern = paste0("3rd.fq.gz_", kmer, "mer_count.txt"))[-(1:3)]
  for(f in countTables){
    message("Merge ", f, "...")
    tmp = read.table(f, sep = "\t", header = FALSE, stringsAsFactors = FALSE)
    samplename = gsub(".*_TB_|_3rd.fq.gz.*", "", f)
    colnames(tmp) = c("peptide", samplename)
    rawcount = merge.data.frame(rawcount, tmp, by = "peptide", all = TRUE)
  }
  file.remove(list.files(pattern = paste0("mer_count.txt")))
  rawcount = rawcount[!is.na(rawcount$peptide), ]
  rawcount[is.na(rawcount)] = 0
  write.table(rawcount, paste0("TB_PBST_NaCI_", kmer, "mercount.txt"), sep = "\t",
              row.names = FALSE, col.names = TRUE, quote = FALSE)
  rawcount = read.table(paste0("TB_PBST_NaCI_", kmer, "mercount.txt"), sep = "\t",
                        row.names = 1, header = TRUE, stringsAsFactors = FALSE)
  all_peptides = rownames(rawcount)
  names(all_peptides) = paste0(">", all_peptides)
  write.table(all_peptides, paste0("../All_", kmer, "mer_peptides.fa"), quote = FALSE,
              row.names = TRUE, col.names = FALSE, sep = "\n")

  makeblastdb("~/Jobs/Project/XunBaihui/_Data/6_PhageDisplay/TB_Proteosome/UP000001584_83332.fasta", dbtype = "prot")
  TB_Proteosome <- blast("~/Jobs/Project/XunBaihui/_Data/6_PhageDisplay/TB_Proteosome/UP000001584_83332.fasta", type = "blastp")
  peptide.seq <- readAAStringSet(paste0("All_", kmer, "mer_peptides.fa"))
  blastRes = predict(TB_Proteosome, peptide.seq, BLAST_args = "-word_size 2 -evalue 200000 -matrix PAM30 -gapopen 8 -gapextend 1 -comp_based_stats 0")
  blastRes = blastRes[blastRes$Perc.Ident==100, ]
  write.table(blastRes, paste0(kmer, "mer_blast_results.txt"), sep = "\t",
              row.names = FALSE, col.names = TRUE, quote = FALSE)
  protCount = t(sapply(levels(blastRes$SubjectID), function(x){
    colSums(rawcount[blastRes$QueryID[blastRes$SubjectID==x], ])
  }))
  write.table(protCount, paste0(kmer, "mer_protein_count.txt"), sep = "\t",
              row.names = TRUE, col.names = TRUE, quote = FALSE)
}
WubingZhang/PhageR documentation built on July 2, 2019, 9:03 p.m.