# inst/extcode/kmerRefGenerate.R

#' Generate a k-mer reference from a protein fasta file
#'
#' Reads amino-acid sequences from a fasta file, enumerates every overlapping
#' window of length `kmer` in each sequence, and saves the result as an RDS
#' file named `<prefix>_<kmer>mer_reference.rds`.
#'
#' Sequence names are cleaned before saving: everything up to and including
#' the last `|` and everything from ` OS=` onward is removed (this looks
#' tailored to UniProt-style fasta headers).
#'
#' @docType methods
#' @name kmerRefGenerate
#' @rdname kmerRefGenerate
#'
#' @param refseq_fa The path to reference fasta file.
#' @param kmer A single positive whole number giving the k-mer length.
#' @param prefix The prefix of kmer reference. The output file is
#'   `paste0(prefix, "_", kmer, "mer_reference.rds")`, so a prefix without a
#'   directory component writes into the current working directory.
#'
#' @return Invisibly, a named list with one character vector of k-mers per
#'   input sequence. Sequences shorter than `kmer` yield `character(0)`.
#'   The same list is written to the RDS file as a side effect.
#'
#' @author Wubing Zhang
#' @import Biostrings
#' @export
#'
kmerRefGenerate <- function(
    refseq_fa = "~/Jobs/Project/XunBaihui/_Data/6_PhageDisplay/TB_Proteosome/UP000001584_83332.fasta",
    kmer = 5, prefix = "UP000001584_83332") {
  # Fail loudly when the dependency is missing; require() would only return
  # FALSE and let the function die later with a confusing error.
  if (!requireNamespace("Biostrings", quietly = TRUE)) {
    stop("Package 'Biostrings' is required but is not installed.",
         call. = FALSE)
  }
  stopifnot(
    is.numeric(kmer), length(kmer) == 1L, !is.na(kmer),
    kmer >= 1, kmer %% 1 == 0
  )

  refseq <- Biostrings::readAAStringSet(refseq_fa)
  # Named character vector: one element per fasta record.
  sequences <- as.character(refseq)

  extract_kmers <- function(aa) {
    n_windows <- nchar(aa) - kmer + 1
    # A sequence shorter than kmer has no complete window. The original
    # `kmer:nchar(x)` counted downwards here and produced truncated or
    # empty strings instead of nothing.
    if (n_windows < 1) {
      return(character(0))
    }
    starts <- seq_len(n_windows)
    # substring() is vectorised over start/stop, giving all windows at once.
    substring(aa, starts, starts + kmer - 1)
  }

  kmerlist <- lapply(sequences, extract_kmers)
  names(kmerlist) <- gsub(".*\\|| OS=.*", "", names(kmerlist))
  saveRDS(kmerlist, paste0(prefix, "_", kmer, "mer_reference.rds"))

  # Return what the documentation promises, invisibly so that interactive
  # calls still print nothing.
  invisible(kmerlist)
}
# WubingZhang/PhageR documentation built on July 2, 2019, 9:03 p.m.