# R/read.seqs.R
#
# Defines functions: read.seqs
#
# Documented in: read.seqs

#' @title Import sequences of predicted LTR transposons
#' @description Import sequences of predicted LTR transposons from a fasta file
#' and return the genomic coordinates that are encoded in the sequence headers.
#' Header parsing is currently implemented only for files generated by
#' \code{\link{LTRharvest}}; selecting \code{\link{LTRdigest}} or
#' \code{\link{LTRpred}} raises an informative error.
#' @param seq.file path to fasta file storing the sequences of predicted LTR transposons generated by
#' \code{\link{LTRharvest}}, \code{\link{LTRdigest}}, or \code{\link{LTRpred}}.
#' @param program program used to generate the LTR transposons specified in \code{seq.file}, e.g. \code{program = "LTRpred"}, \code{program = "LTRdigest"}, or \code{program = "LTRharvest"}.
#' Only \code{program = "LTRharvest"} is implemented at the moment.
#' @return A \code{data.frame} with one row per sequence in \code{seq.file} and
#' the columns \code{chromosome} (header text in front of the first \code{" ("}),
#' \code{start}, \code{end} (taken from the \code{[start,end]} field of the
#' header), and \code{width} (\code{end - start + 1}). Sequences whose header
#' has no \code{[start,end]} field get \code{NA} coordinates and a warning is
#' raised. A file without sequences yields a data.frame with zero rows.
#' @author Hajk-Georg Drost
#' @export
read.seqs <- function(seq.file, program = "LTRharvest") {

  supported <- c("LTRharvest", "LTRdigest", "LTRpred")
  if (length(program) != 1L || !is.element(program, supported))
    stop("Please select a prediction program that is supported by this function.")

  # Only the LTRharvest header layout is parsed below. Fail with a clear
  # message instead of the former "object 'SeqInformation.df' not found".
  if (program != "LTRharvest")
    stop("Importing sequences generated by '", program, "' is not implemented ",
         "yet; currently only program = \"LTRharvest\" is supported.",
         call. = FALSE)

  PredictedLTRSeqs <- Biostrings::readDNAStringSet(seq.file, "fasta")
  HeaderInformation <- names(PredictedLTRSeqs)

  # A fasta file without any records: return an empty table with the same
  # columns rather than failing while assigning column names.
  if (length(HeaderInformation) == 0L) {
    return(data.frame(
      chromosome = character(0),
      start = numeric(0),
      end = numeric(0),
      width = numeric(0)
    ))
  }

  # Capture the two integers inside the "[start,end]" field of each header.
  # str_match() returns one row per header and NA where nothing matched.
  coords <- stringr::str_match(
    HeaderInformation,
    "\\[\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\]"
  )

  malformed <- is.na(coords[, 1])
  if (any(malformed))
    warning(sum(malformed), " sequence header(s) do not contain a ",
            "'[start,end]' field and receive NA coordinates, e.g. '",
            HeaderInformation[malformed][1], "'.", call. = FALSE)

  start <- as.numeric(coords[, 2])
  end <- as.numeric(coords[, 3])

  # The chromosome identifier is everything in front of the first " (".
  # Naming the vector by the full header keeps the headers as row names of
  # the returned data.frame, as in the previous implementation.
  ChrID <- sub(" \\(.*$", "", HeaderInformation)
  names(ChrID) <- HeaderInformation

  data.frame(
    chromosome = ChrID,
    start = start,
    end = end,
    width = (end - start) + 1
  )
}
# HajkD/LTRpred documentation built on April 22, 2022, 4:35 p.m.