#' @title Read output of \code{ORFpred}
#' @description This function reads the output of the \code{\link{ORFpred}} function and stores the
#' sequence id and number of predicted ORFs in a \code{\link{data.frame}} object.
#' @param input.file fasta file generated by \code{\link{ORFpred}}.
#' @author Hajk-Georg Drost
#' @details The file generated by \code{\link{ORFpred}} is parsed by this function
#' and returned as \code{\link{data.frame}} object.
#' @examples
#' # read an example prediction file generated by ORFpred()
#' ORFPred <- read.orfs(system.file("nt.fa",package = "LTRpred"))
#'
#' head(ORFPred)
#' @seealso \code{\link{ORFpred}}, \code{\link{LTRpred}}
#' @return
#' A \code{\link{data.frame}} object storing the \code{seq.id} and \code{orfs} (number of predicted ORFs) of the predicted LTRs.
#' @export
read.orfs <- function(input.file) {
  # Fail early with a readable error when the prediction file is missing.
  if (!file.exists(input.file)) {
    stop(
      "The file '", input.file, "' does not seem to exist. ",
      "Please provide a valid file path to input.file for read.orfs() ...",
      call. = FALSE
    )
  }

  orf_seqs <- Biostrings::readDNAStringSet(input.file)

  if (length(orf_seqs) == 0) {
    message("The ORF prediction file was empty ... Therefore, no ORFs are added to the result table.")
    # Zero-length typed columns keep the `seq.id` / `orfs` schema of the
    # non-empty result. `NULL` columns would be dropped by tibble(), leaving
    # a table without any columns.
    return(dplyr::tibble(seq.id = character(0), orfs = numeric(0)))
  }

  # Every FASTA header is split on "|" and its first field is used as the
  # sequence id.
  header_fields <- stringr::str_split(names(orf_seqs), "[|]")
  seq_ids <- vapply(header_fields, function(fields) fields[1], character(1))

  # The number of records sharing an id is reported as that sequence's ORF
  # count.
  orf_counts <- table(seq_ids)

  # NOTE: a draft that additionally parsed `start` / `end` (and a chromosome
  # label) out of `seq.id` by splitting on "__" and "_" used to live here as
  # commented-out code; only `seq.id` and `orfs` are returned.
  dplyr::tibble(
    seq.id = names(orf_counts),
    orfs = as.numeric(orf_counts)
  )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.