R/pred2gff.R

Defines functions pred2gff

Documented in pred2gff

#' @title Format LTR prediction data to GFF3 file format
#' @description This function formats the LTR prediction \code{\link{data.frame}}
#' generated by \code{\link{LTRharvest}}, \code{\link{LTRdigest}}, or \code{\link{LTRpred}}
#' into a tab-separated, GFF-style table and writes it to \code{output}.
#' @param LTR.data the LTR prediction \code{\link{data.frame}}
#' generated by \code{\link{LTRharvest}}, \code{\link{LTRdigest}}, or \code{\link{LTRpred}}.
#' @param output filename of the output GFF file.
#' @param program program used to generate the prediction table, e.g. \code{program = "LTRpred"},
#' \code{program = "LTRdigest"}, or \code{program = "LTRharvest"}.
#' @author Hajk-Georg Drost
#' @details
#' Each row of \code{LTR.data} becomes one tab-separated line with nine columns:
#' seqname, source, feature, start, end, score, strand, frame, attribute.
#' No header line and no quoting is written.
#'
#' The first eight columns are taken from the \code{chromosome}, \code{pred_tool},
#' \code{annotation}, \code{start}, \code{end}, \code{score}, \code{strand}, and
#' \code{frame} columns of \code{LTR.data}. The attribute column is a list of
#' \code{key=value} pairs separated by \code{"; "}; which pairs are written depends
#' on \code{program}. A column that is absent from \code{LTR.data} yields an empty
#' value (\code{key=}) rather than an error.
#' @return This function is called for its side effect of writing \code{output};
#' it has no meaningful return value.
#' @examples
#' gff.file <- system.file("TAIR10_chr_all_LTRdigestPrediction.gff",
#'                         package = "LTRpred")
#' tabout.file <- system.file("TAIR10_chr_all-ltrdigest_tabout.csv"
#'                            ,package = "LTRpred")
#' LTRfile <- read.prediction(gff.file,tabout.file, program = "LTRdigest")
#'
#' # generate GFF file
#' pred2gff(LTRfile$ltr.retrotransposon, output = "test.gff")
#'
#' @references http://www.ensembl.org/info/website/upload/gff.html
#' @export
pred2gff <- function(LTR.data,
                     output  = "output.gff",
                     program = "LTRpred"){

  # Guard the length first: is.element() is vectorised and a length > 1
  # condition inside if() is an error in recent R versions.
  if (length(program) != 1L ||
      !is.element(program, c("LTRpred", "LTRdigest", "LTRharvest")))
    stop ("Please select a program that is supported by this function: 'LTRpred', 'LTRdigest', or 'LTRharvest'.")

  # Attribute tables: names are the keys written to the file, values are the
  # LTR.data columns they are read from. Order here is the order in the file.
  id_fields <- c("ID"      = "ID",
                 "ltr.sim" = "ltr_similarity")

  element_fields <- c("element_length" = "element_length",
                      "lLTRstart"      = "lLTR_start",
                      "lLTRend"        = "lLTR_end",
                      "lLTRlength"     = "lLTR_length",
                      "rLTRstart"      = "rLTR_start",
                      "rLTRend"        = "rLTR_end",
                      "rLTRlength"     = "rLTR_length",
                      "lTSDstart"      = "lTSD_start",
                      "lTSDend"        = "lTSD_end",
                      "lTSDmotif"      = "lTSD_motif",
                      "rTSDstart"      = "rTSD_start",
                      "rTSDend"        = "rTSD_end",
                      "rTSDmotif"      = "rTSD_motif")

  ppt_fields <- c("PPTstart"  = "PPT_start",
                  "PPTend"    = "PPT_end",
                  "PPTmotif"  = "PPT_motif",
                  "PPTstrand" = "PPT_strand",
                  "PPToffset" = "PPT_offset")

  pbs_fields <- c("PBSstart"  = "PBS_start",
                  "PBSend"    = "PBS_end",
                  "PBSstrand" = "PBS_strand",
                  "PBSoffset" = "PBS_offset")

  trna_fields <- c("trna"          = "trna",
                   "tRNAmotif"     = "trna_motif",
                   "tRNAoffset"    = "trna_offset",
                   "PBS/tRNAedist" = "PBS/tRNA_edist",
                   "ProteinDomain" = "protein_domain")

  fields <- switch(program,
    LTRharvest = id_fields,
    LTRdigest  = c(id_fields, element_fields, ppt_fields, pbs_fields,
                   trna_fields),
    LTRpred    = c(id_fields,
                   "ltr.evol.age" = "ltr_age_mya",
                   element_fields,
                   ppt_fields,
                   "PPTlength" = "PPT_length",
                   pbs_fields,
                   "PBSlength" = "PBS_length",
                   trna_fields,
                   "orfs" = "orfs",
                   "repeat_region_length" = "repeat_region_length")
  )

  n_rows <- nrow(LTR.data)

  # One "key=value" character vector per attribute, each of length n_rows.
  # `[[` matches column names exactly; a missing column gives NULL, which is
  # written as an empty value. rep_len() also covers the zero-row case, where
  # paste0() alone would return a single "key=" string.
  attribute_parts <- lapply(seq_along(fields), function(i) {
    value <- LTR.data[[fields[[i]]]]
    if (is.null(value))
      value <- ""
    rep_len(paste0(names(fields)[i], "=", value), n_rows)
  })

  # Every pair is joined with the same "; " separator.
  attribute <- do.call(paste, c(attribute_parts, sep = "; "))

  res <- data.frame(seqname   = LTR.data$chromosome,
                    source    = LTR.data$pred_tool,
                    feature   = LTR.data$annotation,
                    start     = LTR.data$start,
                    end       = LTR.data$end,
                    score     = LTR.data$score,
                    # strand may be stored as a list column; flatten it
                    strand    = unlist(LTR.data$strand),
                    frame     = LTR.data$frame,
                    attribute = attribute,
                    stringsAsFactors = FALSE)

  utils::write.table(res,output, sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
}
HajkD/LTRpred documentation built on April 22, 2022, 4:35 p.m.