#' @title Format LTR prediction data to GFF3 file format
#' @description This function formats the LTR prediction \code{\link{data.frame}}
#' generated by \code{\link{LTRharvest}}, \code{\link{LTRdigest}}, or \code{\link{LTRpred}}
#' as a table in \code{GFF3} file format and writes it to a tab-separated file.
#' @param LTR.data the LTR prediction \code{\link{data.frame}}
#' generated by \code{\link{LTRharvest}}, \code{\link{LTRdigest}}, or \code{\link{LTRpred}}.
#' @param output filename of the output GFF file.
#' @param program program used to generate the prediction table, e.g. \code{program = "LTRpred"},
#' \code{program = "LTRdigest"}, or \code{program = "LTRharvest"}.
#' @return This function is called for its side effect of writing \code{output};
#' it returns \code{NULL} invisibly.
#' @author Hajk-Georg Drost
#' @details
#' Each row of \code{LTR.data} is written as one line with nine tab-separated
#' columns (no header line): \code{seqname} (taken from \code{chromosome}),
#' \code{source} (from \code{pred_tool}), \code{feature} (from \code{annotation}),
#' \code{start}, \code{end}, \code{score}, \code{strand}, \code{frame}, and
#' \code{attribute}.
#'
#' The \code{attribute} column is a list of \code{tag=value} pairs separated by
#' \code{"; "}. Which tags are written depends on \code{program}:
#' \code{"LTRharvest"} writes \code{ID} and \code{ltr.sim}; \code{"LTRdigest"}
#' adds the element length, LTR, TSD, PPT, PBS, tRNA and protein domain fields;
#' \code{"LTRpred"} additionally writes \code{ltr.evol.age}, \code{PPTlength},
#' \code{PBSlength}, \code{orfs} and \code{repeat_region_length}.
#' A tag whose column is not present in \code{LTR.data} is written with an
#' empty value.
#' @examples
#' gff.file <- system.file("TAIR10_chr_all_LTRdigestPrediction.gff",
#'                         package = "LTRpred")
#' tabout.file <- system.file("TAIR10_chr_all-ltrdigest_tabout.csv"
#'                            ,package = "LTRpred")
#' LTRfile <- read.prediction(gff.file,tabout.file, program = "LTRdigest")
#'
#' # generate GFF file
#' pred2gff(LTRfile$ltr.retrotransposon,
#'          output = file.path(tempdir(), "test.gff"),
#'          program = "LTRdigest")
#'
#' @references http://www.ensembl.org/info/website/upload/gff.html
#' @export
pred2gff <- function(LTR.data,
                     output = "output.gff",
                     program = "LTRpred") {
  if (!is.element(program, c("LTRpred", "LTRdigest", "LTRharvest")))
    stop("Please select a program that is supported by this function: 'LTRpred', 'LTRdigest', or 'LTRharvest'.")

  # Attribute tables: the name is the tag written to the GFF attribute column,
  # the value is the LTR.data column it is read from. Order is output order.
  core_tags <- c(ID = "ID", ltr.sim = "ltr_similarity")
  structure_tags <- c(
    lLTRstart = "lLTR_start", lLTRend = "lLTR_end", lLTRlength = "lLTR_length",
    rLTRstart = "rLTR_start", rLTRend = "rLTR_end", rLTRlength = "rLTR_length",
    lTSDstart = "lTSD_start", lTSDend = "lTSD_end", lTSDmotif = "lTSD_motif",
    rTSDstart = "rTSD_start", rTSDend = "rTSD_end", rTSDmotif = "rTSD_motif"
  )
  ppt_tags <- c(
    PPTstart = "PPT_start", PPTend = "PPT_end", PPTmotif = "PPT_motif",
    PPTstrand = "PPT_strand", PPToffset = "PPT_offset"
  )
  pbs_tags <- c(
    PBSstart = "PBS_start", PBSend = "PBS_end",
    PBSstrand = "PBS_strand", PBSoffset = "PBS_offset"
  )
  trna_tags <- c(
    trna = "trna", tRNAmotif = "trna_motif", tRNAoffset = "trna_offset",
    "PBS/tRNAedist" = "PBS/tRNA_edist", ProteinDomain = "protein_domain"
  )

  tags <- switch(
    program,
    LTRharvest = core_tags,
    LTRdigest = c(
      core_tags,
      element_length = "element_length",
      structure_tags, ppt_tags, pbs_tags, trna_tags
    ),
    LTRpred = c(
      core_tags,
      ltr.evol.age = "ltr_age_mya",
      element_length = "element_length",
      structure_tags,
      ppt_tags, PPTlength = "PPT_length",
      pbs_tags, PBSlength = "PBS_length",
      trna_tags,
      orfs = "orfs",
      repeat_region_length = "repeat_region_length"
    )
  )

  if (NROW(LTR.data) == 0L) {
    # paste0() would turn zero-length columns into a single "tag=" string,
    # which no longer matches the zero-row table; emit no attributes instead.
    attribute <- character(0)
  } else {
    # A column missing from LTR.data yields NULL here, which paste0() renders
    # as an empty value ("tag=").
    fields <- lapply(seq_along(tags), function(i) {
      paste0(names(tags)[i], "=", LTR.data[[tags[[i]]]])
    })
    # Every field is separated by "; " so that no two tags run together.
    attribute <- do.call(paste, c(fields, list(sep = "; ")))
  }

  res <- data.frame(
    seqname = LTR.data[["chromosome"]],
    source = LTR.data[["pred_tool"]],
    feature = LTR.data[["annotation"]],
    start = LTR.data[["start"]],
    end = LTR.data[["end"]],
    score = LTR.data[["score"]],
    # unlist() flattens the strand column in case it is stored as a list.
    strand = unlist(LTR.data[["strand"]]),
    frame = LTR.data[["frame"]],
    attribute = attribute,
    stringsAsFactors = FALSE
  )

  utils::write.table(res, output, sep = "\t", quote = FALSE,
                     col.names = FALSE, row.names = FALSE)
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.