R/filter.jumpers.R

Defines functions filter.jumpers

Documented in filter.jumpers

#' @title Detect LTR retrotransposons that are potential jumpers
#' @description This function applies specific filter criteria to
#' screen for LTR retrotransposons predicted by \code{\link{LTRpred}}
#' that are potentially able to transpose due to their sequence features.
#' @param LTRpred.tbl a \code{\link{data.frame}} generated by \code{\link{LTRpred}}.
#' It must contain the columns \code{ltr_similarity}, \code{lTSD_motif},
#' \code{rTSD_motif}, \code{orfs}, \code{protein_domain}, and \code{tRNA_motif}
#' (plus \code{PPT_motif} for the \code{conservative} and \code{between} strategies).
#' @param ltr.similarity LTR similarity threshold. Only elements with
#' \code{ltr_similarity >= ltr.similarity} are retained, for every strategy.
#' Default is \code{ltr.similarity = 95}.
#' @param strategy filter strategy: either \code{conservative}, \code{liberal}, or \code{between}.
#' @author Hajk-Georg Drost
#' @details
#' Every strategy first requires the LTR similarity of an element to reach
#' \code{ltr.similarity}. The strategies differ in which additional sequence
#' features must be annotated (i.e. not \code{NA}).
#'
#' \strong{Filter strategy}
#' \itemize{
#' \item \code{conservative} : \emph{all} of the following must hold:
#' left and right TSD motif present, PPT motif present, at least one ORF,
#' a protein domain annotation, and a tRNA motif.
#' \item \code{liberal} : \emph{at least one} of the following must hold:
#' both TSD motifs present, at least one ORF, a protein domain annotation,
#' or a tRNA motif.
#' \item \code{between} : like \code{liberal}, but a present PPT motif is
#' accepted as a further alternative feature.
#' }
#'
#' @return The rows of \code{LTRpred.tbl} that pass the selected filter,
#' sorted by decreasing \code{ltr_similarity}.
#' @examples
#' \dontrun{
#' # generate de novo LTR transposon prediction
#' pred <- LTRpred(genome.file = "TAIR10_chr_all.fas",
#'                 trnas       = "plantRNA_Arabidopsis.fsa",
#'                 hmms        = "hmm_*")
#'
#' # detect potential jumpers
#' filter.jumpers(pred)
#' }
#' @seealso \code{\link{LTRpred}}
#' @export
filter.jumpers <- function(LTRpred.tbl, ltr.similarity = 95, strategy = "conservative") {

  # `strategy` must be a single, known strategy name; anything else
  # (unknown name, NA, vector of names, non-character) is rejected up front.
  if (!is.character(strategy) || length(strategy) != 1L ||
      !is.element(strategy, c("conservative", "liberal", "between"))) {
    stop("Please choose a filter strategy implemented in this function.")
  }

  # Fail early with a readable message when a column used by the filter is
  # absent; otherwise the NULL placeholders below would be picked up instead.
  required_cols <- c("ltr_similarity", "lTSD_motif", "rTSD_motif", "PPT_motif",
                     "orfs", "protein_domain", "tRNA_motif")
  if (strategy == "liberal") {
    # the liberal filter never looks at the PPT motif
    required_cols <- setdiff(required_cols, "PPT_motif")
  }
  missing_cols <- setdiff(required_cols, names(LTRpred.tbl))
  if (length(missing_cols) > 0) {
    stop("LTRpred.tbl is missing the following column(s): ",
         paste(missing_cols, collapse = ", "))
  }

  # Placeholders for the column names referenced inside dplyr::filter(), so
  # that R CMD check does not report undefined global variables.
  ltr_similarity <- lTSD_motif <- rTSD_motif <- PPT_motif <- NULL
  tRNA_motif <- orfs <- protein_domain <- NULL

  res <- switch(
    strategy,
    conservative = dplyr::filter(
      LTRpred.tbl,
      (ltr_similarity >= ltr.similarity) &
        (!is.na(lTSD_motif)) &
        (!is.na(rTSD_motif)) &
        (!is.na(PPT_motif)) &
        (orfs >= 1) &
        (!is.na(protein_domain)) &
        (!is.na(tRNA_motif))
    ),
    # `&` binds tighter than `|` in R, so the alternatives are wrapped in one
    # parenthesised group to keep the similarity threshold mandatory.
    between = dplyr::filter(
      LTRpred.tbl,
      (ltr_similarity >= ltr.similarity) &
        (
          ((!is.na(lTSD_motif)) & (!is.na(rTSD_motif))) |
            (!is.na(PPT_motif)) |
            (orfs >= 1) |
            (!is.na(protein_domain)) |
            (!is.na(tRNA_motif))
        )
    ),
    liberal = dplyr::filter(
      LTRpred.tbl,
      (ltr_similarity >= ltr.similarity) &
        (
          ((!is.na(lTSD_motif)) & (!is.na(rTSD_motif))) |
            (orfs >= 1) |
            (!is.na(protein_domain)) |
            (!is.na(tRNA_motif))
        )
    )
  )

  # most similar LTR pairs first
  res[order(res[["ltr_similarity"]], decreasing = TRUE), ]
}
HajkD/LTRpred documentation built on April 22, 2022, 4:35 p.m.