# R/plotLTRSim.R

# Defines functions plotLTRSim

# Documented in plotLTRSim

#' @title Plot the age distribution of predicted LTR transposons
#' @description 
#' This function visualizes the age distribution of
#' predicted LTR transposons generated with \code{\link[LTRpred]{LTRpred}}.
#' 
#' The age of an LTR transposon is approximated by the sequence similarity between its
#' 3' and 5' LTR. Evolutionarily young (recent) LTR transposons tend to have very similar
#' LTRs (up to 100\% sequence similarity), whereas evolutionarily older LTR transposons
#' tend to have less similar LTRs.
#' @param data the \code{\link{data.frame}} generated by \code{\link[LTRpred]{LTRpred}}.
#' @param type type of histogram. Either normal histogram (\code{type = "hist"}) or stacked histogram (\code{type = "stack"}, see also \code{stack.fill}).
#' If \code{type = "stack"} is specified then the bars are filled by the variable named in \code{stack.fill}.
#' @param stack.fill a character string specifying the variable by which the bar plot shall be stacked.
#' Only used when \code{type = "stack"}. Default is \code{stack.fill = "protein_domain"}.
#' @param similarity.bin resolution of similarity binning. E.g. binning 98\%-100\% into 0.5\% intervals would be \code{similarity.bin = 0.5}.
#' Default is \code{similarity.bin = 2}. Must be a single positive number, or \code{NULL} together with \code{min.sim} (see Details).
#' @param min.sim minimum similarity between LTRs that shall be considered for visualization. 
#' All elements not fulfilling this similarity threshold are filtered out. Default is \code{min.sim = 70}.
#' Must be a single number not larger than 100, or \code{NULL} together with \code{similarity.bin} (see Details).
#' @param quality.filter logical value. If \code{TRUE} (default), \code{data} is passed through
#' \code{LTRpred::quality.filter()} using \code{sim = min.sim} and \code{n.orfs = n.orfs} before plotting.
#' If \code{FALSE}, a note is printed and no quality filter is applied.
#' @param n.orfs minimum number of ORFs detected in the putative LTR transposon. 
#' Only used when \code{quality.filter = TRUE}.
#' @param xlab x-axis label.
#' @param ylab y-axis label.
#' @param main main text.
#' @param legend.title legend text.
#' @return A \pkg{ggplot2} plot object.
#' @author Hajk-Georg Drost
#' @details This way of visualizing the age distribution of LTR transposons
#' allows users to examine the rate of recent transposition events in extant organisms. 
#' 
#' LTR similarity values (column \code{ltr_similarity} of \code{data}) are binned in 
#' intervals (per default 2\% intervals) which can be modified
#' by the \code{similarity.bin} argument. If the distance between \code{min.sim} and 100 is not
#' a multiple of \code{similarity.bin}, the lowest bin is narrower and starts at \code{min.sim}.
#' 
#' \code{similarity.bin} and \code{min.sim} must either both be specified or both be \code{NULL}.
#' If both are \code{NULL}, this function performs neither the \code{min.sim} filtering nor the binning and 
#' plots the \code{similarity} column that then has to be present in \code{data} already.
#' @examples 
#' \dontrun{
#' # run LTRpred for A. thaliana
#' Ath.Pred <- LTRpred(genome.file = "TAIR10_chr_all.fas")
#' # visualize the age distribution of predicted  A. thaliana LTR transposons
#' plotLTRSim(Ath.Pred)
#' }
#' @seealso \code{\link[LTRpred]{LTRpred}}, \code{\link[LTRpred]{LTRharvest}}, \code{\link[LTRpred]{LTRdigest}}
#' @export

plotLTRSim <- function(data,
                       type           = "hist",
                       stack.fill     = "protein_domain",
                       similarity.bin = 2, 
                       min.sim        = 70,
                       quality.filter = TRUE,
                       n.orfs         = 1,
                       xlab           = "LTR % Similarity",
                       ylab           = "Frequency",
                       main           = "LTR Age Distribution",
                       legend.title   = "LTR Similarity"
                       ){
    
    # ---- argument validation (done before any work is performed) ----
    if (length(type) != 1L || !is.element(type, c("hist", "stack")))
        stop("Please choose a type that is supported by this function: type = 'hist' or type = 'stack'.", call. = FALSE)

    use.binning <- !is.null(similarity.bin)
    
    # Only 'both given' and 'both NULL' are meaningful; a mixed specification
    # would otherwise leave the plot object undefined.
    if (use.binning != !is.null(min.sim))
        stop("Please specify either both arguments 'similarity.bin' and 'min.sim' or set both of them to NULL.", call. = FALSE)
    
    if (use.binning) {
        if (!is.numeric(similarity.bin) || length(similarity.bin) != 1L ||
            is.na(similarity.bin) || similarity.bin <= 0)
            stop("Argument 'similarity.bin' needs to be a single positive number.", call. = FALSE)
        if (!is.numeric(min.sim) || length(min.sim) != 1L ||
            is.na(min.sim) || min.sim > 100)
            stop("Argument 'min.sim' needs to be a single number between 0 and 100.", call. = FALSE)
    }
    
    # dummy bindings for the column names used in non-standard evaluation below
    similarity <- ltr_similarity <- NULL
    
    # ---- optional quality filtering ----
    if (quality.filter) {
        data <- LTRpred::quality.filter(data, sim = min.sim, n.orfs = n.orfs)
    } else {
        cat("\n")
        cat("No quality filter has been applied.")
    }
    
    # ---- similarity binning ----
    if (use.binning) {
        # ascending bin borders ending at 100
        bin.borders <- rev(seq(100, min.sim, -similarity.bin))
        
        # When (100 - min.sim) is not a multiple of similarity.bin the lowest
        # border lies above min.sim; add min.sim as border so that elements in
        # [min.sim, lowest border) get a proper bin instead of NA.
        if (bin.borders[1L] - min.sim > sqrt(.Machine$double.eps))
            bin.borders <- c(min.sim, bin.borders)
        
        data <- dplyr::filter(data, ltr_similarity >= min.sim)
        data <- dplyr::mutate(data,
                              similarity = cut(ltr_similarity,
                                               bin.borders,
                                               include.lowest = TRUE,
                                               right          = TRUE))
    }
    
    # ---- plot (identical for binned and pre-binned input) ----
    res <- ggplot2::ggplot(data, ggplot2::aes(x = similarity, fill = similarity))
    
    if (type == "stack") {
        # fill bars by the user-chosen variable instead of the similarity bin
        res <- res + ggplot2::geom_bar(ggplot2::aes_string(fill = stack.fill), stat = "count")
    } else {
        res <- res + ggplot2::geom_bar(stat = "count")
    }
    
    bold.text <- ggplot2::element_text(size = 18, face = "bold")
    
    res <- res + 
        ggplot2::labs(x = xlab, y = ylab, title = main) +
        ggplot2::scale_fill_discrete(name = legend.title) +
        ggplot2::theme_minimal() + 
        ggplot2::theme(
            title            = bold.text,
            legend.title     = bold.text,
            legend.text      = bold.text,
            axis.title       = bold.text,
            axis.text.y      = bold.text,
            axis.text.x      = bold.text,
            panel.background = ggplot2::element_blank(),
            strip.text.x     = ggplot2::element_text(
                size           = 18,
                colour         = "black",
                face           = "bold"
            ),
            # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
            # for line elements; `size` is kept for compatibility - confirm the
            # minimum ggplot2 version before switching.
            panel.grid.major = ggplot2::element_line(color = "gray50", size = 0.5),
            panel.grid.major.x = ggplot2::element_blank()
        ) +
        # rotate the x-axis (bin) labels
        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 1, hjust = 1))
    
    res
}
# HajkD/LTRpred documentation built on April 22, 2022, 4:35 p.m.