R/gl.filter.rdepth.r

Defines functions gl.filter.rdepth

Documented in gl.filter.rdepth

#' @name gl.filter.rdepth
#' @title Filters loci based on counts of sequence tags scored at a locus (read
#'  depth)
#'  @family matched filter

#' @description
#' SNP datasets generated by DArT report AvgCountRef and AvgCountSnp as counts
#' of sequence tags for the reference and alternate alleles respectively. These
#' can be used to back calculate Read Depth. Fragment presence/absence datasets
#' as provided by DArT (SilicoDArT) provide Average Read Depth and Standard
#'  Deviation of Read Depth as standard columns in their report.

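#' This function filters on Read Depth, removing loci with exceptionally low
#' counts (read depth below \code{lower}) or exceptionally high counts (read
#' depth above \code{upper}). Use the companion script gl.report.rdepth to
#' inspect the distribution of read depth before choosing the thresholds.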

#' @param x Name of the genlight object containing the SNP or tag
#' presence/absence data [required].
#' @param lower Lower threshold value below which loci will be removed
#'  [default 5].
#' @param upper Upper threshold value above which loci will be removed
#'  [default 1000].
#' @param plot.display If TRUE, histograms of read depth before and after
#' filtering are displayed in the plot window [default TRUE].
#' @param plot.theme Theme for the plot. See Details for options
#' [default theme_dartR()].
#' @param plot.colors List of two color names for the borders and fill of the
#'  plots [default c("#2171B5", "#6BAED6")].
#' @param plot.dir Directory in which to save files [default = working directory]
#' @param plot.file Name for the RDS binary file to save (base name only, exclude extension) [default NULL]
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#'  progress log; 3, progress and results summary; 5, full report
#'   [default 2, unless specified using gl.set.verbosity].

#' @details
#'  For examples of themes, see:
#'  \itemize{
#'  \item \url{https://ggplot2.tidyverse.org/reference/ggtheme.html} and \item
#'  \url{https://yutannihilation.github.io/allYourFigureAreBelongToUs/ggthemes/}
#'  }
#'  
#' @author Custodian: Arthur Georges (Post to
#' \url{https://groups.google.com/d/forum/dartr})
#' 
#' @examples
#'  \donttest{
#' # SNP data
#'   gl.report.rdepth(testset.gl)
#'   result <- gl.filter.rdepth(testset.gl, lower=8, upper=50, verbose=3)
#' # Tag P/A data
#'   result <- gl.filter.rdepth(testset.gs, lower=8, upper=50, verbose=3)
#'   }
#'   res <- gl.filter.rdepth(platypus.gl)

#' @seealso \code{\link{gl.report.rdepth}}

#' @import patchwork
#' @export
#' @return Returns a genlight object retaining loci with a Read Depth in the
#' range specified by the lower and upper threshold.

gl.filter.rdepth <-  function(x,
                              lower = 5,
                              upper = 1000,
                              plot.display=TRUE,
                              plot.theme = theme_dartR(),
                              plot.colors = NULL,
                              plot.file=NULL,
                              plot.dir=NULL,
                              verbose = NULL) {
    # Filters the loci of a genlight object on read depth: loci whose read
    # depth lies in [lower, upper] (thresholds inclusive) are retained, all
    # others are removed. Optionally displays and/or saves pre- and
    # post-filter histograms. Returns the filtered genlight object with the
    # call appended to its history.

    # SET VERBOSITY
    verbose <- gl.check.verbosity(verbose)

    # SET WORKING DIRECTORY
    plot.dir <- gl.check.wd(plot.dir, verbose = 0)

    # SET COLOURS
    if (is.null(plot.colors)) {
        plot.colors <- gl.select.colors(library = "brewer",
                                        palette = "Blues",
                                        select = c(7, 5),
                                        verbose = 0)
    }

    # FLAG SCRIPT START
    funname <- match.call()[[1]]
    utils.flag.start(func = funname,
                     build = "Jody",
                     verbose = verbose)

    # CHECK DATATYPE
    datatype <- utils.check.datatype(x, verbose = verbose)

    # FUNCTION SPECIFIC ERROR CHECKING

    # Read depth is stored under a different locus metric depending on the
    # datatype: AvgReadDepth for SilicoDArT, rdepth for SNP data.
    if (datatype == "SilicoDArT") {
        if (!is.null(x@other$loc.metrics$AvgReadDepth)) {
            rdepth <- x@other$loc.metrics$AvgReadDepth
        } else {
            stop(error(
                "Fatal Error: Read depth not included among the locus metrics"
            ))
        }
    } else if (datatype == "SNP") {
        if (!is.null(x@other$loc.metrics$rdepth)) {
            rdepth <- x@other$loc.metrics$rdepth
        } else {
            stop(error(
                "Fatal Error: Read depth not included among the locus metrics"
            ))
        }
    }

    # DO THE JOB

    n0 <- nLoc(x)

    # Loci are retained when lower <= rdepth <= upper, so the report states
    # strict inequalities for the loci that are removed.
    if (verbose >= 2) {
        cat(report(
            "  Removing loci with rdepth <",
            lower,
            "and >",
            upper,
            "\n"
        ))
    }

    # A locus with a missing read depth cannot be shown to lie within the
    # thresholds, so it is treated as failing the filter. This also keeps NA
    # out of the logical index used for subsetting below.
    index <- !is.na(rdepth) & rdepth >= lower & rdepth <= upper

    x2 <- x[, index]
    # Remove the corresponding records from the loci metadata; drop = FALSE
    # keeps a data frame even if only one metric column is present.
    x2@other$loc.metrics <- x@other$loc.metrics[index, , drop = FALSE]

    # PLOT HISTOGRAMS, BEFORE AFTER
    # The plot is built when it is to be displayed OR saved, so that saving
    # works with plot.display = FALSE.
    if (plot.display || !is.null(plot.file)) {
        # Builds one read depth histogram with the two thresholds marked in
        # red; the x axis runs from 0 to the next multiple of 10 at or above
        # the largest of the plotted values and the upper threshold.
        depth_histogram <- function(plotvar, xlabel) {
            axis.max <- max(plotvar, upper, na.rm = TRUE)
            axis.max <- ceiling(axis.max / 10) * 10
            ggplot(data.frame(plotvar = plotvar), aes(x = plotvar)) +
                geom_histogram(bins = 100,
                               color = plot.colors[1],
                               fill = plot.colors[2]) +
                coord_cartesian(xlim = c(0, axis.max)) +
                geom_vline(xintercept = lower, color = "red", size = 1) +
                geom_vline(xintercept = upper, color = "red", size = 1) +
                xlab(xlabel) +
                ylab("Count") +
                plot.theme
        }

        if (datatype == "SNP") {
            labels <- c("Pre-filter SNP read depth",
                        "Post-filter SNP read depth")
        } else {
            labels <- c("Pre-filter P/A read depth",
                        "Post-filter P/A read depth")
        }

        p1 <- depth_histogram(rdepth, labels[1])
        # Read depth of the retained loci only
        p2 <- depth_histogram(rdepth[index], labels[2])

        # Stack the two histograms (patchwork), equal heights
        p3 <- (p1 / p2) + plot_layout(heights = c(1, 1))

        if (plot.display) {
            print(p3)
        }

        # Optionally save the plot ---------------------
        if (!is.null(plot.file)) {
            utils.plot.save(p3,
                            dir = plot.dir,
                            file = plot.file,
                            verbose = verbose)
        }
    }

    # REPORT A SUMMARY
    if (verbose >= 3) {
        cat("  Summary of filtered dataset\n")
        cat("    Initial no. of loci =", n0, "\n")
        cat("    No. of loci deleted =", (n0 - nLoc(x2)), "\n")
        cat(paste("    No. of loci retained:", nLoc(x2), "\n"))
        cat(paste("    No. of individuals:", nInd(x2), "\n"))
        cat(paste("    No. of populations: ", length(levels(
            factor(pop(x2))
        )), "\n"))
    }

    # ADD TO HISTORY
    nh <- length(x2@other$history)
    x2@other$history[[nh + 1]] <- match.call()

    # FLAG SCRIPT END
    if (verbose > 0) {
        cat(report("Completed:", funname, "\n"))
    }

    return(x2)
}

Try the dartR.base package in your browser

Any scripts or data that you put into this service are public.

dartR.base documentation built on April 4, 2025, 2:45 a.m.