R/mmextract.R

Defines functions mmextract

Documented in mmextract

#' @title Extract all scaffolds within a selection polygon
#'
#' @description Finds all scaffolds that fall into a \code{selection} polygon highlighted in a plot generated by \code{\link{mmplot}}.
#'
#' @param mm (\emph{required}) A dataframe loaded with \code{\link{mmload}}.
#' @param selection (\emph{required}) A 2-column dataframe with the x and y coordinates of a selection of points in an \code{\link{mmplot}}. The column names of the provided dataframe must match column names in \code{mm}.
#' @param min_length Remove all scaffolds with a length below this value before the extraction. Scaffolds with a length equal to this value are kept. (\emph{Default: } \code{0})
#' @param inverse (\emph{Logical}) If \code{TRUE}, then the scaffolds within the \code{selection} are instead removed. (\emph{Default: } \code{FALSE})
#'
#' @export
#'
#' @return A dataframe (tibble) compatible with other mmgenome2 functions.
#'
#' @importFrom sp point.in.polygon
#' @importFrom dplyr filter
#' @importFrom tibble as_tibble
#'
#' @examples
#' library(mmgenome2)
#' data(mmgenome2)
#' mmgenome2
#' selection <- data.frame(
#'   cov_C13.11.25 = c(7.2, 16.2, 25.2, 23.3, 10.1),
#'   cov_C14.01.09 = c(47, 77, 52.8, 29.5, 22.1)
#' )
#' mmgenome2_extraction <- mmextract(mmgenome2,
#'   min_length = 3000,
#'   selection = selection,
#'   inverse = FALSE
#' )
#' mmgenome2_extraction
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
#' @author Soren M. Karst \email{smk@@bio.aau.dk}
#' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
mmextract <- function(mm,
                      selection,
                      min_length = 0,
                      inverse = FALSE) {
  # The first two columns of the selection name the x and y variables in mm
  # and hold the polygon vertices along those axes.
  axes <- colnames(selection)
  if (!any(axes %in% colnames(mm))) {
    stop("Could not find any variable names in mm matching those in the selection.", call. = FALSE)
  }
  # Both axes are looked up in mm below; fail early with a clear message
  # instead of handing a missing coordinate vector to sp::point.in.polygon().
  if (length(axes) < 2L || !all(axes[1:2] %in% colnames(mm))) {
    stop("Both the x and y variable names in the selection must be present in mm.", call. = FALSE)
  }

  # filter based on minimum length (scaffolds with length == min_length are kept)
  mms <- dplyr::filter(mm, length >= min_length)

  # classify every remaining scaffold relative to the selection polygon;
  # the result is 0 for points strictly outside and > 0 otherwise
  in_polygon <- sp::point.in.polygon(
    point.x = mms[[axes[1]]],
    point.y = mms[[axes[2]]],
    pol.x = selection[[1]],
    pol.y = selection[[2]],
    mode.checked = FALSE
  )

  # inverse = TRUE drops the scaffolds in the polygon, otherwise only those
  # in the polygon are returned
  if (isTRUE(inverse)) {
    mms <- dplyr::filter(mms, in_polygon == 0)
  } else {
    mms <- dplyr::filter(mms, in_polygon > 0)
  }

  # report how much was kept relative to the unfiltered input
  nrow_before <- nrow(mm)
  nrow_after <- nrow(mms)
  message(paste0(
    nrow_after,
    " scaffolds (or ",
    round(sum(mms$length) / sum(mm$length) * 100, 2),
    "% of the scaffolds in mm, weighted by length) remain after ",
    nrow_before - nrow_after,
    " of ",
    nrow_before,
    " scaffolds have been filtered."
  ))
  tibble::as_tibble(mms)
}
KasperSkytte/mmgenome2 documentation built on Dec. 14, 2021, 12:11 a.m.