R/mmnetwork.R

Defines functions mmnetwork

Documented in mmnetwork

#' @title Network plot of mate-pair or paired-end connected scaffold reads
#'
#' @description Plots all connected scaffolds in a network plot. Scaffolds can then be highlighted and extracted using the locator and selection features.
#'
#' @param mm (\emph{required}) A dataframe loaded with \code{\link{mmload}}.
#' @param network (\emph{required}) Paired-end or mate-pair connections between scaffolds in long format. The first and second columns must contain all connected scaffold pairs and the third column the number of connections.
#' @param min_connections Filter all scaffold pairs with equal to or less than this number of connections. (\emph{Default: } \code{2})
#' @param color_by Color the scaffolds by a variable in \code{mm}. (\emph{Default: } \code{NULL})
#' @param color_scale_log10 (\emph{Logical}) Log10-scale the color gradient when \code{color_by} is set and the variable is continuous (\emph{Default: } \code{FALSE})
#' @param locator (\emph{Logical}) When \code{TRUE}, left-clicks in the plot are captured and the exact x/y-coordinates of the mouse clicks are captured and returned. These coordinates can be used to highlight a selection of scaffolds in the plot, and to extract all scaffolds within the selection. (\emph{Default: } \code{FALSE})
#' @param selection A 2-column dataframe with the x and y coordinates of points with which to draw a polygon onto the plot to highlight a selected region. A selection can be obtained by using the locator feature (by \code{locator = TRUE}). (\emph{Default: } \code{NULL})
#' @param highlight_labels A dataframe or vector of scaffold names whose labels to highlight in the plot (colored by \code{highlight_color}). (\emph{Default: } \code{NULL})
#' @param highlight_color The color with which to highlight the scaffold labels defined by \code{highlight_labels}. (\emph{Default: } \code{"darkred"})
#' @param links_scale A factor to scale the sizes of the links plotted between scaffolds. (\emph{Default: } \code{1})
#' @param scaffold_labels Add labels with the scaffold names of \emph{all} scaffolds.
#' @param print_nolinks (\emph{Logical}) Print the names of all scaffolds with no links to other scaffolds to the console. (\emph{Default: } \code{FALSE})
#' @param seed Network plots are based on Random Number Generation, and this is used to set a specific random seed (with \code{set.seed}) allowing for reproducible network plots. Set to NULL to generate a unique network plot with each run of \code{mmnetwork}. (\emph{Default: } \code{0})
#'
#' @export
#'
#' @return A ggplot object. Note that mmgenome2 hides all warnings produced by ggplot objects.
#'
#' @section Using the locator with mmnetwork:
#' Unlike a plot generated by \code{\link{mmplot}}, the coordinates of points in each dimension in a \code{\link{mmnetwork}} plot is not contained within the provided dataframe \code{mm} itself. As such \code{\link{mmnetwork}} does not have a corresponding extract function like \code{\link{mmextract}} is for \code{\link{mmplot}}. Instead, a subset of \code{mm} containing all scaffolds within the selection polygon is available in the returned ggplot object with \code{plot$data_in_selection}.
#'
#' @import ggplot2
#' @importFrom igraph graph.data.frame layout_with_fr V
#' @importFrom dplyr filter
#' @importFrom sp point.in.polygon
#' @importFrom tibble as_tibble
#' @importFrom ggrepel geom_text_repel
#'
#' @examples
#' library(mmgenome2)
#' data(mmgenome2)
#' data(paired_ends)
#' mmgenome2
#' selection <- data.frame(
#'   cov_C13.11.25 = c(7.2, 16.2, 25.2, 23.3, 10.1),
#'   cov_C14.01.09 = c(47, 77, 52.8, 29.5, 22.1)
#' )
#' mmgenome2_extraction <- mmextract(mmgenome2, selection = selection)
#' mmgenome2_extraction
#' p <- mmnetwork(mmgenome2_extraction,
#'   network = paired_ends,
#'   min_connections = 1,
#'   color_by = "taxonomy"
#' )
#' p
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
#' @author Soren M. Karst \email{smk@@bio.aau.dk}
#' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
mmnetwork <- function(mm,
                      network,
                      min_connections = 1,
                      color_by = NULL,
                      color_scale_log10 = FALSE,
                      locator = FALSE,
                      selection = NULL,
                      highlight_labels = NULL,
                      highlight_color = "darkred",
                      links_scale = 1,
                      scaffold_labels = FALSE,
                      print_nolinks = FALSE,
                      seed = 0) {
  # Checks and error messages before anything else
  if (isTRUE(locator) & !is.null(selection)) {
    stop("Using the locator and highlighting a selection at the same time is not supported.", call. = FALSE)
  }

  ## Subset the network, if no connections return error
  snetwork <- dplyr::filter(network, network[[1]] %in% mm[[1]] & network[[2]] %in% mm[[1]] & network[["connections"]] >= min_connections)
  if (!nrow(snetwork) > 0) {
    stop("There are no connections between any of the scaffolds.", call. = FALSE)
  }

  ## Convert to graph
  g <- igraph::graph.data.frame(snetwork, directed = F)

  ## Calculate a layout
  if (!is.null(seed)) set.seed(seed)
  layout <- igraph::layout_with_fr(g)

  ## Extract layout coordinates
  gpoints <- merge(data.frame("scaffold" = igraph::V(g)$name, "x" = layout[, 1], "y" = layout[, 2]), mm, by = 1)

  ## Extract link coordinates
  links <- merge(snetwork, gpoints[, 1:3], by.x = "scaffold1", by.y = "scaffold")
  links <- merge(links, gpoints[, 1:3], by.x = "scaffold2", by.y = "scaffold")
  colnames(links)[4:7] <- c("x", "y", "xend", "yend")

  p <- ggplot(
    data = gpoints,
    aes_string(
      x = "x",
      y = "y",
      size = "length"
    )
  ) +
    geom_segment(
      data = links,
      aes_string(
        x = "x",
        y = "y",
        xend = "xend",
        yend = "yend"
      ),
      color = "darkgrey",
      size = log10(links[["connections"]]) * links_scale
    ) +
    scale_size_area(name = "Scaffold length", max_size = 20)

  if (!is.null(color_by)) {
    if (is.factor(mm[[color_by]]) | is.character(mm[[color_by]])) {
      p <- p +
        geom_point(alpha = 0.1, color = "black") +
        geom_point(
          data = subset(gpoints, gpoints[[color_by]] != "NA"),
          aes_string(color = color_by),
          shape = 1
        ) +
        guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5, shape = 19)))
    } else if (is.numeric(mm[[color_by]])) {
      p <- p +
        geom_point(alpha = 0.7, aes_string(color = color_by))
      if (isTRUE(color_scale_log10)) {
        p <- p +
          scale_colour_gradientn(colours = c("blue", "green", "red"), trans = "log10")
      } else {
        p <- p +
          scale_colour_gradientn(colours = c("blue", "green", "red"))
      }
    }
  } else {
    p <- p +
      geom_point(alpha = 0.1, color = "black")
  }

  p <- p +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.text = element_blank(),
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      axis.line = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      legend.key = element_blank()
    )

  ##### Highlight selected scaffolds #####
  if (!is.null(highlight_labels)) {
    if (is.data.frame(highlight_labels)) {
      highlight_labels <- highlight_labels[[1]]
    }
    scaffolds <- as.character(highlight_labels)
    d <- dplyr::filter(gpoints, scaffold %in% scaffolds)
    p <- p +
      geom_text(
        data = d,
        color = highlight_color,
        size = 4,
        label = d[["scaffold"]]
      )
  }

  ##### label all scaffolds #####
  if (isTRUE(scaffold_labels)) {
    p <- p +
      ggrepel::geom_text_repel(
        label = gpoints[[1]],
        size = 4,
        color = "black"
      )
  }

  if (isTRUE(print_nolinks)) {
    nolinks <- mm[["scaffold"]][!(mm[["scaffold"]] %in% gpoints$scaffold)]
    cat("The following scaffolds have no links to other scaffolds:\n")
    cat(paste0(nolinks, collapse = ", "))
  }

  ##### Locator and selection #####
  if (isTRUE(locator)) {
    points <- mmlocator(p)
  }
  if (isTRUE(locator) | !is.null(selection)) {
    if (!isTRUE(locator) & !is.null(selection)) {
      points <- selection
    }
    p$selection <- points
    in_polygon <- sp::point.in.polygon(
      point.x = p$data[[colnames(points)[1]]],
      point.y = p$data[[colnames(points)[2]]],
      pol.x = points[[1]],
      pol.y = points[[2]],
      mode.checked = TRUE
    )
    scaffolds <- as.character(p$data$scaffold[which(in_polygon > 0)])
    p$data_in_selection <- tibble::as_tibble(mm[which(mm[[1]] %in% scaffolds), ])
    p <- p +
      geom_point(
        data = points,
        aes_(
          x = points[[1]],
          y = points[[2]]
        ),
        color = "black",
        size = 2,
        inherit.aes = FALSE,
        na.rm = TRUE
      ) +
      geom_polygon(
        data = points,
        aes_(
          x = points[[1]],
          y = points[[2]]
        ),
        fill = NA,
        size = 0.5,
        lty = 2,
        color = "darkred",
        inherit.aes = FALSE,
        na.rm = TRUE
      )
  }
  return(p)
}
KasperSkytte/mmgenome2 documentation built on Dec. 14, 2021, 12:11 a.m.