R/molNet.R

Defines functions molNet

Documented in molNet

#' Generate a molecular network with some properties
#'
#' Function to generate a molecular network object, and some
#' basic properties of the network.
#'
#' Molecular networks can be used to illustrate the biosynthetic/structural
#' similarity of phytochemical compounds in a sample, while
#' simultaneously visualizing their relative concentrations. \code{molNet}
#' creates the network, and \code{\link{molNetPlot}} can subsequently be
#' used to create a plot of the network.
#'
#' @param compDisMat Compound dissimilarity matrix, as calculated by
#' \code{\link{compDis}}. Note that the supplied dissimilarity matrix
#' is transformed into a similarity matrix, and this is what \code{cutOff}
#' values are set for. Note also that \code{\link{compDis}} always outputs a
#' list of one or more matrices, while \code{molNet} requires a single matrix
#' as input. Therefore, a specific matrix has to be selected from this list,
#' as \code{compDisOutput$matrix}.
#' @param npcTable A data frame generated by \code{\link{NPCTable}} can
#' be supplied for calculations of the number of NPC pathways and
#' network modularity.
#' @param cutOff Cut-off value for compound similarities. Any similarity
#' lower than this value will be set to zero when the network is generated,
#' which strongly affects the look of the network. The value can be set
#' manually to any value between 0 and 1; to the \code{median} similarity
#' value from the compDisMat; or, if an NPCTable is supplied,
#' to \code{minPathway}, the lowest within-pathway similarity
#' (which allows all within-NPC-pathway similarities to be kept).
#'
#' @return List with a (tbl_graph) graph object, the number of compounds,
#' number of NPC pathways and a measure of the modularity of the network
#' (see \code{\link[igraph]{modularity}}).
#'
#' @export
#'
#' @examples
#' data(minimalCompDis)
#' data(minimalNPCTable)
#' molNet(minimalCompDis, minimalNPCTable, cutOff = 0)
#'
#' data(alpinaCompDis)
#' molNet(compDisMat = alpinaCompDis, cutOff = 0.75)
molNet <- function(compDisMat,
                   npcTable = NULL,
                   cutOff = "median") {

  if (is.numeric(cutOff) && ((cutOff < 0) || (cutOff > 1))) {
    stop("Numeric values for cutOff must be between 0 and 1.")
  }
  if (is.character(cutOff) && !(cutOff == "median" || cutOff == "minPathway")) {
    stop("cutOff must be a value between 0 and 1, median or minPathway.")
  }
  if (!all(colnames(compDisMat) == rownames(compDisMat))) {
    stop("compDisMat must be a square matrix with identical row and column names.")
  }

  # Make a similarity matrix
  compSimMat <- 1 - compDisMat
  diag(compSimMat) <- 0

  compSimMatFull <- compSimMat

  # Set cut-off point. Median, manual or lowest within-pathway similarity
  if (cutOff == "median") {

    message("Using median similarity as cut-off.")
    medianSim <- stats::median(compSimMat[lower.tri(compSimMat)])
    compSimMat[compSimMat < medianSim] <- 0

  } else if (is.numeric(cutOff)) {

    message(paste("Using cut-off value =", cutOff))
    compSimMat[compSimMat < cutOff] <- 0

  } else if (cutOff == "minPathway" && !is.null(npcTable)) {

    message("Using lowest within-pathway similarity as cut-off.")
    minIntraPath <- max(compSimMat)
    simValue <- NA

    for (col in 1:(ncol(compSimMat)-1)) {
      for (row in (col+1):nrow(compSimMat)) {
        # Lower triangle
        if (!is.na(npcTable$pathway[col]) && !is.na(npcTable$pathway[row]) &&
            npcTable$pathway[col] == npcTable$pathway[row]) {
          # If both are not NA and compounds are from the same pathway
          simValue <- compSimMat[row,col]
          if (simValue < minIntraPath) {
            # If this value is smaller than previous one, replace
            minIntraPath <- simValue
          }
        }
      }
    }
    compSimMat[compSimMat < minIntraPath] <- 0

  } else if (cutOff == "minPathway" && is.null(npcTable)) {
    stop("Using minPathway as cut-off requires npcTable.")
  }

  # The network
  # networkObject <- tidygraph::as_tbl_graph(compSimMat)

  # Creation of network is currently done with tbl_graph() using data on
  # nodes and edges (links), rather than with as_tbl_graph() using the
  # similarity matrix, in order to avoid warnings produced by the latter
  # function, likely due to incorrect use of || or && for R 4.2.0
  linkedComps <- as.data.frame(matrix(data = NA,
                                      nrow = length(compSimMat[compSimMat > 0])/2,
                                      ncol = 2))

  colnames(linkedComps) <- c("Comp1", "Comp2")

  l <- 1
  for(i in 1:nrow(compSimMat)) {
    for (k in i:ncol(compSimMat)) {
      if(compSimMat[i,k] > 0) {
        linkedComps$Comp1[l] <- rownames(compSimMat)[i]
        linkedComps$Comp2[l] <- colnames(compSimMat)[k]
        l <- l + 1
      }
    }
  }

  compSimMatTri <- compSimMat[upper.tri(compSimMat)]
  compSimMatTriPos <- compSimMatTri[compSimMatTri > 0] # No self-links

  nodes <- data.frame(name = rownames(compSimMat))

  # Edges in both directions (as done by as_tbl_graph, order is not
  # the same with function and manually, but that doesn't matter)
  links <- data.frame(from = c(linkedComps$Comp1, linkedComps$Comp2),
                      to = c(linkedComps$Comp2, linkedComps$Comp1),
                      weight = rep(compSimMatTriPos, 2))

  networkObject <- tidygraph::tbl_graph(nodes = nodes, edges = links)

  # Number of compounds
  nCompounds <- nrow(compSimMatFull)

  # Calculating the number of pathways, and modularity, which is done on the
  # full matrix with pathways as groups, to not depend on the cut-off
  nNpcPathways <- NA
  modularity <- NA
  if(!is.null(npcTable)) {
    nNpcPathways <- length(unique(npcTable$pathway[!is.na(npcTable$pathway)]))

    modNet <- igraph::graph.adjacency(compSimMatFull,
                                      mode = "undirected",
                                      weighted = TRUE)
    membership <- as.numeric(as.factor(npcTable$pathway))
    # Give NA their own category, otherwise modularity() causes a crash
    membership[is.na(membership)] <- max(membership, na.rm = TRUE) + 1
    modularity <- igraph::modularity(modNet, membership)
  }

  networkOutput <- list("networkObject" = networkObject,
                        "nCompounds" = nCompounds,
                        "nNpcPathways" = nNpcPathways,
                        "modularity" = modularity)
  return(networkOutput)
}

Try the chemodiv package in your browser

Any scripts or data that you put into this service are public.

chemodiv documentation built on Aug. 18, 2023, 1:08 a.m.