Nothing
#' Generate a molecular network with some properties
#'
#' Function to generate a molecular network object, and some
#' basic properties of the network.
#'
#' Molecular networks can be used to illustrate the biosynthetic/structural
#' similarity of phytochemical compounds in a sample, while
#' simultaneously visualizing their relative concentrations. \code{molNet}
#' creates the network, and \code{\link{molNetPlot}} can subsequently be
#' used to create a plot of the network.
#'
#' @param compDisMat Compound dissimilarity matrix, as calculated by
#' \code{\link{compDis}}. Note that the supplied dissimilarity matrix
#' is transformed into a similarity matrix, and this is what \code{cutOff}
#' values are set for. Note also that \code{\link{compDis}} always outputs a
#' list of one or more matrices, while \code{molNet} requires a single matrix
#' as input. Therefore, a specific matrix has to be selected from this list,
#' as \code{compDisOutput$matrix}.
#' @param npcTable A data frame generated by \code{\link{NPCTable}} can
#' be supplied for calculations of the number of NPC pathways and
#' network modularity.
#' @param cutOff Cut-off value for compound similarities. Any similarity
#' lower than this value will be set to zero when the network is generated,
#' which strongly affects the look of the network. The value can be set
#' manually to any value between 0 and 1; to the \code{median} similarity
#' value from the compDisMat; or, if an NPCTable is supplied,
#' to \code{minPathway}, the lowest within-pathway similarity
#' (which allows all within-NPC-pathway similarities to be kept).
#'
#' @return List with a (tbl_graph) graph object, the number of compounds,
#' number of NPC pathways and a measure of the modularity of the network
#' (see \code{\link[igraph]{modularity}}).
#'
#' @export
#'
#' @examples
#' data(minimalCompDis)
#' data(minimalNPCTable)
#' molNet(minimalCompDis, minimalNPCTable, cutOff = 0)
#'
#' data(alpinaCompDis)
#' molNet(compDisMat = alpinaCompDis, cutOff = 0.75)
molNet <- function(compDisMat,
npcTable = NULL,
cutOff = "median") {
if (is.numeric(cutOff) && ((cutOff < 0) || (cutOff > 1))) {
stop("Numeric values for cutOff must be between 0 and 1.")
}
if (is.character(cutOff) && !(cutOff == "median" || cutOff == "minPathway")) {
stop("cutOff must be a value between 0 and 1, median or minPathway.")
}
if (!all(colnames(compDisMat) == rownames(compDisMat))) {
stop("compDisMat must be a square matrix with identical row and column names.")
}
# Make a similarity matrix
compSimMat <- 1 - compDisMat
diag(compSimMat) <- 0
compSimMatFull <- compSimMat
# Set cut-off point. Median, manual or lowest within-pathway similarity
if (cutOff == "median") {
message("Using median similarity as cut-off.")
medianSim <- stats::median(compSimMat[lower.tri(compSimMat)])
compSimMat[compSimMat < medianSim] <- 0
} else if (is.numeric(cutOff)) {
message(paste("Using cut-off value =", cutOff))
compSimMat[compSimMat < cutOff] <- 0
} else if (cutOff == "minPathway" && !is.null(npcTable)) {
message("Using lowest within-pathway similarity as cut-off.")
minIntraPath <- max(compSimMat)
simValue <- NA
for (col in 1:(ncol(compSimMat)-1)) {
for (row in (col+1):nrow(compSimMat)) {
# Lower triangle
if (!is.na(npcTable$pathway[col]) && !is.na(npcTable$pathway[row]) &&
npcTable$pathway[col] == npcTable$pathway[row]) {
# If both are not NA and compounds are from the same pathway
simValue <- compSimMat[row,col]
if (simValue < minIntraPath) {
# If this value is smaller than previous one, replace
minIntraPath <- simValue
}
}
}
}
compSimMat[compSimMat < minIntraPath] <- 0
} else if (cutOff == "minPathway" && is.null(npcTable)) {
stop("Using minPathway as cut-off requires npcTable.")
}
# The network
# networkObject <- tidygraph::as_tbl_graph(compSimMat)
# Creation of network is currently done with tbl_graph() using data on
# nodes and edges (links), rather than with as_tbl_graph() using the
# similarity matrix, in order to avoid warnings produced by the latter
# function, likely due to incorrect use of || or && for R 4.2.0
linkedComps <- as.data.frame(matrix(data = NA,
nrow = length(compSimMat[compSimMat > 0])/2,
ncol = 2))
colnames(linkedComps) <- c("Comp1", "Comp2")
l <- 1
for(i in 1:nrow(compSimMat)) {
for (k in i:ncol(compSimMat)) {
if(compSimMat[i,k] > 0) {
linkedComps$Comp1[l] <- rownames(compSimMat)[i]
linkedComps$Comp2[l] <- colnames(compSimMat)[k]
l <- l + 1
}
}
}
compSimMatTri <- compSimMat[upper.tri(compSimMat)]
compSimMatTriPos <- compSimMatTri[compSimMatTri > 0] # No self-links
nodes <- data.frame(name = rownames(compSimMat))
# Edges in both directions (as done by as_tbl_graph, order is not
# the same with function and manually, but that doesn't matter)
links <- data.frame(from = c(linkedComps$Comp1, linkedComps$Comp2),
to = c(linkedComps$Comp2, linkedComps$Comp1),
weight = rep(compSimMatTriPos, 2))
networkObject <- tidygraph::tbl_graph(nodes = nodes, edges = links)
# Number of compounds
nCompounds <- nrow(compSimMatFull)
# Calculating the number of pathways, and modularity, which is done on the
# full matrix with pathways as groups, to not depend on the cut-off
nNpcPathways <- NA
modularity <- NA
if(!is.null(npcTable)) {
nNpcPathways <- length(unique(npcTable$pathway[!is.na(npcTable$pathway)]))
modNet <- igraph::graph.adjacency(compSimMatFull,
mode = "undirected",
weighted = TRUE)
membership <- as.numeric(as.factor(npcTable$pathway))
# Give NA their own category, otherwise modularity() causes a crash
membership[is.na(membership)] <- max(membership, na.rm = TRUE) + 1
modularity <- igraph::modularity(modNet, membership)
}
networkOutput <- list("networkObject" = networkObject,
"nCompounds" = nCompounds,
"nNpcPathways" = nNpcPathways,
"modularity" = modularity)
return(networkOutput)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.