R/graph_topo_compar.R In graph4lg: Build Graphs for Landscape Genetics Analysis

Documented in graph_topo_compar

#' Compute an index comparing graph topologies
#'
#' @description The function computes several indices in order to compare two
#' graph topologies. One of the graph has the "true" topology the other is
#' supposed to reproduce. The indices are then a way to assess the reliability
#' of the latter graph.
#' Both graphs must have the same number of nodes, but not necessarily the
#' same number of links. They must also have the same node names and in
#' the same order.
#'
#' @details The indices are calculated from a confusion matrix counting
#' the number of links that are in the "observed" graph ("true") and also
#' in the "predicted" graph (true positives : TP), that are in the "observed"
#' graph but not in the "predicted" graph (false negatives : FN), that are not
#' in the "observed" graph but in the "predicted" graph (false positives : FP)
#' and that are not in the "observed" graph and not in the "predicted" graph
#' neither (true negatives: TN). K is the total number of links in the graphs.
#' K is equal to \eqn{n\times(n-1)} if the graphs are directed and to
#' \eqn{\frac{n\times(n-1)}{2}} if they are not directed, with n the number
#' of nodes.
#' OP = TP + FN, ON = TN + FP, PP = TP + FP and PN = FN + TN.
#'
#' The Matthews Correlation Coefficient (MCC) is computed as follows:
#' \eqn{MCC = \frac{TP\times TN-FP\times FN}{\sqrt{(TP+FP)(TP+FN)(TN+FP)(TN+FN)}}}
#'
#' The Kappa index is computed as follows:
#' \eqn{Kappa = \frac{K\times (TP + TN) - (ON \times PN) - (OP \times PP)}{K^{2} - (ON \times PN) - (OP \times PP)}}
#'
#' The False Discovery Rate (FDR) is calculated as follows:
#' \eqn{FDR = \frac{FP}{TP+FP}}
#'
#' The Accuracy is calculated as follows:
#' \eqn{Acc = \frac{TP + TN}{K}}
#'
#' The Sensitivity is calculated as follows:
#' \eqn{Sens = \frac{TP}{TP+FN}}
#'
#' The Specificity is calculated as follows:
#' \eqn{Spec = \frac{TN}{TN+FP}}
#'
#' The Precision is calculated as follows:
#' \eqn{Prec = \frac{TP}{TP+FP}}
#'
#'Self loops are not taken into account.
#' @param obs_graph A graph object of class \code{igraph} with n nodes.
#' It is the observed graph that \code{pred_graph} is supposed to approach.
#' @param pred_graph A graph object of class \code{igraph} with n nodes.
#' It is the predicted graph that is supposed to be akin to \code{obs_graph}.
#' @param mode A character string specifying which index to compute in order
#' to compare the topologies of the graphs.
#' \itemize{
#' \item{If 'mode = 'mcc'' (default), the Matthews Correlation
#' Coefficient (MCC) is computed.}
#' \item{If 'mode = 'kappa'', the Kappa index is computed.}
#' \item{If 'mode = 'fdr'', the False Discovery Rate (FDR) is computed.}
#' \item{If 'mode = 'acc'', the Accuracy is computed.}
#' \item{If 'mode = 'sens'', the Sensitivity is computed.}
#' \item{If 'mode = 'spec'', the Specificity is computed.}
#' \item{If 'mode = 'prec'', the Precision is computed.}
#' }
#' @param directed Logical (TRUE or FALSE) specifying whether both graphs
#' are directed or not.
#' @return The value of the index computed
#' @export
#' @author P. Savary
#' @references \insertRef{dyer2004population}{graph4lg}
#' \insertRef{baldi2000assessing}{graph4lg}
#' \insertRef{matthews1975comparison}{graph4lg}
#' @examples
#' data(data_ex_genind)
#' data(pts_pop_ex)
#' mat_dist <- suppressWarnings(graph4lg::mat_geo_dist(data=pts_pop_ex,
#'       ID = "ID",
#'       x = "x",
#'       y = "y"))
#' mat_dist <- mat_dist[order(as.character(row.names(mat_dist))),
#'                       order(as.character(colnames(mat_dist)))]
#' graph_obs <- gen_graph_thr(mat_w = mat_dist, mat_thr = mat_dist,
#'                             thr = 15000, mode = "larger")
#' mat_gen <- mat_gen_dist(x = data_ex_genind, dist = "DPS")
#' graph_pred <- gen_graph_topo(mat_w = mat_gen, mat_topo = mat_dist,
#'                             topo = "gabriel")
#' graph_topo_compar(obs_graph = graph_obs,
#'                   pred_graph = graph_pred,
#'                   mode = "mcc",
#'                   directed = FALSE)

graph_topo_compar <- function(obs_graph, pred_graph,
mode = "mcc", directed = FALSE){

# Check whether obs_graph and pred_graph are graphs
if(!inherits(obs_graph, "igraph")){
stop("'obs_graph' must be a graph object of class 'igraph'.")
} else if (!inherits(pred_graph, "igraph")){
stop("'pred_graph' must be a graph object of class 'igraph'.")
}

# Check whether they have the same node number
if(length(igraph::V(obs_graph)) != length(igraph::V(pred_graph))){
stop("Both graphs must have the same node number.")
}

# Check whether the graph nodes have names
if(is.null(igraph::V(obs_graph)$name)){ stop("The nodes of 'obs_graph' must have names.") } else if(is.null(igraph::V(pred_graph)$name)){
stop("The nodes of 'pred_graph' must have names.")
}

# Check whether the graphs have the same names and in the same order
if(!all(igraph::V(obs_graph)$name == igraph::V(pred_graph)$name)){
stop("Both graphs must have the same node names and the nodes
ranked in the same order.")
}

nb_nodes <- length(igraph::V(obs_graph)\$name)

# If the graph has directed links
if(directed == TRUE){

# Number of directed links
K <- (nb_nodes - 1) * nb_nodes

# Get the adjacency matrices of both graphs
names = TRUE , sparse = FALSE)
names = TRUE , sparse = FALSE)

# 1obs 1pred (TP)
# 1 values in both adjacency matrices

# 0obs 0pred (TN)
tn <- length(which(((1 - adj1) & (1 - adj2)))) - nb_nodes
# 0 values become 1 in both adjacency matrices when
# using 1 - adj1 and 1 - adj2
# We remove the number of nodes as both diagonal values
# of 1 - adj1 and 1 - adj2 are ones.

# 0obs 1pred (FP)
fp <- length(which(((1 - adj1) & adj2)))
# We do not remove the number of nodes as only the
# diagonal values of 1 - adj1 are ones.

# 1obs 0pred (FN)
fn <- length(which((adj1 & (1 - adj2))))
# We do not remove the number of nodes as only the
# diagonal values of 1 - adj2 are ones.

# If the graph has non-directed links
} else if (directed == FALSE){

K <- (nb_nodes - 1) * nb_nodes/2
# We consider half the number of links in that case
# given the graphs are not directed.

names = TRUE , sparse = FALSE)
names = TRUE , sparse = FALSE)

# 1obs 1pred (TP)
# We divide by 2 in each case as the adjacency matrices are symmetric

# 0obs 0pred (TN)
tn <- length(which(((1 - adj1) & (1 - adj2))))/2 - (nb_nodes / 2)
# We divide by 2 and then we remove half the number of nodes
# as both diagonal values
# of 1 - adj1 and 1 - adj2 are ones.

# 0obs 1pred (FP)
fp <- length(which(((1 - adj1) & adj2)))/2

# 1obs 0pred (FN)
fn <- length(which((adj1 & (1 - adj2))))/2

} else {
stop("'directed' must be TRUE or FALSE.")
}

# Predicted positive = true positive + false positive
pp <- tp + fp
# Predicted negative = false negative + true negative
pn <- fn + tn

# Observed positive = true positive + false negative
op <- tp + fn
# Observed negative = true negative + false positive
on <- tn + fp

# We compute products because otherwise we reach the limit
# of the numbers R can handle in calculations.
pp_op <- pp * op
pn_on <- pn * on

mcc <- (tp * tn - fp * fn) / ( sqrt(pp_op) * sqrt(pn_on) )
kappa <- ( K * (tp + tn) - (on * pn)-(op * pp))/(K^2 - (on * pn) - (op * pp))
fdr <- fp / (tp + fp)
acc <- (tp + tn) / K
sens <- tp / (tp + fn)
spec <- tn / (tn + fp)
prec <- tp / (tp + fp)

if(mode == "mcc"){
message(paste("Matthews Correlation Coefficient : ", mcc, sep = ""))
return(mcc)
} else if(mode == "kappa"){
message(paste("Kappa Index : ", kappa, sep = ""))
return(kappa)
} else if(mode == "fdr"){
message(paste("False Discovery Rate : ", fdr, sep = ""))
return(fdr)
} else if(mode == "acc"){
message(paste("Accuracy : ", acc, sep = ""))
return(acc)
} else if(mode == "sens"){
message(paste("Sensitivity : ", sens, sep = ""))
return(sens)
} else if(mode == "spec"){
message(paste("Specificity : ", spec, sep = ""))
return(spec)
} else if(mode == "prec"){
message(paste("Precision : ", prec, sep = ""))
return(prec)
}

}


Try the graph4lg package in your browser

Any scripts or data that you put into this service are public.

graph4lg documentation built on Feb. 16, 2023, 5:43 p.m.