WGCNA: Weighted Correlation Network Analysis

# ===================================================
# This code was written by Jun Dong, modified by Peter Langfelder
# datExpr: expression profiles with rows=samples and cols=genes/probesets
# power: for contruction of the weighted network
# trait: the quantitative external trait
#

networkConcepts <- function(datExpr, power = 1, trait = NULL, networkType = "unsigned") {
  networkTypeC <- charmatch(networkType, .networkTypes)
  if (is.na(networkTypeC)) {
    stop(paste(
      "Unrecognized networkType argument.",
      "Recognized values are (unique abbreviations of)", paste(.networkTypes, collapse = ", ")
    ))
  }

  if (networkTypeC == 1) {
    adj <- abs(cor(datExpr, use = "p"))^power
  } else if (networkTypeC == 2) {
    adj <- abs((cor(datExpr, use = "p") + 1) / 2)^power
  } else {
    cor <- cor(datExpr, use = "p")
    cor[cor < 0] <- 0
    adj <- cor^power
  }
  diag(adj) <- 0 # Therefore adj=A-I.

  ### Fundamental Network Concepts
  Size <- dim(adj)[1]
  Connectivity <- apply(adj, 2, sum) # Within Module Connectivities
  Density <- sum(Connectivity) / (Size * (Size - 1))
  Centralization <- Size * (max(Connectivity) - mean(Connectivity)) / ((Size - 1) * (Size - 2))
  Heterogeneity <- sqrt(Size * sum(Connectivity^2) / sum(Connectivity)^2 - 1)
  ClusterCoef <- .ClusterCoef.fun(adj)

  fMAR <- function(v) sum(v^2) / sum(v)
  MAR <- apply(adj, 1, fMAR)
  # CONNECTIVITY=Connectivity/max(Connectivity)

  ### Conformity-Based Network Concepts
  ### Dong J, Horvath S (2007) Understanding Network Concepts in Modules, BMC Systems Biology 2007, 1:24
  Conformity <- .NPC.iterate(adj)$v1
  Factorizability <- 1 - sum((adj - outer(Conformity, Conformity) + diag(Conformity^2))^2) / sum(adj^2)
  Connectivity.CF <- sum(Conformity) * Conformity - Conformity^2
  Density.CF <- sum(Connectivity.CF) / (Size * (Size - 1))
  Centralization.CF <- Size * (max(Connectivity.CF) - mean(Connectivity.CF)) / ((Size - 1) * (Size - 2))
  Heterogeneity.CF <- sqrt(Size * sum(Connectivity.CF^2) / sum(Connectivity.CF)^2 - 1)
  # ClusterCoef.CF=.ClusterCoef.fun(outer(Conformity,Conformity)-diag(Conformity^2) )
  ClusterCoef.CF <- c(NA, Size)
  for (i in 1:Size) {
    ClusterCoef.CF[i] <- (sum(Conformity[-i]^2)^2 - sum(Conformity[-i]^4)) /
      (sum(Conformity[-i])^2 - sum(Conformity[-i]^2))
  }

  ### Approximate Conformity-Based Network Concepts
  Connectivity.CF.App <- sum(Conformity) * Conformity
  Density.CF.App <- sum(Connectivity.CF.App) / (Size * (Size - 1))
  Centralization.CF.App <- Size * (max(Connectivity.CF.App) - mean(Connectivity.CF.App)) / ((Size - 1) * (Size - 2))
  Heterogeneity.CF.App <- sqrt(Size * sum(Connectivity.CF.App^2) / sum(Connectivity.CF.App)^2 - 1)
  ClusterCoef.CF.App <- (sum(Conformity^2) / sum(Conformity))^2

  ### Eigengene-based Network Concepts
  m1 <- moduleEigengenes(datExpr, colors = rep(1, Size))
  # Weighted Expression Conformity
  ConformityE <- cor(datExpr, m1[[1]][, 1], use = "pairwise.complete.obs")
  ConformityE <- abs(ConformityE)^power
  ConnectivityE <- sum(ConformityE) * ConformityE # Expression Connectivity
  DensityE <- sum(ConnectivityE) / (Size * (Size - 1)) # Expression Density
  CentralizationE <- Size * (max(ConnectivityE) - mean(ConnectivityE)) / ((Size - 1) * (Size - 2)) # Expression Centralization
  HeterogeneityE <- sqrt(Size * sum(ConnectivityE^2) / sum(ConnectivityE)^2 - 1) # Expression Heterogeneity
  ClusterCoefE <- (sum(ConformityE^2) / sum(ConformityE))^2 ## Expression ClusterCoef
  MARE <- ConformityE * sum(ConformityE^2) / sum(ConformityE)

  ### Significance measure only when trait is available.
  if (!is.null(trait)) {
    EigengeneSignificance <- abs(cor(trait, m1[[1]], use = "pairwise.complete.obs"))^power
    EigengeneSignificance <- EigengeneSignificance[1, 1]
    GS <- abs(cor(datExpr, trait, use = "pairwise.complete.obs"))^power
    GS <- GS[, 1]
    GSE <- ConformityE * EigengeneSignificance
    GSE <- GSE[, 1]
    ModuleSignificance <- mean(GS)
    ModuleSignificanceE <- mean(GSE)
    K <- Connectivity / max(Connectivity)
    HubGeneSignificance <- sum(GS * K) / sum(K^2)
    KE <- ConnectivityE / max(ConnectivityE)
    HubGeneSignificanceE <- sum(GSE * KE) / sum(KE^2)
  }

  Summary <- cbind(
    c(Density, Centralization, Heterogeneity, mean(ClusterCoef), mean(Connectivity)),
    c(DensityE, CentralizationE, HeterogeneityE, mean(ClusterCoefE), mean(ConnectivityE)),
    c(Density.CF, Centralization.CF, Heterogeneity.CF, mean(ClusterCoef.CF), mean(Connectivity.CF)),
    c(
      Density.CF.App, Centralization.CF.App, Heterogeneity.CF.App, mean(ClusterCoef.CF.App),
      mean(Connectivity.CF.App)
    )
  )
  colnames(Summary) <- c("Fundamental", "Eigengene-based", "Conformity-Based", "Approximate Conformity-based")
  rownames(Summary) <- c("Density", "Centralization", "Heterogeneity", "Mean ClusterCoef", "Mean Connectivity")

  output <- list(
    Summary = Summary, Size = Size, Factorizability = Factorizability, Eigengene = m1[[1]],
    VarExplained = m1[[2]][, 1], Conformity = Conformity, ClusterCoef = ClusterCoef, Connectivity = Connectivity,
    MAR = MAR, ConformityE = ConformityE
  )
  if (!is.null(trait)) {
    output$GS <- GS
    output$GSE <- GSE
    Significance <- cbind(
      c(ModuleSignificance, HubGeneSignificance, EigengeneSignificance),
      c(ModuleSignificanceE, HubGeneSignificanceE, NA)
    )
    colnames(Significance) <- c("Fundamental", "Eigengene-based")
    rownames(Significance) <- c("ModuleSignificance", "HubGeneSignificance", "EigengeneSignificance")
    output$Significance <- Significance
  }
  output
}



# ====================================================================================================
#
# Network functions for network concepts
#
# ====================================================================================================
# =========================================
# Function definitions
# =========================================

# ================================================================================
#   Cohesiveness/Conformity/Factorizability etc
# ================================================================================

# ===================================================
# Check if adj is a valid adjacency matrix:  square matrix, non-negative entries, symmetric and no missing entries.
# Parameters:
#   adj - the input adjacency matrix
#   tol - the tolerence level to measure the difference from 0 (symmetric matrix: upper diagonal minus lower diagonal)
# Remarks:
#   1. This function is not supposed to be used directly. Instead, it should appear in function definitions.
#   2. We release the requirement that the diagonal elements be 1 or 0. Users should assign appropriate values
#      at the beginning of their function definitions.
# Usage:
# if(!.is.adjmat(adj)) stop("The input matrix is not a valid adjacency matrix!")

.is.adjmat <- function(adj, tol = 10^(-15)) {
  n <- dim(adj)
  is.adj <- 1
  if (n[1] != n[2]) {
    message("The adjacency matrix is not a square matrix!")
    is.adj <- 0
  }
  if (sum(is.na(adj)) > 0) {
    message("There are missing values in the adjacency matrix!")
    is.adj <- 0
  }
  if (sum(adj < 0) > 0) {
    message("There are negative entries in the adjacency matrix!")
    is.adj <- 0
  }
  if (max(abs(adj - t(adj))) > tol) {
    message("The adjacency matrix is not symmetric!")
    is.adj <- 0
  }
  # if ( max(abs(diag(adj)-1)) > tol){ message("The diagonal elements are not all one!"); is.adj=0;}
  # The last criteria is removed because of different definitions on diagonals with other papers.
  # Always let "diagonal=1" INSIDE the function calls when using functions for Factorizability paper.
  is.adj
}


# ===================================================
# .NPC.direct=function(adj)
# Calculates the square root of Normalized Product Connectivity (.NPC), by way of definition of .NPC. ( \sqrt{t})
# Parameters:
#   adj - the input adjacency matrix
#   tol - the tolerence level to measure the difference from 0 (zero off-diagonal elements)
# Output:
#   v1 - vector, the square root of .NPC
# Remarks:
#   1. The function requires that the off-diagonal elements of the adjacency matrix are all non-zero.
#   2. If any of the off-diagonal elements is zero, use the function .NPC.iterate().
#   3. If the adjacency matrix is 2 by 2, then a warning message is issued and vector of sqrt(adj[1,2]) is returned.
#   4. If the adjacency matrix is a ZERO matrix, then a warning message is issued and vector of 0 is returned.

.NPC.direct <- function(adj) {
  if (!.is.adjmat(adj)) stop("The input matrix is not a valid adjacency matrix!")
  n <- dim(adj)[1]
  if (n == 2) {
    warning("The adjacecny matrix is only 2 by 2. .NPC may not be unique!")
    return(rep(sqrt(adj[1, 2]), 2))
  }
  diag(adj) <- 0
  if (!sum(adj > 0)) {
    warning("The adjacency matrix is a ZERO matrix!")
    return(rep(0, n))
  }
  diag(adj) <- 1
  if (sum(adj == 0)) stop("There is zero off--diagonal element! Please use the function .NPC.iterate().")
  log10.prod.vec <- function(vec) {
    prod <- 0
    for (i in 1:length(vec)) {
      prod <- prod + log10(vec[i])
    }
    prod
  }
  off.diag <- as.vector(as.dist(adj))
  prod1 <- log10.prod.vec(off.diag)
  v1 <- rep(-666, n)
  for (i in 1:n) {
    prod2 <- prod1 - log10.prod.vec(adj[i, ])
    v1[i] <- 10^(prod1 / (n - 1) - prod2 / (n - 2))
  }
  v1
}

# ===================================================
# .NPC.iterate=function(adj, loop=10^(10), tol=10^(-10))
# Calculates the square root of Normalized Product Connectivity, by way of iteration algorithm. ( \sqrt{t})
# Parameters:
#   adj - the input adjacency matrix
#   loop - the maximum number of iterations before stopping the algorithm
#   tol - the tolerence level to measure the difference from 0 (zero off-diagonal elements)
# Output:
#   v1 - vector, the square root of .NPC
#   loop - integer, the number of iterations taken before convergence criterion is met
#   diff - scaler, the maximum difference between the estimates of 'v1' in the last two iterations
# Remarks:
#   1. Whenever possible, use .NPC.direct().
#   2. If the adjacency matrix is 2 by 2, then a warning message is issued.
#   3. If the adjacency matrix is a ZERO matrix, then a warning message is issued and vector of 0 is returned.

if (exists(".NPC.iterate")) rm(.NPC.iterate)
.NPC.iterate <- function(adj, loop = 10^(10), tol = 10^(-10)) {
  if (!.is.adjmat(adj)) stop("The input matrix is not a valid adjacency matrix!")
  n <- dim(adj)[1]
  if (n == 2) warning("The adjacecny matrix is only 2 by 2. .NPC may not be unique!")
  diag(adj) <- 0
  if (max(abs(adj)) < tol) {
    warning("The adjacency matrix is a ZERO matrix!")
    return(rep(0, n))
  }
  diff <- 1
  k <- apply(adj, 2, sum)
  v1 <- k / sqrt(sum(k)) # first-step estimator for v-vector (initial value)
  i <- 0
  while (loop > i && diff > tol) {
    i <- i + 1
    diag(adj) <- v1^2
    svd1 <- svd(adj) # Spectral Decomposition
    v2 <- sqrt(svd1$d[1]) * abs(svd1$u[, 1])
    diff <- max(abs(v1 - v2))
    v1 <- v2
  }
  list(v1 = v1, loop = i, diff = diff)
}

# ===================================================
# The function .ClusterCoef.fun computes the cluster coefficients.
# Input is an adjacency matrix
.ClusterCoef.fun <- function(adjmat1) {
  # diag(adjmat1)=0
  no.nodes <- dim(adjmat1)[[1]]
  computeLinksInNeighbors <- function(x, imatrix) {
    x %*% imatrix %*% x
  }
  computeSqDiagSum <- function(x, vec) {
    sum(x^2 * vec)
  }
  nolinksNeighbors <- c(rep(-666, no.nodes))
  total.edge <- c(rep(-666, no.nodes))
  maxh1 <- max(as.dist(adjmat1))
  minh1 <- min(as.dist(adjmat1))
  if (maxh1 > 1 | minh1 < 0) {
    stop(paste(
      "ERROR: the adjacency matrix contains entries that are larger",
      "than 1 or smaller than 0: max=", maxh1, ", min=", minh1
    ))
  } else {
    nolinksNeighbors <- apply(adjmat1, 1, computeLinksInNeighbors, imatrix = adjmat1)
    subTerm <- apply(adjmat1, 1, computeSqDiagSum, vec = diag(adjmat1))
    plainsum <- apply(adjmat1, 1, sum)
    squaresum <- apply(adjmat1^2, 1, sum)
    total.edge <- plainsum^2 - squaresum
    CChelp <- rep(-666, no.nodes)
    CChelp <- ifelse(total.edge == 0, 0, (nolinksNeighbors - subTerm) / total.edge)
    CChelp
  }
} # end of function


# ===================================================
# The function err.bp  is used to create error bars in a barplot
# usage: err.bp(as.vector(means), as.vector(stderrs), two.side=F)
.err.bp <- function(daten, error, two.side = F) {
  if (!is.numeric(daten)) {
    stop("All arguments must be numeric")
  }
  if (is.vector(daten)) {
    xval <- (cumsum(c(0.7, rep(1.2, length(daten) - 1))))
  } else {
    if (is.matrix(daten)) {
      xval <- cumsum(array(c(1, rep(0, dim(daten)[1] - 1)),
        dim = c(1, length(daten))
      )) + 0:(length(daten) - 1) + .5
    } else {
      stop("First argument must either be a vector or a matrix")
    }
  }
  MW <- 0.25 * (max(xval) / length(xval))
  ERR1 <- daten + error
  ERR2 <- daten - error
  for (i in 1:length(daten)) {
    segments(xval[i], daten[i], xval[i], ERR1[i])
    segments(xval[i] - MW, ERR1[i], xval[i] + MW, ERR1[i])
    if (two.side) {
      segments(xval[i], daten[i], xval[i], ERR2[i])
      segments(xval[i] - MW, ERR2[i], xval[i] + MW, ERR2[i])
    }
  }
}



# ========================================================================================

conformityBasedNetworkConcepts <- function(adj, GS = NULL) {
  if (!.is.adjmat(adj)) stop("The input matrix is not a valid adjacency matrix!")
  diag(adj) <- 0 # Therefore adj=A-I.
  if (dim(adj)[[1]] < 3) {
    stop("The adjacency matrix has fewer than 3 rows. This network is trivial and will not be evaluated.")
  }
  if (!is.null(GS)) {
    if (length(GS) != dim(adj)[[1]]) {
      stop(paste(
        "The length of the node significnce GS does not equal the number",
        "of rows of the adjcency matrix. length(GS) != dim(adj)[[1]]. \n",
        "Something is wrong with your input"
      ))
    }
  }

  ### Fundamental Network Concepts
  Size <- dim(adj)[1]
  Connectivity <- apply(adj, 2, sum)
  Density <- sum(Connectivity) / (Size * (Size - 1))
  Centralization <- Size * (max(Connectivity) - mean(Connectivity)) / ((Size - 1) * (Size - 2))
  Heterogeneity <- sqrt(Size * sum(Connectivity^2) / sum(Connectivity)^2 - 1)
  ClusterCoef <- .ClusterCoef.fun(adj)
  fMAR <- function(v) sum(v^2) / sum(v)
  MAR <- apply(adj, 1, fMAR)
  ### Conformity-Based Network Concepts
  Conformity <- .NPC.iterate(adj)$v1
  Factorizability <- 1 - sum((adj - outer(Conformity, Conformity) + diag(Conformity^2))^2) / sum(adj^2)
  Connectivity.CF <- sum(Conformity) * Conformity - Conformity^2
  Density.CF <- sum(Connectivity.CF) / (Size * (Size - 1))
  Centralization.CF <- Size * (max(Connectivity.CF) - mean(Connectivity.CF)) / ((Size - 1) * (Size - 2))
  Heterogeneity.CF <- sqrt(Size * sum(Connectivity.CF^2) / sum(Connectivity.CF)^2 - 1)
  # ClusterCoef.CF=.ClusterCoef.fun(outer(Conformity,Conformity)-diag(Conformity^2) )
  ClusterCoef.CF <- c(NA, Size)
  for (i in 1:Size) {
    ClusterCoef.CF[i] <- (sum(Conformity[-i]^2)^2 - sum(Conformity[-i]^4)) /
      (sum(Conformity[-i])^2 - sum(Conformity[-i]^2))
  }
  MAR.CF <- ifelse(sum(Conformity, na.rm = T) - Conformity == 0, NA,
    Conformity * (sum(Conformity^2, na.rm = T) - Conformity^2) / (sum(Conformity, na.rm = T) - Conformity)
  )

  ### Approximate Conformity-Based Network Concepts
  Connectivity.CF.App <- sum(Conformity) * Conformity
  Density.CF.App <- sum(Connectivity.CF.App) / (Size * (Size - 1))
  Centralization.CF.App <- Size * (max(Connectivity.CF.App) - mean(Connectivity.CF.App)) / ((Size - 1) * (Size - 2))
  Heterogeneity.CF.App <- sqrt(Size * sum(Connectivity.CF.App^2) / sum(Connectivity.CF.App)^2 - 1)

  if (sum(Conformity, na.rm = T) == 0) {
    warning(paste(
      "The sum of conformities equals zero.\n",
      "Maybe you used an input adjacency matrix with lots of zeroes?\n",
      "Specifically, sum(Conformity,na.rm=T)==0."
    ))
    MAR.CF.App <- rep(NA, Size)
    ClusterCoef.CF.App <- rep(NA, Size)
  } # end of if
  if (sum(Conformity, na.rm = T) != 0) {
    MAR.CF.App <- Conformity * sum(Conformity^2, na.rm = T) / sum(Conformity, na.rm = T)
    ClusterCoef.CF.App <- rep((sum(Conformity^2) / sum(Conformity))^2, Size)
  } # end of if
  output <- list(
    Factorizability = Factorizability,
    fundamentalNCs = list(
      ScaledConnectivity = Connectivity / max(Connectivity, na.rm = T),
      Connectivity = Connectivity,
      ClusterCoef = ClusterCoef,
      MAR = MAR,
      Density = Density,
      Centralization = Centralization,
      Heterogeneity = Heterogeneity
    ),
    conformityBasedNCs = list(
      Conformity = Conformity,
      Connectivity.CF = Connectivity.CF,
      ClusterCoef.CF = ClusterCoef.CF,
      MAR.CF = MAR.CF,
      Density.CF = Density.CF,
      Centralization.CF = Centralization.CF,
      Heterogeneity.CF = Heterogeneity.CF
    ),
    approximateConformityBasedNCs = list(
      Conformity = Conformity,
      Connectivity.CF.App = Connectivity.CF.App,
      ClusterCoef.CF.App = ClusterCoef.CF.App,
      MAR.CF.App = MAR.CF.App,
      Density.CF.App = Density.CF.App,
      Centralization.CF.App = Centralization.CF.App,
      Heterogeneity.CF.App = Heterogeneity.CF.App
    )
  )
  if (!is.null(GS)) {
    output$FundamentalNC$NetworkSignificance <- mean(GS, na.rm = T)
    K <- Connectivity / max(Connectivity)
    output$FundamentalNC$HubNodeSignificance <- sum(GS * K, na.rm = T) / sum(K^2, na.rm = T)
  }
  output
} # end of function



# ===================================================================================================

fundamentalNetworkConcepts <- function(adj, GS = NULL) {
  if (!.is.adjmat(adj)) stop("The input matrix is not a valid adjacency matrix!")
  diag(adj) <- 0 # Therefore adj=A-I.

  if (dim(adj)[[1]] < 3) stop("The adjacency matrix has fewer than 3 rows. This network is trivial and will not be evaluated.")

  if (!is.null(GS)) {
    if (length(GS) != dim(adj)[[1]]) {
      stop("The length of the node significnce GS does not
equal the number of rows of the adjcency matrix. length(GS) unequal dim(adj)[[1]]. GS should be a vector whosecomponents correspond to the nodes.")
    }
  }

  Size <- dim(adj)[1]

  ### Fundamental Network Concepts
  Connectivity <- apply(adj, 2, sum) # Within Module Connectivities
  Density <- sum(Connectivity) / (Size * (Size - 1))
  Centralization <- Size * (max(Connectivity) - mean(Connectivity)) / ((Size - 1) * (Size - 2))
  Heterogeneity <- sqrt(Size * sum(Connectivity^2) / sum(Connectivity)^2 - 1)
  ClusterCoef <- .ClusterCoef.fun(adj)
  fMAR <- function(v) sum(v^2) / sum(v)
  MAR <- apply(adj, 1, fMAR)
  ScaledConnectivity <- Connectivity / max(Connectivity, na.rm = T)

  output <- list(
    Connectivity = Connectivity,
    ScaledConnectivity = ScaledConnectivity,
    ClusterCoef = ClusterCoef, MAR = MAR,
    Density = Density, Centralization = Centralization,
    Heterogeneity = Heterogeneity
  )

  if (!is.null(GS)) {
    output$NetworkSignificance <- mean(GS, na.rm = T)
    output$HubNodeSignificance <- sum(GS * ScaledConnectivity, na.rm = T) / sum(ScaledConnectivity^2, na.rm = T)
  }
  output
} # end of function

# ==========================================================================================================
#
# Density function
#
# ==========================================================================================================