Nothing
#' Triangulated Maximally Filtered Graph
#' @description Applies the Triangulated Maximally Filtered Graph (TMFG) filtering method
#' (\strong{Please see and cite Massara et al., 2016}). The TMFG method uses a structural
#' constraint that limits the number of zero-order correlations included in the network
#' (3n - 6; where \emph{n} is the number of variables). The TMFG algorithm begins by
#' identifying four variables which have the largest sum of correlations to all other
#' variables. Then, it iteratively adds each variable with the largest sum of three
#' correlations to nodes already in the network until all variables have been added to
#' the network. This structure can be associated with the inverse correlation matrix
#' (i.e., precision matrix) to be turned into a GGM (i.e., partial correlation network)
#' by using \code{\link[NetworkToolbox]{LoGo}}. See Details for more information on this
#' network estimation method.
#'
#' @param data Can be a dataset or a correlation matrix
#'
#' @param normal Should data be transformed to a normal distribution?
#' Input must be a dataset.
#' Defaults to \code{TRUE}.
#' Computes correlations using the \code{\link[qgraph]{cor_auto}} function.
#' Set to \code{FALSE} for Pearson's correlation
#'
#' @param na.data How should missing data be handled?
#' For \code{"listwise"} deletion the \code{\link{na.omit}} function is applied.
#' Set to \code{"fiml"} for Full Information Maximum Likelihood (\code{\link[psych]{corFiml}}).
#' Full Information Maximum Likelihood is \strong{recommended} but time consuming
#'
#' @param depend Is network a dependency (or directed) network?
#' Defaults to \code{FALSE}.
#' Set to \code{TRUE} to generate a TMFG-filtered dependency network
#' (output obtained from the \code{\link[NetworkToolbox]{depend}} function)
#'
#' @return Returns a list containing:
#'
#' \item{A}{The filtered adjacency matrix}
#'
#' \item{separators}{The separators (3-cliques) in the network
#' (wrapper output for \code{\link[NetworkToolbox]{LoGo}})}
#'
#' \item{cliques}{The cliques (4-cliques) in the network
#' (wrapper output for \code{\link[NetworkToolbox]{LoGo}})}
#'
#' @details The TMFG method applies a structural constraint on the network,
#' which restrains the network to retain a certain number of edges (3\emph{n}-6, where \emph{n}
#' is the number of nodes; Massara et al., 2016). The network is also composed of 3- and 4-node
#' cliques (i.e., sets of connected nodes; a triangle and tetrahedron, respectively). The
#' TMFG method constructs a network using zero-order correlations and the resulting network
#' can be associated with the inverse covariance matrix
#' (yielding a GGM; Barfuss, Massara, Di Matteo, & Aste, 2016).
#' Notably, the TMFG can use any association measure and thus does not assume the data is multivariate normal.
#'
#' Construction begins by forming a tetrahedron of the four nodes that have
#' the highest sum of correlations that are greater than the average correlation in the
#' correlation matrix. Next, the algorithm iteratively identifies the node that maximizes
#' its sum of correlations to a connected set of three nodes (triangles) already included
#' in the network and then adds that node to the network. The process is completed once
#' every node is connected in the network. In this process, the network automatically
#' generates what’s called a planar network. A planar network is a network that could be
#' drawn on a sphere with no edges crossing (often, however, the networks are depicted
#' with edges crossing; Tumminello, Aste, Di Matteo, & Mantegna, 2005).
#'
#' @examples
#' # Pearson's correlation only for CRAN checks
#' A <- TMFG(neoOpen, normal = FALSE)$A
#'
#' @references
#' Christensen, A. P., Kenett, Y. N., Aste, T., Silvia, P. J., & Kwapil, T. R. (2018).
#' Network structure of the Wisconsin Schizotypy Scales-Short Forms: Examining psychometric network filtering approaches.
#' \emph{Behavior Research Methods}, \emph{50}, 2531-2550.
#'
#' Massara, G. P., Di Matteo, T., & Aste, T. (2016).
#' Network filtering for big data: Triangulated maximally filtered graph.
#' \emph{Journal of Complex Networks}, \emph{5}, 161-178.
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @importFrom stats cor sd runif qt na.action qchisq
#' @importFrom utils capture.output
#'
#' @export
#TMFG Filtering Method----
TMFG <-function (data, normal = TRUE,
na.data = c("pairwise","listwise","fiml","none"),
depend = FALSE)
{
#missing data handling
if(missing(na.data))
{
if(any(is.na(data)))
{stop("Missing values were detected! Set 'na.data' argument")
}else{na.data<-"none"}
}else{na.data<-match.arg(na.data)}
if(na.data=="pairwise")
{
if(normal)
{cormat<-qgraph::cor_auto(data,missing=na.data)
}else{cormat<-cor(data,use="pairwise.complete.obs")}
}else if(na.data=="listwise")
{
if(normal)
{cormat<-qgraph::cor_auto(data,missing=na.data)
}else{
rem<-na.action(na.omit(data))
warning(paste(length(na.action(na.omit(data)))),
" rows were removed for missing data\nrow(s): ",
paste(na.action(na.omit(data)),collapse = ", "))
data<-na.omit(data)
}
}else if(na.data=="fiml")
{
if(normal)
{cormat<-qgraph::cor_auto(data,missing=na.data)
}else{cormat<-psych::corFiml(data)}
}else if(na.data=="none")
{
if(nrow(data)==ncol(data)){cormat<-data
}else if(normal){cormat<-qgraph::cor_auto(data)
}else{cormat<-cor(data)}
}
# Number of nodes
n <- ncol(cormat)
# Signed correlations
tcormat <- cormat
cormat <- abs(cormat)
# Let user know matrix is too small for TMFG estimation
# It is still okay to proceed
if(n < 9)
{print("Matrix is too small")}
# Initialize sparse edge matrix
nodeTO <- sort(c(rep(1:n,n)))
nodeFROM <- c(rep(1:n,n))
nodeWEIGHT <- as.vector(cormat)
# Initialize matrices
M <- cbind(nodeTO, nodeFROM, nodeWEIGHT) # sparse node-weight matrix
in_v <- matrix(nrow=nrow(cormat), ncol=1) # inserted vertices matrix
ou_v <- matrix(nrow=nrow(cormat), ncol=1) # not yet inserted vertices matrix
tri <- matrix(nrow=((2*n)-4), ncol=3) # triangles matrix
separators <- matrix(nrow=n-4, ncol=3) # matrix of 3-cliques (non-face triangles)
# Find 3 vertices with largest strength
s <- rowSums(cormat*(cormat > mean(matrix(unlist(cormat), nrow=1)))*1)
# Order vertices with largest strength
# and grab the top 4
in_v[1:4] <- order(s,decreasing=TRUE)[1:4]
# Set aside nodes that are not in the top 4
ou_v <- setdiff(1:nrow(in_v),in_v)
# Build tetrahedron with the largest strength
## Insert triangles
tri[1,]<-in_v[1:3,]
tri[2,]<-in_v[2:4,]
tri[3,]<-in_v[c(1,2,4),]
tri[4,]<-in_v[c(1,3,4),]
# Initialize sparse TMFG matrix
S <- matrix(nrow=(3*nrow(cormat)-6),ncol=3)
# Algorithm for traditional or dependency network
if(!depend)
{
S[1,] <- c(in_v[1],in_v[2],1)
S[2,] <- c(in_v[1],in_v[3],1)
S[3,] <- c(in_v[1],in_v[4],1)
S[4,] <- c(in_v[2],in_v[3],1)
S[5,] <- c(in_v[2],in_v[4],1)
S[6,] <- c(in_v[3],in_v[4],1)
}else{
# Determine appropriate order for directionality in dependency network
## Node 1 and 2
if(cormat[in_v[1],in_v[2]]>cormat[in_v[2],in_v[1]])
{S[1,]<-c(in_v[1],in_v[2],1)
}else{S[1,]<-c(in_v[2],in_v[1],1)}
## Node 1 and 3
if(cormat[in_v[1],in_v[3]]>cormat[in_v[3],in_v[1]])
{S[2,]<-c(in_v[1],in_v[3],1)
}else{S[2,]<-c(in_v[3],in_v[1],1)}
## Node 1 and 4
if(cormat[in_v[1],in_v[4]]>cormat[in_v[4],in_v[1]])
{S[3,]<-c(in_v[1],in_v[4],1)
}else{S[3,]<-c(in_v[4],in_v[1],1)}
## Node 2 and 3
if(cormat[in_v[2],in_v[3]]>cormat[in_v[3],in_v[2]])
{S[4,]<-c(in_v[2],in_v[3],1)
}else{S[4,]<-c(in_v[3],in_v[2],1)}
## Node 2 and 4
if(cormat[in_v[2],in_v[4]]>cormat[in_v[4],in_v[2]])
{S[5,]<-c(in_v[2],in_v[4],1)
}else{S[5,]<-c(in_v[4],in_v[2],1)}
## Node 3 and 4
if(cormat[in_v[3],in_v[4]]>cormat[in_v[4],in_v[3]])
{S[6,]<-c(in_v[3],in_v[4],1)
}else{S[6,]<-c(in_v[4],in_v[3],1)}
}
#build initial gain table
gain <- matrix(-Inf,nrow=n,ncol=(2*(n-2)))
gain[ou_v,1] <- rowSums(cormat[ou_v,(tri[1,])])
gain[ou_v,2] <- rowSums(cormat[ou_v,(tri[2,])])
gain[ou_v,3] <- rowSums(cormat[ou_v,(tri[3,])])
gain[ou_v,4] <- rowSums(cormat[ou_v,(tri[4,])])
ntri <- 4 #number of triangles
gij <- matrix(nrow=1,ncol=ncol(gain))
v <- matrix(nrow=1,ncol=ncol(gain))
ve <- array()
tr <- 0
for(e in 5:n)
{
if(length(ou_v)==1){
ve<-ou_v
v<-1
w<-1
tr<-which.max(gain[ou_v,])
}else{
for(q in 1:ncol(gain))
{
gij[,q] <- max(gain[ou_v,q])
v[,q] <- which.max(gain[ou_v,q])
tr <- which.max(gij)
}
ve <- ou_v[v[tr]]
w <- v[tr]
}
#update vertex lists
ou_v<-ou_v[-w]
in_v[e]<-ve
#update adjacency matrix
for(u in 1:length(tri[tr,]))
{
cou<-6+((3*(e-5))+u)
if(depend){
if(cormat[ve,tri[tr,u]]>cormat[tri[tr,u],ve]){
S[cou,]<-cbind(ve,tri[tr,u],1)
}else{S[cou,]<-cbind(tri[tr,u],ve,1)}}else
S[cou,]<-cbind(ve,tri[tr,u],1)
}
#update 3-clique list
separators[e-4,]<-tri[tr,]
#update triangle list replacing 1 and adding 2 triangles
tri[ntri+1,]<-cbind(rbind(tri[tr,c(1,3)]),ve)
tri[ntri+2,]<-cbind(rbind(tri[tr,c(2,3)]),ve)
tri[tr,]<-cbind(rbind(tri[tr,c(1,2)]),ve)
#update gain table
gain[ve,]<-0
gain[ou_v,tr]<-rowSums(cormat[ou_v,tri[tr,],drop=FALSE])
gain[ou_v,ntri+1]<-rowSums(cormat[ou_v,tri[ntri+1,],drop=FALSE])
gain[ou_v,ntri+2]<-rowSums(cormat[ou_v,tri[ntri+2,],drop=FALSE])
#update triangles
ntri<-ntri+2
}
cliques<-rbind(in_v[1:4],(cbind(separators,in_v[5:ncol(cormat)])))
L<-S
if(depend)
{W<-matrix(1:nrow(cormat),nrow=nrow(cormat),ncol=1)
X<-matrix(1:nrow(cormat),nrow=nrow(cormat),ncol=1)
Y<-matrix(0,nrow=nrow(cormat),ncol=1)
Z<-cbind(W,X,Y)
K<-rbind(L,Z)
}else{
L[,1]<-S[,2]
L[,2]<-S[,1]
K<-rbind(S,L)
}
x <- matrix(0, nrow = ncol(cormat), ncol = ncol(cormat))
for(i in 1:nrow(K))
{
x[K[i,1], K[i,2]] <- 1
x[K[i,2], K[i,1]] <- 1
}
diag(x)<-1
for(r in 1:nrow(x))
for(z in 1:ncol(x))
{if(x[r,z]==1){x[r,z]<-tcormat[r,z]}}
colnames(x)<-colnames(data)
x <- as.data.frame(x)
row.names(x)<-colnames(x)
x <- as.matrix(x)
return(list(A=x, separators=separators, cliques=cliques))
}
#----
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.