R/TMFG.R

#' Triangulated Maximally Filtered Graph
#' @description Applies the Triangulated Maximally Filtered Graph (TMFG) filtering method
#' (\strong{Please see and cite Massara et al., 2016}). The TMFG method uses a structural
#' constraint that limits the number of zero-order correlations included in the network
#' (3n - 6; where \emph{n} is the number of variables). The TMFG algorithm begins by
#' identifying four variables which have the largest sum of correlations to all other
#' variables. Then, it iteratively adds each variable with the largest sum of three
#' correlations to nodes already in the network until all variables have been added to
#' the network. This structure can be associated with the inverse correlation matrix
#' (i.e., precision matrix) to be turned into a GGM (i.e., partial correlation network)
#' by using \code{\link[NetworkToolbox]{LoGo}}. See Details for more information on this
#' network estimation method.
#' 
#' @param data Can be a dataset or a correlation matrix
#' 
#' @param normal Should data be transformed to a normal distribution?
#' Input must be a dataset.
#' Defaults to \code{TRUE}.
#' Computes correlations using the \code{\link[qgraph]{cor_auto}} function.
#' Set to \code{FALSE} for Pearson's correlation
#' 
#' @param na.data How should missing data be handled?
#' For \code{"listwise"} deletion the \code{\link{na.omit}} function is applied.
#' Set to \code{"fiml"} for Full Information Maximum Likelihood (\code{\link[psych]{corFiml}}).
#' Full Information Maximum Likelihood is \strong{recommended} but time consuming
#' 
#' @param depend Is network a dependency (or directed) network?
#' Defaults to \code{FALSE}.
#' Set to \code{TRUE} to generate a TMFG-filtered dependency network
#' (output obtained from the \code{\link[NetworkToolbox]{depend}} function)
#' 
#' @return Returns a list containing:
#' 
#' \item{A}{The filtered adjacency matrix}
#' 
#' \item{separators}{The separators (3-cliques) in the network
#' (wrapper output for \code{\link[NetworkToolbox]{LoGo}})}
#' 
#' \item{cliques}{The cliques (4-cliques) in the network
#' (wrapper output for \code{\link[NetworkToolbox]{LoGo}})}
#' 
#' @details The TMFG method applies a structural constraint on the network,
#' which restrains the network to retain a certain number of edges (3\emph{n}-6, where \emph{n}
#' is the number of nodes; Massara et al., 2016). The network is also composed of 3- and 4-node
#' cliques (i.e., sets of connected nodes; a triangle and tetrahedron, respectively). The
#' TMFG method constructs a network using zero-order correlations and the resulting network
#' can be associated with the inverse covariance matrix
#' (yielding a GGM; Barfuss, Massara, Di Matteo, & Aste, 2016).
#' Notably, the TMFG can use any association measure and thus does not assume the data is multivariate normal.
#' 
#' Construction begins by forming a tetrahedron of the four nodes that have
#' the highest sum of correlations that are greater than the average correlation in the
#' correlation matrix. Next, the algorithm iteratively identifies the node that maximizes
#' its sum of correlations to a connected set of three nodes (triangles) already included
#' in the network and then adds that node to the network. The process is completed once
#' every node is connected in the network. In this process, the network automatically
#' generates what’s called a planar network. A planar network is a network that could be
#' drawn on a sphere with no edges crossing (often, however, the networks are depicted
#' with edges crossing; Tumminello, Aste, Di Matteo, & Mantegna, 2005).
#' 
#' @examples
#' # Pearson's correlation only for CRAN checks
#' A <- TMFG(neoOpen, normal = FALSE)$A
#' 
#' @references 
#' Christensen, A. P., Kenett, Y. N., Aste, T., Silvia, P. J., & Kwapil, T. R. (2018).
#' Network structure of the Wisconsin Schizotypy Scales-Short Forms: Examining psychometric network filtering approaches.
#' \emph{Behavior Research Methods}, \emph{50}, 2531-2550.
#' 
#' Massara, G. P., Di Matteo, T., & Aste, T. (2016).
#' Network filtering for big data: Triangulated maximally filtered graph.
#' \emph{Journal of Complex Networks}, \emph{5}, 161-178.
#' 
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#' 
#' @importFrom stats cor sd runif qt na.action qchisq
#' @importFrom utils capture.output
#' 
#' @export
#TMFG Filtering Method----
TMFG <-function (data, normal = TRUE,
                 na.data = c("pairwise","listwise","fiml","none"),
                 depend = FALSE)
{
    #missing data handling
    if(missing(na.data))
    {
        if(any(is.na(data)))
        {stop("Missing values were detected! Set 'na.data' argument")
        }else{na.data<-"none"}
    }else{na.data<-match.arg(na.data)}
    
    if(na.data=="pairwise")
    {
        if(normal)
        {cormat<-qgraph::cor_auto(data,missing=na.data)
        }else{cormat<-cor(data,use="pairwise.complete.obs")}
    }else if(na.data=="listwise")
    {
        if(normal)
        {cormat<-qgraph::cor_auto(data,missing=na.data)
        }else{
            rem<-na.action(na.omit(data))
            warning(paste(length(na.action(na.omit(data)))),
                    " rows were removed for missing data\nrow(s): ",
                    paste(na.action(na.omit(data)),collapse = ", "))
            data<-na.omit(data)
        }
    }else if(na.data=="fiml")
    {
        if(normal)
        {cormat<-qgraph::cor_auto(data,missing=na.data)
        }else{cormat<-psych::corFiml(data)}
    }else if(na.data=="none")
    {
        if(nrow(data)==ncol(data)){cormat<-data
        }else if(normal){cormat<-qgraph::cor_auto(data)
        }else{cormat<-cor(data)}
    }
    
    # Number of nodes
    n <- ncol(cormat)
    
    # Signed correlations
    tcormat <- cormat
    cormat <- abs(cormat)
    
    # Let user know matrix is too small for TMFG estimation
    # It is still okay to proceed
    if(n < 9)
    {print("Matrix is too small")}

    # Initialize sparse edge matrix
    nodeTO <- sort(c(rep(1:n,n)))
    nodeFROM <- c(rep(1:n,n))
    nodeWEIGHT <- as.vector(cormat)
    
    # Initialize matrices
    M <- cbind(nodeTO, nodeFROM, nodeWEIGHT) # sparse node-weight matrix
    in_v <- matrix(nrow=nrow(cormat), ncol=1) # inserted vertices matrix
    ou_v <- matrix(nrow=nrow(cormat), ncol=1) # not yet inserted vertices matrix
    tri <- matrix(nrow=((2*n)-4), ncol=3) # triangles matrix
    separators <- matrix(nrow=n-4, ncol=3) # matrix of 3-cliques (non-face triangles)
    
    # Find 3 vertices with largest strength
    s <- rowSums(cormat*(cormat > mean(matrix(unlist(cormat), nrow=1)))*1)
    
    # Order vertices with largest strength
    # and grab the top 4
    in_v[1:4] <- order(s,decreasing=TRUE)[1:4]
    
    # Set aside nodes that are not in the top 4
    ou_v <- setdiff(1:nrow(in_v),in_v)
    
    # Build tetrahedron with the largest strength
    ## Insert triangles
    tri[1,]<-in_v[1:3,]
    tri[2,]<-in_v[2:4,]
    tri[3,]<-in_v[c(1,2,4),]
    tri[4,]<-in_v[c(1,3,4),]
    
    # Initialize sparse TMFG matrix
    S <- matrix(nrow=(3*nrow(cormat)-6),ncol=3)
    
    # Algorithm for traditional or dependency network
    if(!depend)
    {
        S[1,] <- c(in_v[1],in_v[2],1)
        S[2,] <- c(in_v[1],in_v[3],1)
        S[3,] <- c(in_v[1],in_v[4],1)
        S[4,] <- c(in_v[2],in_v[3],1)
        S[5,] <- c(in_v[2],in_v[4],1)
        S[6,] <- c(in_v[3],in_v[4],1)
    }else{
        
        # Determine appropriate order for directionality in dependency network
        ## Node 1 and 2
        if(cormat[in_v[1],in_v[2]]>cormat[in_v[2],in_v[1]])
            {S[1,]<-c(in_v[1],in_v[2],1)
        }else{S[1,]<-c(in_v[2],in_v[1],1)}
        
        ## Node 1 and 3
        if(cormat[in_v[1],in_v[3]]>cormat[in_v[3],in_v[1]])
        {S[2,]<-c(in_v[1],in_v[3],1)
        }else{S[2,]<-c(in_v[3],in_v[1],1)}
        
        ## Node 1 and 4
        if(cormat[in_v[1],in_v[4]]>cormat[in_v[4],in_v[1]])
        {S[3,]<-c(in_v[1],in_v[4],1)
        }else{S[3,]<-c(in_v[4],in_v[1],1)}
        
        ## Node 2 and 3
        if(cormat[in_v[2],in_v[3]]>cormat[in_v[3],in_v[2]])
        {S[4,]<-c(in_v[2],in_v[3],1)
        }else{S[4,]<-c(in_v[3],in_v[2],1)}
        
        ## Node 2 and 4
        if(cormat[in_v[2],in_v[4]]>cormat[in_v[4],in_v[2]])
        {S[5,]<-c(in_v[2],in_v[4],1)
        }else{S[5,]<-c(in_v[4],in_v[2],1)}
        
        ## Node 3 and 4
        if(cormat[in_v[3],in_v[4]]>cormat[in_v[4],in_v[3]])
        {S[6,]<-c(in_v[3],in_v[4],1)
        }else{S[6,]<-c(in_v[4],in_v[3],1)}
    }
    
    #build initial gain table
    gain <- matrix(-Inf,nrow=n,ncol=(2*(n-2)))
    gain[ou_v,1] <- rowSums(cormat[ou_v,(tri[1,])])
    gain[ou_v,2] <- rowSums(cormat[ou_v,(tri[2,])])
    gain[ou_v,3] <- rowSums(cormat[ou_v,(tri[3,])])
    gain[ou_v,4] <- rowSums(cormat[ou_v,(tri[4,])])
    
    ntri <- 4 #number of triangles
    gij <- matrix(nrow=1,ncol=ncol(gain))
    v <- matrix(nrow=1,ncol=ncol(gain))
    ve <- array()
    tr <- 0
    for(e in 5:n)
    {
        if(length(ou_v)==1){
            ve<-ou_v
            v<-1
            w<-1
            tr<-which.max(gain[ou_v,])
        }else{
            for(q in 1:ncol(gain))
            {
                gij[,q] <- max(gain[ou_v,q])
                v[,q] <- which.max(gain[ou_v,q])
                tr <- which.max(gij)
            }
            
            ve <- ou_v[v[tr]]
            w <- v[tr]
        }
        
        #update vertex lists
        ou_v<-ou_v[-w]
        in_v[e]<-ve
        
        #update adjacency matrix
        for(u in 1:length(tri[tr,]))
        {
            cou<-6+((3*(e-5))+u)
            if(depend){
                if(cormat[ve,tri[tr,u]]>cormat[tri[tr,u],ve]){
                    S[cou,]<-cbind(ve,tri[tr,u],1)   
                }else{S[cou,]<-cbind(tri[tr,u],ve,1)}}else
                    S[cou,]<-cbind(ve,tri[tr,u],1)
        }
        
        #update 3-clique list
        separators[e-4,]<-tri[tr,]
        #update triangle list replacing 1 and adding 2 triangles
        tri[ntri+1,]<-cbind(rbind(tri[tr,c(1,3)]),ve)
        tri[ntri+2,]<-cbind(rbind(tri[tr,c(2,3)]),ve)
        tri[tr,]<-cbind(rbind(tri[tr,c(1,2)]),ve)
        #update gain table
        gain[ve,]<-0
        gain[ou_v,tr]<-rowSums(cormat[ou_v,tri[tr,],drop=FALSE])
        gain[ou_v,ntri+1]<-rowSums(cormat[ou_v,tri[ntri+1,],drop=FALSE])
        gain[ou_v,ntri+2]<-rowSums(cormat[ou_v,tri[ntri+2,],drop=FALSE])
        
        #update triangles
        ntri<-ntri+2
    }
    cliques<-rbind(in_v[1:4],(cbind(separators,in_v[5:ncol(cormat)])))
    
    L<-S
    if(depend)
    {W<-matrix(1:nrow(cormat),nrow=nrow(cormat),ncol=1)
    X<-matrix(1:nrow(cormat),nrow=nrow(cormat),ncol=1)
    Y<-matrix(0,nrow=nrow(cormat),ncol=1)
    Z<-cbind(W,X,Y)
    K<-rbind(L,Z)
    }else{
        L[,1]<-S[,2]
        L[,2]<-S[,1]
        K<-rbind(S,L)
    }
    
    x <- matrix(0, nrow = ncol(cormat), ncol = ncol(cormat))
    
    for(i in 1:nrow(K))
    {
        x[K[i,1], K[i,2]] <- 1
        x[K[i,2], K[i,1]] <- 1
    }
    
    diag(x)<-1
    
    for(r in 1:nrow(x))
        for(z in 1:ncol(x))
        {if(x[r,z]==1){x[r,z]<-tcormat[r,z]}}
    
    colnames(x)<-colnames(data)
    x <- as.data.frame(x)
    row.names(x)<-colnames(x)
    x <- as.matrix(x)
    
    return(list(A=x, separators=separators, cliques=cliques))
}
#----
AlexChristensen/NetworkToolbox documentation built on March 6, 2023, 5:08 p.m.