knitr::opts_chunk$set(echo = TRUE, eval = TRUE, fig.align="center",
                      message = F, warning = F)

This document walks through example simulation code used to produce the results presented in 'A random covariance model for bi-level graphical modeling with application to resting-state fMRI data', submitted to the journal Biometrics. First, we install and load the R package accompanying the manuscript:

# Install the rcm package from GitHub if it is not already installed
if(!("rcm" %in% installed.packages())) {
  devtools::install_github("dilernia/rcm")
}

# Load rcm package
library(rcm)

# Load other packages used for data manipulation and table formatting
library(tidyverse, quietly = TRUE)
library(kableExtra, quietly = TRUE)

## Simulating Data {.tabset .tabset-fade}

We simulate an example data set for $K=8$ total subjects. For each subject, we generate $n=50$ observations for $p=100$ variables with approximately $\rho = 20\%$ of network connections differing from their shared group-level network.

# Clear the workspace, then set the seed so the simulation is reproducible
rm(list = ls())
set.seed(123456)

# Specify the group-level graph G and the data-generating settings:
# individual variants with rho = 0.2 and errors in [-0.1, 0.1] for common edges

# Number of variables (nodes)
p <- 100

# Shared group-level graph: p x p adjacency matrix, edges stored in the upper triangle
G <- diag(0, p)

G[1,2:14] <- G[12,15] <- G[13,16] <- G[14,17] <- G[17,18] <- 1
G[1,19] <- G[19,20] <- G[19,21] <- G[21,22:23] <- G[23,24] <- 1
G[1,25] <- G[25,26] <- 1

G[26,27:33] <- G[33,34:35] <- 1
G[26,36] <- G[36,37:39] <- G[38,40] <- G[39,41] <- 1
G[26,42] <- G[42,43:44] <- G[44,45] <- 1
G[26,46] <- G[46,47] <- G[47,48] <- G[48,49:52] <- G[52,53] <- 1
G[26,54] <- 1

G[54,55:62] <- G[59,63] <- G[60,64] <- G[61,65] <- G[62,66] <- 1
G[54,67] <- G[67,68:72] <- 1
G[54,73] <- G[73,74:76] <- G[76,77] <- 1
G[54,78] <- 1

G[78,79:82] <- G[81,83] <- G[82,84:85] <- 1
G[78,86] <- 1
G[86,87:92] <- G[90,93] <- G[91,94] <- G[92,95:96] <- 1
G[86,97] <- G[97,98:99] <- G[99,100] <- 1


# library(igraph)
# g0 = graph.adjacency(G,mode="undirected")
# tkplot(g0, layout=layout.kamada.kawai,vertex.label=rep('',p),vertex.size=5,
#              vertex.color="gray80", canvas.width=650, canvas.height=600 )



# Assign random weights in [0.5, 1] with random signs to the edges of G
temp <- diag(0.5, p) + G * matrix(runif(p*p, 0.5, 1) * sample(c(-1, 1), p*p, replace = TRUE), p, p)

# Symmetrize, add half the node degree to the diagonal, and check
# that the result is positive definite (should print FALSE)
temp2 <- temp + t(temp) + diag(rowSums(G + t(G)) / 2)
any(eigen(temp2, only.values = TRUE)$values <= 0)

# Standardize to unit diagonal to obtain the true group-level precision matrix
Omega0.true <- diag(diag(temp2)^-0.5) %*% temp2 %*% diag(diag(temp2)^-0.5)

# plot(sort(Omega0.true[Omega0.true != 0 & Omega0.true < 0.99]))
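As a quick sanity check (our own addition, not part of the original script), we can count the edges in the shared graph and confirm that the standardized precision matrix is positive definite:

# Our addition: number of edges in the shared group-level graph
sum(G)

# Our addition: the smallest eigenvalue of Omega0.true should be positive
min(eigen(Omega0.true, only.values = TRUE)$values) > 0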


# Add and remove subject-specific edges relative to the group-level graph

# Indices of absent (ind.0) and present (ind.1) edges in the upper triangle
A <- diag(0, p); A[upper.tri(A)] <- 1; A <- A - G
ind.0 <- which(A == 1)
ind.1 <- which(G == 1)

K <- 8  # number of subjects
Gk <- Omegas.true <- array(0, c(p, p, K))

dif.p <- 0.2  # proportion of group-level edges that differ for each subject
es <- 0.1     # maximum perturbation of the weights of shared (common) edges

# Generating mechanism 1: for each subject, add and delete edges and
# perturb the weights of edges shared with the group-level graph
for(k in 1:K) {

    # Randomly choose edges to add (ind.ak), delete (ind.dk), and keep unchanged (ind.sm)
    ind.ak <- sample(ind.0, round(sum(G)*dif.p))
    ind.dk <- sample(ind.1, round(sum(G)*dif.p))
    ind.sm <- setdiff(ind.1, ind.dk)
    G.ak <- G.dk <- G.sm <- matrix(0, p, p)
    G.ak[ind.ak] <- 1
    G.dk[ind.dk] <- 1
    G.sm[ind.sm] <- 1

    # Resample edge weights until the subject's precision matrix is positive definite
    temp.k <- matrix(0, p, p)
    while(any(eigen(temp.k)$values <= 0)) {

        temp.ak <- G.ak*matrix(runif(p*p, 0.5, 1)*sample(c(-1, 1), p*p, replace = TRUE), p, p)
        temp.dk <- G.dk*temp
        temp.sm <- G.sm*matrix(runif(p*p, 0, es)*sample(c(-1, 1), p*p, replace = TRUE), p, p)
        temp.k <- temp + t(temp) - temp.dk - t(temp.dk) + temp.ak + t(temp.ak) + temp.sm + t(temp.sm) + 
            diag(rowSums(G+t(G)+G.ak+t(G.ak)-G.dk-t(G.dk))/2)
    }
    # Standardize to unit diagonal to obtain the true subject-level precision matrix
    Omegas.true[,,k] <- diag(diag(temp.k)^-0.5) %*% temp.k %*% diag(diag(temp.k)^-0.5)

    # Subject-level graph: group edges plus added edges minus deleted edges
    Gk[,,k] <- G + G.ak - G.dk

    cat(k, "is done \n")
}

# Store the true group-level and subject-level graphs
G.true <- G
Gk.true <- Gk
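To verify that roughly $\rho = 20\%$ of the group-level connections are altered per subject, a simple check (our own addition) counts the upper-triangular entries where each subject's graph differs from the group graph; each count should equal twice `round(sum(G) * dif.p)`, since that many edges are added and the same number deleted:

# Our addition: per-subject number of edges differing from the group-level graph
sapply(1:K, function(k) sum(Gk.true[,,k] != G.true))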



# Number of observations per subject
n <- 50

# Simulate an n x p Gaussian data matrix for each subject with precision matrix Omegas.true[,,k]
sim.dat <- rep(list(NA), K)
for(k in 1:K) sim.dat[[k]] <- matrix(rnorm(n*p), n, p) %*% chol(solve(Omegas.true[,,k]))
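As a brief check (our own addition), each element of `sim.dat` should be an $n \times p$ ($50 \times 100$) matrix:

# Our addition: dimensions of each subject's simulated data matrix
sapply(sim.dat, dim)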

## Analysis {.tabset .tabset-fade}

We now analyze the simulated data set, selecting the optimal tuning parameters using BIC and a modified BIC.

# Tuning parameters: lambda1, lambda2, and lambda3
# The same lambda1 candidates are paired with each (lambda2, lambda3) combination
lam1.cand <- c(0.0002, 0.0006, 0.0008, seq(0.001, 0.006, by = 0.001), 0.008, 0.01, 0.015, 0.02)

lam2.cand <- 100; lam3.cand <- 0.0001
lams.cand <- expand.grid(lam1.cand, lam2.cand, lam3.cand)

lam2.cand <- 70; lam3.cand <- 0.0001
lams.cand <- rbind(lams.cand, expand.grid(lam1.cand, lam2.cand, lam3.cand))

lam2.cand <- 40; lam3.cand <- 0.001
lams.cand <- rbind(lams.cand, expand.grid(lam1.cand, lam2.cand, lam3.cand))

colnames(lams.cand) <- c('lam1', 'lam2', 'lam3')
nlams <- nrow(lams.cand)


# Data to analyze
x <- sim.dat

K <- length(x); p <- ncol(x[[1]])

# Indicator arrays for the upper triangle (used when evaluating edge recovery)
uptri.mat0 <- diag(0, p); uptri.mat0[upper.tri(diag(p))] <- 1
uptri.matk <- array(uptri.mat0, c(p, p, K))

# Storage for estimates and information criteria across all tuning parameter settings
Omega0.est <- array(NA, c(p, p, nlams))
Omegas.est <- array(NA, c(p, p, K, nlams))
bic.rc <- mbic.rc <- array(NA, nlams)


for(j in 1:nlams) {


    # regularizing parameters for candidate setting j
    lam1 <- lams.cand[j,1] * (1 + lams.cand[j,2])
    lam2 <- lams.cand[j,2] * K
    lam3 <- lams.cand[j,3]


    # estimation
    est <- rcm::randCov(x,lam1,lam2,lam3)
    Omega0 <- est$Omega0
    Omegas <- est$Omegas

    Omega0.est[,,j] <- Omega0
    Omegas.est[,,,j] <- Omegas

    # calculate BIC
    bic.rc[j] <- rcm::bic_cal(x,Omegas)
    mbic.rc[j] <- rcm::mbic_cal(x,Omega0,Omegas,lams.cand[j,2])

    # Report progress when moving on to the next lam2 value
    if(j < nlams && lams.cand[j+1,2] != lams.cand[j,2]) cat('lam2 =', lams.cand[j,2], 'is done \n')


}  # end of loop for all lambdas
# Select the estimates minimizing the modified BIC
Omega0.rc <- Omega0.est[,,which.min(mbic.rc)]
Omegas.rc <- Omegas.est[,,,which.min(mbic.rc)]
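It can also be helpful to see which candidate tuning parameter combination the modified BIC selected (our own addition):

# Our addition: tuning parameters chosen by the modified BIC
lams.cand[which.min(mbic.rc), ]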

# Estimated graphs: nonzero off-diagonal entries (rounded to 3 decimal places)
Gk.rc <- (round(Omegas.rc, 3) != 0) - array(diag(p), c(p, p, K))
G0.rc <- (round(Omega0.rc, 3) != 0) - diag(p)

uptri.mat0 <- diag(0,p); uptri.mat0[upper.tri(diag(p))] <- 1
uptri.matk <- array(uptri.mat0,c(p,p,K))

# True and false positive rates for subject-level (k) and group-level (g) edge recovery
TPRk <- sum( Gk.rc*uptri.matk & Gk.true ) / sum(Gk.true)
FPRk <- sum( Gk.rc*uptri.matk & (uptri.matk-Gk.true) ) / sum(uptri.matk-Gk.true)
TPRg <- sum( G0.rc*uptri.mat0 & G.true ) / sum(G.true)
FPRg <- sum( G0.rc*uptri.mat0 & (uptri.mat0-G.true) ) / sum(uptri.mat0-G.true)

print(c(TPRk = TPRk, FPRk = FPRk, TPRg = TPRg, FPRg = FPRg))
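For a more readable summary, the recovery rates can be arranged in a small table; this is a presentation sketch using the `kableExtra` package loaded above, and the table layout is our own:

# Our addition: tabulate edge recovery for the selected model
res.tab <- data.frame(Level = c("Subject-level", "Group-level"),
                      TPR = c(TPRk, TPRg),
                      FPR = c(FPRk, FPRg))
knitr::kable(res.tab, digits = 3, caption = "Edge recovery for the selected model") %>%
  kableExtra::kable_styling(full_width = FALSE)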

