In thebrisklab/singR: Simultaneous Non-Gaussian Component Analysis

singR

singR package is built on SING method https://github.com/thebrisklab/SING. SING is used to extract joint and individual non-gaussian components from different datasets. This is a tutorial example supporting the paper Simultaneous Non-Gaussian Component Analysis (SING) for Data Integration in Neuroimaging Benjamin Risk, Irina Gaynanova https://projecteuclid.org/journals/annals-of-applied-statistics/volume-15/issue-3/Simultaneous-non-Gaussian-component-analysis-SING-for-data-integration-in/10.1214/21-AOAS1466.full

Installation

You can install singR from github with:

library(devtools)
install_github("thebrisklab/singR")

If you want to install it on Mac OS, the installation tips is here:

1.Make sure all the R packages used in singR are updated to the recommended version.

2.Try to install singR, if there is an error saying:

ld: warning: directory not found for option '-L/opt/R/arm64/gfortran/lib/gcc/aarch64-apple-darwin20.2.0/11.0.0'
ld: warning: directory not found for option '-L/opt/R/arm64/gfortran/lib'
ld: library not found for -lgfortran
clang: error: linker command failed with exit code 1 (use -v to see invocation)

Then go to: /Library/Frameworks/R.framework/Resources/etc/Makeconf Change FLIBS from

FLIBS =  -L/opt/R/arm64/gfortran/lib/gcc/aarch64-apple-darwin20.2.0/11.0.0 -L/opt/R/arm64/gfortran/lib -lgfortran -lemutls_w -lm

FLIBS =  -L/usr/local/gfortran/lib/gcc/aarch64-apple-darwin20.2.0/11.0.0 -L/usr/local/gfortran/lib -lgfortran -lm

3.Download the .tar.xz file from https://github.com/fxcoudert/gfortran-for-macOS/releases/tag/11-arm-alpha2 using the browser, unpack it under /usr/local so that the "gfortran" folder is there.

4.Install singR again, it should be installed successfully.

Quick start guide

Load package and data

for the quick start, we use a compressed version to accelerate the computation, which is a subpart of correlation matrix and 2k-resolution dtseries data.

# Load the package
library(singR)

# Read and visualize data
load("extdata/simdata.rda")

# It contains dX, dY, mj, sIx,sIy,sjx,sjy

## True Data and signchange
Sxtrue = t(simdata$sjx) #dim(Sxtrue) px x n
Sytrue = t(simdata$sjy)

Sxtrue = signchange(Sxtrue) #sign degree amplification
Sytrue = signchange(Sytrue)

Plot for true X component

This step needs ciftiTools package and connectome workbench, which can be found in https://github.com/mandymejia/ciftiTools

library(ciftiTools)
ciftiTools.setOption("wb_path", "C:/Software/workbench")

xii_template <- read_cifti("extdata/template.dtseries.nii", brainstructures=c("left", "right"),resamp_res = 2000) #the template cifti file is built in the package
# resample the template to 2k resolution
xii_new <- newdata_xifti(xii_template, Sxtrue)


view_xifti_surface(select_xifti(xii_new,1),zlim = c(-2.43,2.82)) # component1 true
view_xifti_surface(select_xifti(xii_new,2),zlim = c(-2.43,2.82)) # component2 true

knitr::include_graphics("fig/Truth_CompX.png")

#define plotNetwork_change
plotNetwork_change = function(component,title='',qmin=0.005, qmax=0.995, path = '~/Dropbox/JINGCA/Data/community_affiliation_mmpplus.csv',make.diag=NA) {
  # component:
  # vectorized network of length choose(n,2)
  require(ggplot2)
  require(grid)
  require(scales)

  # load communities for plotting:
  mmp_modules = read.csv(path)
  mmp_order = order(mmp_modules$Community_Vector)

  #check community labels:
  #table(mmp_modules$Community_Label)
  #table(mmp_modules$Community_Label,mmp_modules$Community_Vector)

  #labels = c('VI','SM','DS','VS','DM','CE','SC')
  #coords = c(0,70.5,124.5,148.5,197.5,293.5,360.5)


  zmin = quantile(component,qmin)
  zmax = quantile(component,qmax)

  netmat = vec2net(component,make.diag)

  meltsub = create.graph.long(netmat,mmp_order)
  #g2 = ggplot(meltsub, aes(X1,X2,fill=value))+ geom_tile()+ scale_fill_gradient2(low = "blue",  high = "red",limits=c(zmin,zmax),oob=squish)+labs(title = paste0("Component ",component), x = "Node 1", y = "Node 2")+coord_cartesian(clip='off',xlim=c(-0,390))

  g2 = ggplot(meltsub, aes(X1,X2,fill=value))+
    geom_tile()+
    scale_fill_gradient2(low = "blue",  high = "red",limits=c(zmin,zmax),oob=squish)+
    labs(title = title, x = "Node 1", y = "Node 2")+
    coord_cartesian(clip='off',xlim=c(-0,100))

  #for (i in 1:7) {
  #  if (i!=3) {
  #    g2 = g2+geom_hline(yintercept = coords[i],linetype="dotted",size=0.5)+geom_vline(xintercept = coords[i],linetype="dotted",size=0.5)+annotation_custom(grob = textGrob(label = labels[i], hjust = 0, gp = gpar(cex = 1)),ymin = (coords[i]+10), ymax = (coords[i]+10), xmin = 385, xmax = 385)+annotation_custom(grob = textGrob(label = labels[i], hjust = 0, gp = gpar(cex = 1)),xmin = (coords[i]+10), xmax = (coords[i]+10), ymin = -7, ymax = -7)
  #  } else{
  #    g2 = g2+geom_hline(yintercept = coords[i],linetype="dotted",size=0.5)+geom_vline(xintercept = coords[i],linetype="dotted",size=0.5)+annotation_custom(grob = textGrob(label = labels[i], hjust = 0, gp = gpar(cex = 1)),ymin = (coords[i]+10), ymax = (coords[i]+10), xmin = 385, xmax = 385)+annotation_custom(grob = textGrob(label = labels[i], hjust = 0, gp = gpar(cex = 1)),xmin = (coords[i]+1), xmax = (coords[i]+1), ymin = -7, ymax = -7)
  #  }
  #}
  # which nodes are prominent:
  loadingsummary = apply(abs(netmat),1,sum,na.rm=TRUE)
  loadingsum2 = loadingsummary[mmp_order]

  Community = factor(mmp_modules$Community_Label)[mmp_order]

  g3 = qplot(c(1:100),loadingsum2,col=Community,size=I(3))+xlab('MMP Index')+ylab('L1 Norm of the Rows')

  return(list(netmatfig = g2, loadingsfig = g3, netmat=netmat, loadingsummary = loadingsummary))
}

Plot for true Y component

library(cowplot)
# plot for the true component of Y
out_true1 = plotNetwork_change(Sytrue[,1], title=expression("True S"["Jy"]*", 1"),qmin=0.005, qmax=0.995, path = 'extdata/new_mmp.csv') 
out_true2 = plotNetwork_change(Sytrue[,2], title=expression("True S"["Jy"]*", 2"),qmin=0.005, qmax=0.995, path = 'extdata/new_mmp.csv') 

# function plotNetwork_change is tailored for this compressed version data.
# The original function is called plotNetwork, which is set for the standard version of correlation matrix.
p1=out_true1$netmatfig
p2=out_true1$loadingsfig
p3=out_true2$netmatfig
p4=out_true2$loadingsfig

plot_grid(p1,p2,p3,p4,nrow = 2)

knitr::include_graphics("fig/ex2true.png")

singR function

This is a wrapper function that performs the centering, standardization, initial estimation (applying separate LNGCA, i.e., separate JB, and then matching scores), whitening, and final estimation (using the curvilinear algorithm) in a single function. This is the easiest way to apply sing.

output = singR(dX =simdata$dX ,dY =simdata$dY ,individual = T)

Pipeline of SING method

In this section of the vignett, we provide the detailed steps that are performed in the function singR(). This can be useful when customizing the analyses or when working with very large datasets.

# Center X and Y
dX=simdata$dX
dY=simdata$dY
n = nrow(dX)
pX = ncol(dX)
pY = ncol(dY)
dXcentered <- dX - matrix(rowMeans(dX), n, pX, byrow = F)
dYcentered <- dY - matrix(rowMeans(dY), n, pY, byrow = F)

Apply separate JB

n.comp.X=NG_number(dX)
n.comp.Y=NG_number(dY)
# JB on X
estX_JB = lngca(xData = dX, n.comp = n.comp.X, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB') #Note: make n.comp=nsubjects-1 on real data when computationally feasible. For this tutorial, to save time, we have reduced to n.comp=12, which is a number greater than the true number of components.
Mx_JB = est.M.ols(sData = estX_JB$S, xData = dX) 
# NOTE: for centered X, equivalent to xData %*% sData/(px-1)
Uxfull <- estX_JB$U  
# Ax = Ux %*% Lx, where Lx is the whitened matrix from covariance matrix of dX.


# JB on Y
estY_JB = lngca(xData = dY, n.comp = n.comp.Y, whiten = 'sqrtprec', restarts.pbyd = 20, distribution='JB')
My_JB = est.M.ols(sData = estY_JB$S, xData = dY)
Uyfull <- estY_JB$U

Get joint components

# Greedy Match
matchMxMy = greedymatch(Mx_JB, My_JB, Ux = Uxfull, Uy = Uyfull)

# Use permutation test to get the p-value of each match.
permJoint <- permTestJointRank(matchMxMy$Mx,matchMxMy$My,alpha = 0.05,nperm = 1000)
pval_joint = permJoint$pvalues
joint_rank = permJoint$rj
joint_rank  # the true value in this example is 2.

Whiten dX and dY

# For X
# Scale rowwise
est.sigmaXA = tcrossprod(dXcentered)/(pX-1)  ## dXcentered %*% t(dXcentered), which is the covariance matrix with n x n.
whitenerXA = est.sigmaXA%^%(-0.5)   # ZCA Whitening, Lx. 
xDataA = whitenerXA %*% dXcentered   # Xw = Lx %*% Xc.matrix with n x px. 
invLx = est.sigmaXA%^%(0.5) # Inverse matrix of Lx, which is the whitenerXA aforemetioned. 

# For Y
# Scale rowwise
est.sigmaYA = tcrossprod(dYcentered)/(pY-1)  ## since already centered, can just take tcrossprod
whitenerYA = est.sigmaYA%^%(-0.5)   # ZCA Whitening
yDataA = whitenerYA %*% dYcentered   
invLy = est.sigmaYA%^%(0.5)

Curvilinear search

# Calculate JB values
JBall = calculateJB(matchMxMy$Ux[1:joint_rank, ], X = xDataA) + calculateJB(matchMxMy$Uy[1:joint_rank, ], X = yDataA) 
# the columns of Ux & Uy are up to the joint_rank

## Small rho
rho = JBall/10
# medium rho, rho = JBall
# large rho, rho = JBall * 10

out_indiv_small <- curvilinear_c(invLx = invLx, invLy = invLy, xData = xDataA, yData = yDataA, Ux = matchMxMy$Ux, Uy = matchMxMy$Uy, rho = rho, maxiter = 1500, rj = joint_rank)
# curvilinear search with C code

Estimation and Plot

Outcome

#### Estimation by Small rho
Sx_rhoSmall = t(out_indiv_small$Ux[1:2, ] %*% xDataA)
Sy_rhoSmall = t(out_indiv_small$Uy[1:2, ] %*% yDataA)

#### Signchange for estimation
Sx_rhoSmall = signchange(Sx_rhoSmall)
Sy_rhoSmall = signchange(Sy_rhoSmall)

Estimation plot for X

xii_new <- newdata_xifti(xii_template, cbind(Sxtrue,Sx_rhoSmall))

view_xifti_surface(select_xifti(xii_new,3),zlim = c(-2.43,2.82)) # component1 small rho
view_xifti_surface(select_xifti(xii_new,4),zlim = c(-2.43,2.82)) # component2 small rho

knitr::include_graphics("fig/Esti_CompX.png")

Estimation plot for Y

library(cowplot)

out_rhoSmall1 = plotNetwork_change(Sy_rhoSmall[,1], title=expression("Estimate S"["Jy"]*", 1"),qmin=0.005, qmax=0.995, path = 'extdata/new_mmp.csv') 

out_rhoSmall2 = plotNetwork_change(Sy_rhoSmall[,2], title=expression("Estimate S"["Jy"]*", 2"),qmin=0.005, qmax=0.995, path = 'extdata/new_mmp.csv')

p5=out_rhoSmall1$netmatfig
p6=out_rhoSmall1$loadingsfig
p7=out_rhoSmall2$netmatfig
p8=out_rhoSmall2$loadingsfig

plot_grid(p5,p6,p7,p8,nrow = 2)

knitr::include_graphics("fig/Estexp2.png")

thebrisklab/singR documentation built on Feb. 10, 2024, 12:12 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

thebrisklab/singR
Simultaneous Non-Gaussian Component Analysis

In thebrisklab/singR: Simultaneous Non-Gaussian Component Analysis

singR

Installation

Quick start guide

Load package and data

for the quick start, we use a compressed version to accelerate the computation, which is a subpart of correlation matrix and 2k-resolution dtseries data.

Plot for true X component

This step needs ciftiTools package and connectome workbench, which can be found in https://github.com/mandymejia/ciftiTools

Plot for true Y component

singR function

Pipeline of SING method

Apply separate JB

Get joint components

Whiten dX and dY

Curvilinear search

Estimation and Plot

Outcome

Estimation plot for X

Estimation plot for Y

R Package Documentation

Browse R Packages

We want your feedback!

thebrisklab/singR Simultaneous Non-Gaussian Component Analysis

In thebrisklab/singR: Simultaneous Non-Gaussian Component Analysis

singR

Installation

Quick start guide

Load package and data

for the quick start, we use a compressed version to accelerate the computation, which is a subpart of correlation matrix and 2k-resolution dtseries data.

Plot for true X component

This step needs ciftiTools package and connectome workbench, which can be found in https://github.com/mandymejia/ciftiTools

Plot for true Y component

singR function

Pipeline of SING method

Apply separate JB

Get joint components

Whiten dX and dY

Curvilinear search

Estimation and Plot

Outcome

Estimation plot for X

Estimation plot for Y

R Package Documentation

Browse R Packages

We want your feedback!

thebrisklab/singR
Simultaneous Non-Gaussian Component Analysis