require(fmriutils) require(graphstats) require(mgc) require(ggplot2) require(latex2exp) require(igraph) source('../../R/siem.R') # accepts a matrix and thresholds/binarizes it thresh_matrix = function(matrix, thresh=0.5) { thr = quantile(matrix, thresh) return(ifelse(matrix > thr, 1, 0)) } # accepts a [n x n] adjacency matrix and computes the probabilities associated with an SBM # where the vertices are grouped such that union_i(groups) = V(A) and # intersection(group_i, group_j) = NULL for all i != j block_data = function(matrix, groups) { # matrix is adwi_thresh n x n array # groups is a grouping of the vertices in the matrix as a list blocks = array(NaN, dim=c(2,2)) for (i in 1:length(groups)) { for (j in 1:length(groups)) { blocks[i, j] = mean(matrix[groups[[i]], groups[[j]]]) } } return(blocks) }
For details on the model, please see SIEM model.
In this notebook, we determine whether there exists a difference in connectivity $p_{homo}$ homotopically (same region, opposite hemisphere) vs $p_{hetero}$ heterotopically (different region) connectivity within a particular modality.
Our test for this notebook is as follows:
\begin{align} H_0: p_{homo} \leq p_{hetero} \ H_A: p_{homo} > p_{hetero} \end{align}
in words, whether the connectivity homotopically exceeds the connectivity heterotopically. We will do this in 2 ways:
We will perform this experiment for both dMRI and fMRI-derived connectomes.
For the data, we compute the weighted mean functional (rank of each edge) and diffusion (number of fibers). For the functional connectome, we threshold such that the largest 50% of edges are set to connected, and the smallest 50% set to disconnected. For the diffusion (which are natively sparse) we just threshold edges that are present to connected, and edges that are not present to disconnected (threshold about 0).
The data below can be downloaded and moved to appropriate folders as follows (note that the below section requires sudo access):
sudo mkdir /data/ sudo chmod -R 777 /data cd /data wget http://openconnecto.me/mrdata/share/derivatives/dwi_edgelists.tar.gz wget http://openconnecto.me/mrdata/share/derivatives/fmri_edgelists.tar.gz wget http://openconnecto.me/mrdata/share/connectome_stats/connectome_stats.zip mkdir -p /data/connectome_stats /data/all_mr /data/all_mr/dwi/edgelists /data/all_mr/fmri/ranked/edgelists mv dwi_edgelists.tar.gz /data/dwi/edgelists cd /data/dwi/edgelists tar -xvzf dwi_edgelists.tar.gz mv /data/fmri_edgelists.tar.gz /data/fmri/ranked/edgelists cd /data/fmri/ranked/edgelists tar -xvzf fmri_edgelists.tar.gz mv /data/connectome_stats.zip /data/connectome_stats.zip cd /data/connectome_stats unzip connectome_stats.zip
basepath = '/data/connectome_stats/' fmri_gr = read_graph(file.path(basepath, 'fmrimean_1709.edgelist'), format="ncol") vset <- V(fmri_gr) ordered_v <- order(vset) fmri_gr = read_graph(file.path(basepath, 'fmrimean_1709.edgelist'), format="ncol", predef=ordered_v) fmri_mean = get.adjacency(fmri_gr, type="both", sparse=FALSE, attr='weight') dwi_gr = read_graph(file.path(basepath, 'dwimean_2861.edgelist'), format="ncol", predef=ordered_v) dwi_mean = get.adjacency(dwi_gr, type="both", sparse=FALSE, attr='weight') fmri_thresh = thresh_matrix(fmri_mean) dwi_thresh = thresh_matrix(dwi_mean, thresh=0) gs.plot.plot_matrix(fmri_thresh, title = "Mean Thresholded Functional Connectome", legend.name = "connection", ffactor = TRUE) + theme(panel.background = element_rect(fill = '#ffffff')) gs.plot.plot_matrix(dwi_thresh, title = "Mean Thresholded DWI Connectome", legend.name = "connection", ffactor = TRUE) + theme(panel.background = element_rect(fill = '#ffffff'))
nroi <- 70 dwi.dsets = c('BNU1', 'BNU3', 'HNU1', 'KKI2009', 'NKI1', 'NKIENH', 'MRN1313', 'Templeton114', 'Templeton255', 'SWU4') dwi.atlas = 'desikan' dwi.basepath = '/data/all_mr/dwi/edgelists' graphobj = fmriu.io.collection.open_graphs(basepath = dwi.basepath, atlases = dwi.atlas, datasets = dwi.dsets, gname = 'graphs', fmt='edgelist', rtype = 'array') dwi.graphs = graphobj$graphs dwi.datasets = graphobj$dataset dwi.subjects = graphobj$subjects
ne = 1225 nroi <- 70 group1 <- c() # edges in same hemisphere group2 <- c() # edges across hemispheres for (i in 1:nroi) { for (j in 1:nroi) { idx <- (i - 1)*nroi + j if (abs(j - i) == 35) { group1 <- c(group1, idx) } else { group2 <- c(group2, idx) } } } Es <- list(group1, group2) dwi.models <- sapply(1:dim(dwi.graphs)[1], function(i) { gs.siem.fit(thresh_matrix(dwi.graphs[i,,], 0), Es, alt='greater') }, simplify = FALSE)
We might want to visualize the distribution of $\delta = \hat{p}{homo} - \hat{p}{hetero}$ under the analytical model and compare to our empirical model:
ne = 1225 dwi.homo.phat <- sapply(dwi.models, function(model) model$pr[1]) dwi.hetero.phat <- sapply(dwi.models, function(model)model$pr[2]) dwi.diff.distr.emp.mod = density(as.numeric(sapply(dwi.models, function(model) model$dpr[1,2]))) # variances sum dwi.diff.distr.ana = dnorm(dwi.diff.distr.emp.mod$x, mean=mean(abs(dwi.homo.phat - dwi.hetero.phat)), sd=sqrt(model.var(mean(dwi.homo.phat), ne) + model.var(mean(dwi.hetero.phat), ne))) n_diff = length(dwi.diff.distr.emp.mod$x) dwi.diff.dat = data.frame(x = c(dwi.diff.distr.emp.mod$x, dwi.diff.distr.emp.mod$x), y = c(dwi.diff.distr.emp.mod$y, dwi.diff.distr.ana), distribution=c(rep("empirical", n_diff), rep("analytical", n_diff))) dwi.diff.dat$distribution = factor(dwi.diff.dat$distribution) ggplot(dat=dwi.diff.dat, aes(x=x, y=y, ymax=y, fill=distribution, color=distribution, group=distribution)) + geom_ribbon(ymin=0, alpha=0.5) + ylab('Density') + xlab(TeX('$\\delta$')) + ggtitle(TeX('Distribution of $\\delta = \\hat{p}_{homo} - \\hat{p}_{hetero}$, DWI')) + theme(panel.background = element_rect(fill = '#ffffff'))
which clearly shows a strong difference in the mean heterotopically compared to homotopically, as our $\delta$ is generally quite high. Performing a paired t-test between the homotopic and heterotopic $\hat{p}$, we find:
t.test(dwi.homo.phat, dwi.hetero.phat, alternative="greater", var.equal=FALSE, paired=TRUE)
which as we can see, indicates a significant difference in homotopic connectivity compared to heterotopic connectivity with $p < 2.2\times 10^{-16}$ for the diffusion connectomes. However, in this case, we note that the model is not very representative of the actual data observed. This is likely due to the fact that much of the data (50%) is acquired from 2 of the sites, so there likely are strong batch-effects present in the $\hat{p}$, or that the diffusion connectivity is much more structured than the functional connectivity, and thus using a random block model may not be ideal.
Below, we look at the distribution of our $p-$values wehre we estimate one p-value per graph:
dwi.per.p <- sapply(dwi.models, function(model) model$pv[1,2]) dwi.p.dat = data.frame(p=dwi.per.p, dataset = dwi.datasets, modality='DWI') dwi.p.dat$dataset = factor(dwi.p.dat$dataset) ggplot(data=dwi.p.dat, aes(x=dataset, y=p, color=dataset, group=dataset)) + geom_jitter() + coord_trans(y = "log10") + ggtitle(TeX(sprintf('DWI Per-subject P-value (1 graph), %.2f percent have $p < .05$', 100*sum(dwi.per.p < .05)/length(dwi.per.p)))) + xlab('Dataset') + ylab('p-value') + theme(axis.text.x = element_text(angle=45), legend.position=NaN)
As we can see, with just $1$ graph, we still see that homotopic connectivity exceeds heterotopic connectivity with $p < .05$ for just about none of the individual graphs.
nroi <- 70 fmri.dsets = c('BNU1', 'BNU2', 'BNU3', 'HNU1', 'IBATRT', 'IPCAS1', 'IPCAS2', 'IPCAS5', 'IPCAS6', 'IPCAS8', 'MRN1', 'NYU1', 'SWU1', 'SWU2', 'SWU3', 'SWU4', 'UWM', 'XHCUMS') fmri.atlas = 'desikan-2mm' fmri.basepath = '/data/all_mr/fmri/ranked/edgelists/' graphobj = fmriu.io.collection.open_graphs(basepath = fmri.basepath, atlases = fmri.atlas, datasets=fmri.dsets, fmt='edgelist', rtype = 'array') fmri.graphs = graphobj$graphs fmri.datasets = graphobj$dataset fmri.subjects = graphobj$subjects fmri.sessions <- graphobj$sessions
ne = 1225 nroi <- 70 group1 <- c() # edges in same hemisphere group2 <- c() # edges across hemispheres for (i in 1:nroi) { for (j in 1:nroi) { idx <- (i - 1)*nroi + j if (abs(j - i) == 35) { group1 <- c(group1, idx) } else { group2 <- c(group2, idx) } } } Es <- list(group1, group2) fmri.models <- sapply(1:dim(fmri.graphs)[1], function(i) { gs.siem.fit(thresh_matrix(fmri.graphs[i,,], 0.5), Es, alt='greater') }, simplify = FALSE)
We might want to visualize the distribution of $\hat{p}{hetero}$ and $\hat{p}{homo}$ under the analytical model and compare to our empirical model:
ne = 1225 fmri.homo.phat <- sapply(fmri.models, function(model) model$pr[1]) fmri.hetero.phat <- sapply(fmri.models, function(model)model$pr[2]) fmri.diff.distr.emp.mod = density(as.numeric(fmri.homo.phat - fmri.hetero.phat)) # variances sum fmri.diff.distr.ana = dnorm(fmri.diff.distr.emp.mod$x, mean=mean(abs(fmri.homo.phat - fmri.hetero.phat)), sd=sqrt(model.var(mean(fmri.homo.phat), ne) + model.var(mean(fmri.hetero.phat), ne))) n_diff = length(fmri.diff.distr.emp.mod$x) fmri.diff.dat = data.frame(x = c(fmri.diff.distr.emp.mod$x, fmri.diff.distr.emp.mod$x), y = c(fmri.diff.distr.emp.mod$y, fmri.diff.distr.ana), distribution=c(rep("empirical", n_diff), rep("analytical", n_diff))) fmri.diff.dat$distribution = factor(fmri.diff.dat$distribution) ggplot(dat=fmri.diff.dat, aes(x=x, y=y, ymax=y, fill=distribution, color=distribution, group=distribution)) + geom_ribbon(ymin=0, alpha=0.5) + ylab('Density') + xlab(TeX('$\\delta$')) + ggtitle(TeX('Distribution of $\\delta = \\hat{p}_{homo} - \\hat{p}_{hetero}$, fMRI')) + theme(panel.background = element_rect(fill = '#ffffff'))
we note that since most of the fMRI connectomes have full connectivity homotopically, that the density estimate appears to be relatively odd. With only 70 possible edges with homotopic connectivity, if the majority are 65 or greater, there are only a few possible values that the homotopic connectivity can take.
which clearly shows a much less strong difference in the means homotopically compared to heterotopically, but still a present difference. Performing a t-test, we find:
t.test(fmri.homo.phat, fmri.hetero.phat, alternative="greater", var.equal=FALSE, paired=TRUE)
similar to the diffusion connectomes, the functional connectomes again exhibit a higher homotopic connectivity than heterotopic connectivity that is significant with $p < 2.2\times 10^{-16}$. The fit here is much better, likely due to the fact that fMRI connectivity is less structured and more random than diffusion connectivity.
Below, we look at the distribution of our $p-$values wehre we estimate one p-value per graph:
fmri.per.p <- sapply(fmri.models, function(model) model$pv[1,2]) fmri.p.dat = data.frame(p=fmri.per.p, dataset = fmri.datasets, modality='fMRI') fmri.p.dat$dataset = factor(fmri.p.dat$dataset) ggplot(data=fmri.p.dat, aes(x=dataset, y=p, color=dataset, group=dataset)) + geom_jitter() + ggtitle(TeX(sprintf('fMRI Per-subject P-value (1 graph), %.2f percent have $p < .05$', 100*sum(fmri.per.p < .05)/length(fmri.per.p)))) + coord_trans(y = "log10") + xlab('Dataset') + ylab('p-value') + theme(axis.text.x = element_text(angle=45), legend.position=NaN, panel.background = element_rect(fill = '#ffffff'))
As we can see, with just $1$ graph, we have significantly more confidence that the homotopic connectivity exceeds the heterotopic connectivity than we did with the diffusion graphs. With just one graph, we can identify a significant difference in homotopic vs. heterotopic connectivity for the functional connectomes in a large portion of the connectomes.
We can compare the results from the fMRI and DWI simultaneously as appropriately colored density estimates to show the difference in the $p$-values:
dual.p.dat = rbind(dwi.p.dat, fmri.p.dat) vline = data.frame(x=.05, type="sig") labs = lapply(levels(dual.p.dat$modality), function(mod) { pmod= dual.p.dat[dual.p.dat$modality == mod, ]$p TeX(paste(sprintf('%s: %.2f', mod, 100*sum(pmod < .05)/length(pmod)), '% < $\\alpha$', sep="")) }) dual.p.dat$grouping = paste(dual.p.dat$dataset, dual.p.dat$modality) # for the datasets that are shared ggplot(data=dual.p.dat, aes(p, group=grouping, color=modality)) + geom_line(stat="density", size=1, adjust=1.5) + scale_x_log10(limits=c(.005, 1)) + geom_vline(data=vline, aes(xintercept = x, linetype=type)) + scale_color_discrete(name="Modality", breaks=levels(dual.p.dat$modality)) + scale_linetype_manual(values=c("dashed"), name="Cutoff", breaks=c("sig"), labels=lapply(c("$\\alpha = 0.05$"), TeX)) + xlab(TeX('$log(p)$')) + ylab("Density") + theme(panel.background = element_rect(fill = '#ffffff')) + ggtitle("Hemispheric Intra-Modality")
Here, we again perform a test on 1 graph, except here the graphs used are the average functional and diffusion connectomes (the megameans). We feed this into a simple t-test with the appropriate assumptions (unequal variance, goal is to test for homotopic connectivity exceeding heterotopic connectivity). The question here that we seek to answer is, given the average connectome for a particular modality, can we identify a significant difference in homotopic vs. heterotopic connectivity?
fmri.agg.mod <- gs.siem.fit(fmri_thresh, Es) print(fmri.agg.mod$pv[1,2])
dwi.agg.mod <- gs.siem.fit(dwi_thresh, Es) print(dwi.agg.mod$pv[1,2])
As we can see above, the diffusion connectome has a less significant difference in homotopic vs heterotopic connectivity at the megamean connectome level than the functional connectome.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.