# Suppress title check warning options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(NetCoMi)
This tutorial demonstrates how to use the createAssoPerm
function
for storing and reloading count tables, association matrices and network
properties of the permuted data if permutation tests are performed with
netCompare()
or diffnet()
.
NetCoMi (>=1.0.2) includes the function createAssoPerm()
for creating either
only a matrix with permuted group labels or storing count tables and association
matrices of the permuted data. The stored files can be passed to
netCompare()
and diffnet()
so that users can repeatedly run these two
functions with varying arguments, without the need to recompute the associations
each time.
createAssoPerm()
additionally enables to compute the permutation associations
block-wise (for a subset of permutations) and store them in separate files.
These files can then be combined to one large matrix (with all permutations)
and passed to netCompare()
or diffnet()
.
The tutorial furthermore explains NetCoMi's ability to handle matched data sets.
We use data from the American Gut Project and conduct a network comparison between subjects with and without lactose intolerance.
To demonstrate NetCoMi's functionality for matched data, we build
a "fake" 1:2 matched data set, where each two samples of the LACTOSE = "no"
group are assigned to one sample of the LACTOSE = "yes"
group.
We use a subset of 150 samples, leading to 50 samples in group "yes" and 100
samples in group "no".
library(NetCoMi) library(phyloseq)
set.seed(123456) # Load American Gut Data (from SpiecEasi package) data("amgut2.filt.phy") #table(amgut2.filt.phy@sam_data@.Data[[which(amgut2.filt.phy@sam_data@names == "LACTOSE")]]) # Divide samples into two groups: with and without lactose intolerance lact_yes <- phyloseq::subset_samples(amgut2.filt.phy, LACTOSE == "yes") lact_no <- phyloseq::subset_samples(amgut2.filt.phy, LACTOSE == "no") # Extract count tables counts_yes <- t(as(phyloseq::otu_table(lact_yes), "matrix")) counts_no <- t(as(phyloseq::otu_table(lact_no), "matrix")) # Build the 1:2 matched data set counts_matched <- matrix(NA, nrow = 150, ncol = ncol(counts_yes)) colnames(counts_matched) <- colnames(counts_yes) rownames(counts_matched) <- 1:150 ind_yes <- ind_no <- 1 for (i in 1:150) { if ((i-1)%%3 == 0) { counts_matched[i, ] <- counts_yes[ind_yes, ] rownames(counts_matched)[i] <- rownames(counts_yes)[ind_yes] ind_yes <- ind_yes + 1 } else { counts_matched[i, ] <- counts_no[ind_no, ] rownames(counts_matched)[i] <- rownames(counts_no)[ind_no] ind_no <- ind_no + 1 } } # The corresponding group vector used for splitting the data into two subsets. group_vec <- rep(c(1,2,2), 50) # Note: group "1" belongs to "yes", group "2" belongs to "no" # Network construction net_amgut <- netConstruct(counts_matched, group = group_vec, matchDesign = c(1,2), filtTax = "highestFreq", filtTaxPar = list(highestFreq = 50), measure = "pearson", zeroMethod = "pseudo", normMethod = "clr", sparsMethod = "threshold", thresh = 0.4, seed = 123456) # Network analysis with default values props_amgut <- netAnalyze(net_amgut) #summary(props_amgut) # Network plot plot(props_amgut, sameLayout = TRUE, layoutGroup = "union", nodeSize = "clr", repulsion = 0.9, cexTitle = 3.7, cexNodes = 2, cexLabels = 2, groupNames = c("LACTOSE = yes", "LACTOSE = no")) legend("bottom", title = "estimated correlation:", legend = c("+","-"), col = c("#009900","red"), inset = 0.02, cex = 4, lty = 1, lwd = 4, bty = "n", horiz = TRUE)
We conduct a network comparison with permutation tests to examine whether group differences are significant. In order to reduce the execution time, only 100 permutations are used. For real data sets, the number of permutations should be at least 1000 to get reliable results.
The matrices with estimated associations for the permuted data are stored in an
external file (in the current working directory) named "assoPerm_comp"
.
# Network comparison comp_amgut_orig <- netCompare(props_amgut, permTest = TRUE, nPerm = 100, storeAssoPerm = TRUE, fileStoreAssoPerm = "assoPerm_comp", storeCountsPerm = FALSE, seed = 123456) summary(comp_amgut_orig)
The network comparison is repeated, but this time, the stored permutation
associations are loaded by netCompare()
. This option might be useful to
rerun the function with alternative multiple testing adjustment, without the
need of re-estimating all associations.
# Network comparison comp_amgut1 <- netCompare(props_amgut, permTest = TRUE, nPerm = 100, fileLoadAssoPerm = "assoPerm_comp", storeCountsPerm = FALSE, seed = 123456) # Check whether the second comparison leads to equal results all.equal(comp_amgut_orig$properties, comp_amgut1$properties)
The stored permutation associations can also be passed to diffnet()
to
construct a differential network.
# Construct differential network diffnet_amgut <- diffnet(net_amgut, diffMethod = "permute", nPerm = 100, fileLoadAssoPerm = "assoPerm_comp", storeCountsPerm = FALSE) plot(diffnet_amgut)
As expected for a number of permutations of only 100, there are no differential associations after multiple testing adjustment.
Just to take a look how the differential network could look like, we plot the differential network based on non-adjusted p-values. Note that this approach is statistically not correct!
plot(diffnet_amgut, adjusted = FALSE, mar = c(2, 2, 5, 15), legendPos = c(1.2,1.2), legendArgs = list(bty = "n"), legendGroupnames = c("yes", "no"), legendTitle = "Correlations:")
This time, the permutation association matrices are generated using
createAssoPerm()
and then passed to netCompare()
.
The output should be written to a variable because createAssoPerm()
generally
returns the matrix with permuted group labels.
permGroupMat <- createAssoPerm(props_amgut, nPerm = 100, computeAsso = TRUE, fileStoreAssoPerm = "assoPerm", storeCountsPerm = TRUE, append = FALSE, seed = 123456)
Let's take a look at the permuted group labels. To interpret the group labels
correctly, it is important to know, that within netConstruct()
, the data
set is divided into two matrices belonging to the two groups. For the permutation
tests, the two matrices are combined by rows and for each permutation, the
samples are reassigned to one of the two groups while keeping the matching design
for matched data.
In our case, the permGroupMat
matrix consists of 100 rows (nPerm = 100
) and
150 columns (our sample size). The first 50 columns belong to the first group
(group "yes" in our case) and columns 51 to 150 belong to the second group.
Since each two samples of group 2 are matched to one sample of group 1, we number
the group label matrix accordingly. Now, we can see that the matching design is
kept: Since sample 3 is assigned to group 1, samples 1 and 2 have to be assigned
to group 2 and so on (entries [1,1] and [1,51:52] of permGroupMat
).
seq1 <- seq(1,150, by = 3) seq2 <- seq(1:150)[!seq(1:150)%in%seq1] colnames(permGroupMat) <- c(seq1, seq2) permGroupMat[1:5, 1:10] permGroupMat[1:5, 51:71]
As before, the stored permutation association matrices are passed to
netCompare()
.
comp_amgut2 <- netCompare(props_amgut, permTest = TRUE, nPerm = 100, fileLoadAssoPerm = "assoPerm", seed = 123456) # Are the network properties equal? all.equal(comp_amgut_orig$properties, comp_amgut2$properties)
Using the fm.open
function, we take a look at the stored matrices themselves.
# Open stored files and check whether they are equal assoPerm1 <- filematrix::fm.open(filenamebase = "assoPerm_comp" , readonly = TRUE) assoPerm2 <- filematrix::fm.open(filenamebase = "assoPerm" , readonly = TRUE) identical(as.matrix(assoPerm1), as.matrix(assoPerm2)) dim(as.matrix(assoPerm1)) dim(as.matrix(assoPerm2)) # Close files filematrix::close(assoPerm1) filematrix::close(assoPerm2)
Due to limited resources, it might be meaningful to estimate the associations in blocks, that is, for a subset of permutations instead of all permutations at once. We'll now see how to perform such a block-wise network comparison using NetCoMi's functions. Note that in this approach, the external file is extended in each iteration, which is why it is not parallelizable.
In the first step, createAssoPerm
is used to generate the matrix with
permuted group labels (for all permutations!). Hence, we set the computeAsso
parameter to FALSE
.
permGroupMat <- createAssoPerm(props_amgut, nPerm = 100, computeAsso = FALSE, seed = 123456)
We now compute the association matrices in blocks of 20 permutations in each loop (leading to 5 iterations).
Note: The nPerm
argument must be set to the block size.
The external file (containing the association matrices) must be extended in each loop,
except for the first iteration, where the file is created. Thus, append
is set
to TRUE
for i >=2
.
nPerm_all <- 100 blocksize <- 20 repetitions <- nPerm_all / blocksize for (i in 1:repetitions) { print(i) if (i == 1) { # Create a new file in the first run tmp <- createAssoPerm(props_amgut, nPerm = blocksize, permGroupMat = permGroupMat[(i-1) * blocksize + 1:blocksize, ], computeAsso = TRUE, fileStoreAssoPerm = "assoPerm", storeCountsPerm = FALSE, append = FALSE) } else { tmp <- createAssoPerm(props_amgut, nPerm = blocksize, permGroupMat = permGroupMat[(i-1) * blocksize + 1:blocksize, ], computeAsso = TRUE, fileStoreAssoPerm = "assoPerm", storeCountsPerm = FALSE, append = TRUE) } }
The stored file, which now contains the associations of all 100 permutations,
can be passed to netCompare()
as before.
comp_amgut3 <- netCompare(props_amgut, permTest = TRUE, nPerm = 100, storeAssoPerm = TRUE, fileLoadAssoPerm = "assoPerm", storeCountsPerm = FALSE, seed = 123456) # Are the network properties equal to the first comparison? all.equal(comp_amgut_orig$properties, comp_amgut3$properties) # Open stored files and check whether they are equal assoPerm1 <- fm.open(filenamebase = "assoPerm_comp" , readonly = TRUE) assoPerm3 <- fm.open(filenamebase = "assoPerm" , readonly = TRUE) all.equal(as.matrix(assoPerm1), as.matrix(assoPerm3)) dim(as.matrix(assoPerm1)) dim(as.matrix(assoPerm3)) # Close files close(assoPerm1) close(assoPerm3)
If the blocks should be computed in parallel, extending the "assoPerm"
file
in each iteration would not work. To be able to run the blocks in parallel,
we have to create a separate file in each iteration and combine them at the end.
# Create the matrix with permuted group labels (as before) permGroupMat <- createAssoPerm(props_amgut, nPerm = 100, computeAsso = FALSE, seed = 123456) nPerm_all <- 100 blocksize <- 20 repetitions <- nPerm_all / blocksize # 5 repetitions # Execute as standard for-loop: for (i in 1:repetitions) { tmp <- createAssoPerm(props_amgut, nPerm = blocksize, permGroupMat = permGroupMat[(i-1) * blocksize + 1:blocksize, ], computeAsso = TRUE, fileStoreAssoPerm = paste0("assoPerm", i), storeCountsPerm = FALSE, append = FALSE) } # OR execute in parallel: library("foreach") cores <- 2 # Please choose an appropriate number of cores cl <- parallel::makeCluster(cores) doSNOW::registerDoSNOW(cl) # Create progress bar: pb <- utils::txtProgressBar(0, repetitions, style=3) progress <- function(n) { utils::setTxtProgressBar(pb, n) } opts <- list(progress = progress) tmp <- foreach(i = 1:repetitions, .packages = c("NetCoMi"), .options.snow = opts) %dopar% { progress(i) NetCoMi::createAssoPerm(props_amgut, nPerm = blocksize, permGroupMat = permGroupMat[(i-1) * blocksize + 1:blocksize, ], computeAsso = TRUE, fileStoreAssoPerm = paste0("assoPerm", i), storeCountsPerm = FALSE, append = FALSE) } # Close progress bar close(pb) # Stop cluster parallel::stopCluster(cl) # Combine the matrices and store them into a new file (because netCompare() # needs an external file) assoPerm_all <- NULL for (i in 1:repetitions) { assoPerm_tmp <- fm.open(filenamebase = paste0("assoPerm", i) , readonly = TRUE) assoPerm_all <- rbind(assoPerm_all, as.matrix(assoPerm_tmp)) close(assoPerm_tmp) } dim(assoPerm_all) # Combine the permutation association matrices fm.create.from.matrix(filenamebase = "assoPerm", mat = assoPerm_all)
As last step, we pass the file containing the combined matrix to netCompare()
.
comp_amgut4 <- netCompare(props_amgut, permTest = TRUE, nPerm = 100, fileLoadAssoPerm = "assoPerm", storeCountsPerm = FALSE, seed = 123456) # Are the network properties equal to those of the first comparison? all.equal(comp_amgut_orig$properties, comp_amgut4$properties) # Open stored files and check whether they are equal assoPerm1 <- fm.open(filenamebase = "assoPerm_comp" , readonly = TRUE) assoPerm4 <- fm.open(filenamebase = "assoPerm" , readonly = TRUE) identical(as.matrix(assoPerm1), as.matrix(assoPerm4)) dim(as.matrix(assoPerm1)) dim(as.matrix(assoPerm4)) # Close files close(assoPerm1) close(assoPerm4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.