Nothing
#' @title Network-based R-statistics using Linear Model ANOVA
#'
#' @description This function computes the specified linear model (LM) ANOVA for each edge in
#' the network, and calculates the family wise error (FWE) p-value for the size of the clusters
#' of connected edges that are individually below the P threshold (\emph{thrP}), or above the
#' F threshold (\emph{thrF}). FWE estimation is based on the null distribution of the maximum
#' size of sets of connected edges (defined as above), obtained with \emph{nperm} permutations
#' of the original data.
#'
#' @usage nbr_lm_aov(net, nnodes, idata, mod, diag = FALSE, nperm,
#' thrP = 0.05, thrF = NULL, cores = NULL,
#' nudist = FALSE, expList = NULL,
#' verbose = TRUE, ...)
#'
#' @param net 3D volume (2D matrices for each observation) or 2D matrix of edges as columns.
#' @param nnodes Number of network nodes.
#' @param idata Matrix or data.frame including independent variables of interest of the model.
#' @param mod Model, specify as a string, e.g., "~Group + Age".
#' @param diag Logical indicating if matrix diagonal is to be included in the analysis (default: FALSE).
#' @param nperm Number of permutations.
#' @param thrP Individual edge p-value threshold (if NULL, thrF should be given).
#' @param thrF Individual edge F-value threshold (if NULL, thrP should be given).
#' @param cores Number of selected cores for parallel computing (default: NULL).
#' @param nudist Logical indicating if null distribution should be returned (default: FALSE).
#' @param expList Character string adding variable names to the varlist of 'clusterExport' (default: NULL).
#' @param verbose Logical indicating if messages should be printed (default: TRUE).
#' @param ... Additional arguments to be passed to the low level 'lm' function.
#'
#' @inherit nbr_lm return details
#'
#' @examples
#' data(frontal2D)
#' \donttest{
#' ncores <- 2
#' library(parallel)
#' if(detectCores() < ncores) ncores <- NULL
#' nbr_result <- nbr_lm_aov(net = frontal2D[,-(1:3)],
#' nnodes = 28, idata = frontal2D[,1:3],
#' mod = "~ Group + Sex * Age",
#' thrP = 0.01, nperm = 5, cores = ncores)
#' show(nbr_result)
#' }
#'
#' @importFrom stats as.formula anova lm qf aggregate
#' @importFrom parallel detectCores makeCluster clusterExport parSapply stopCluster
#' @export
nbr_lm_aov <- function(net,
nnodes,
idata,
mod,
diag = FALSE,
nperm,
thrP = 0.05,
thrF = NULL,
cores = NULL,
nudist = FALSE,
expList = NULL,
verbose = TRUE,
...){
# Check 'diag' and 'nnodes'
if(!is.logical(diag)) stop("STOP: diagonal variable must be logical!")
if(nnodes%%1 != 0) stop("STOP: number of nodes must be integer!!")
# Edge positions
tri_pos <- which(upper.tri(matrix(nrow = nnodes, ncol = nnodes), diag = diag),
arr.ind = T)
# Check if everything is alright
net_dim <- dim(net)
# If 3D input, reshape to 2D
if(length(net_dim)==3){
# Check number of variables
if(!all(net_dim[1:2]==nnodes)) stop("STOP: number of matrix variables does not match volume dimmensions")
# Extract edges
mx <- sapply(1:nrow(tri_pos), function(x) net[tri_pos[x,1],tri_pos[x,2],])
} else if(length(net_dim)==2){
# Check number of variables
if(net_dim[2] != nrow(tri_pos)) stop("STOP: number of matrix variables does not match column numbers")
# Extract edges
mx <- net
} else stop("STOP: network input has to be a 2D or 3D array!!")
if(nrow(idata)!=nrow(mx)) stop("STOP: network volume dimension 3 and dataset must match!!")
if(nperm%%1 != 0) stop("STOP: number of permutations must be integer!!")
if(!is.numeric(as.matrix(mx))) stop("STOP: network array is not numeric!!")
# Apply example model edgewise
lm_form <- as.formula(paste0("mx[,",1,"]",mod))
fit_ex <- anova(lm(lm_form, idata, ...))
# Create empty object to store F-statistic and p-values edgewise
obsF <- matrix(0, nrow = nrow(tri_pos), ncol = (nrow(fit_ex)-1)*2)
# Compute observed stats
if(verbose) cat("Computing observed stats")
# Set (or not) parallelization
if(is.null(cores)){
# Apply example model edgewise
obsF <- t(sapply(1:nrow(tri_pos), function(x){
lm_form <- as.formula(paste0("mx[,",x,"]",mod))
fit <- anova(lm(lm_form, idata, ...))
c(rbind(fit$`F value`[1:(nrow(fit)-1)],fit$`Pr(>F)`[1:(nrow(fit)-1)]))
}))
} else{
if(cores%%1 != 0) stop("STOP: number of cores must be integer")
#library(parallel)
if(cores > detectCores()) stop("STOP: input number of cores is to high")
# Set 'clusterExport' varlist
vlist <- c("tri_pos", "mod", "idata", "mx")
if(!is.null(expList)){
if(!is.character(expList)) stop("STOP: input 'expList' is not a character string")
vlist <- c(vlist,expList)
}
if(verbose) cat(paste0(" (setting ",cores," cluster cores)"))
cl <- makeCluster(cores)
# Set variables for local function from the main function environment
clusterExport(cl=cl, varlist=vlist, envir=environment())
# Apply example model edgewise
obsF <- t(parSapply(cl, 1:nrow(tri_pos), function(x){
lm_form <- as.formula(paste0("mx[,",x,"]",mod))
fit <- anova(lm(lm_form, idata, ...))
c(rbind(fit$`F value`[1:(nrow(fit)-1)],fit$`Pr(>F)`[1:(nrow(fit)-1)]))
}))
stopCluster(cl)
}
colnames(obsF) <- paste0(rep(rownames(fit_ex)[1:(nrow(fit_ex)-1)], each = 2),c("_F","_p"))
if(verbose) cat(".\n")
# Find components based on F-statistic or p-values thresholds
if(all(is.null(thrF),is.null(thrP))) stop("STOP: F or p-value threshold needed!")
if(is.null(thrP)){thr <- thrF; thr_idx <- 1}
if(is.null(thrF)){thr <- thrP; thr_idx <- 2}
# Initial variables for component search
obs_comp <- matrix(1:nnodes, nrow = nnodes, ncol = nrow(fit_ex)-1)
obs_list <- vector("list",nrow(fit_ex)-1)
# Set names
colnames(obs_comp) <- rownames(fit_ex)[1:(nrow(fit_ex)-1)]
names(obs_list) <- rownames(fit_ex)[1:(nrow(fit_ex)-1)]
# Label component for each node and edge (and store strength as well)
if(thr_idx == 1){
# For each independent variable in the LM
for(ii in 1:length(obs_list)){
# Find F-statistics (F > thrF) edges
edges <- which(obsF[,ii*2-thr_idx]>thr)
if(length(edges)>0){
# Generate empty object for component label
component <- vector("integer", length(edges))
# Store strength intensity
strength <- obsF[edges,ii*2-thr_idx]-thrF
# Label nodes and edge component
for(jj in 1:length(edges)){
# Maximum label to be changed
max_lab <- max(obs_comp[tri_pos[edges[jj],],ii])
min_lab <- min(obs_comp[tri_pos[edges[jj],],ii])
if(sum(component==max_lab)>0) component[which(component==max_lab)] <- min_lab
pre_idx <- which(obs_comp[,ii]==max_lab)
obs_comp[pre_idx,ii] <- component[jj] <- min_lab
}
# Store results
obs_list[[ii]] <- cbind(edges,tri_pos[edges,1],tri_pos[edges,2],component,strength)
colnames(obs_list[[ii]]) <- c("2Dcol","3Drow","3Dcol","comp","strn")
}
}
} else{
# For each independent variable in the LM
for(ii in 1:length(obs_list)){
# Find significant (p < thrP) edges
edges <- which(obsF[,ii*thr_idx]<thr)
if(length(edges)>0){
# Generate empty object for component label
component <- vector("integer", length(edges))
# Store strength intensity
strength <- obsF[edges,ii*thr_idx-1]-qf(1-thrP,fit_ex$Df[ii],fit_ex$Df[length(fit_ex$Df)])
# Label nodes and edge component
for(jj in 1:length(edges)){
# Maximum label to be changed
max_lab <- max(obs_comp[tri_pos[edges[jj],],ii])
min_lab <- min(obs_comp[tri_pos[edges[jj],],ii])
if(sum(component==max_lab)>0) component[which(component==max_lab)] <- min_lab
pre_idx <- which(obs_comp[,ii]==max_lab)
obs_comp[pre_idx,ii] <- component[jj] <- min_lab
}
# Store results
obs_list[[ii]] <- cbind(edges,tri_pos[edges,1],tri_pos[edges,2],component,strength)
colnames(obs_list[[ii]]) <- c("2Dcol","3Drow","3Dcol","comp","strn")
}
}
}
# Permutate dataset
if(verbose) cat("Computing permutated stats")
# Create empty object to store FWE null distribution
null_dist <- array(0, dim=c(nperm, 2*length(obs_list)))
colnames(null_dist) <- paste0(rep(names(obs_list), each=2),c("_ncomp","_strn"))
# Set (or not) parallelization
if(!is.null(cores)){
# Set 'clusterExport' varlist
vlist <- c("tri_pos", "mod", "pdata", "mx")
if(!is.null(expList)) vlist <- c(vlist,expList)
if(verbose) cat(paste0(" (setting ",cores," cluster cores)"))
cl <- makeCluster(cores)
}
if(verbose) cat(".\n")
# Apply permutations
if(verbose) cat("Permutation progress: ")
for(pp in 1:nperm){
# Print progress
if(verbose) if(pp%%100 == 1) cat("...")
if(verbose) if(pp%%100 == 0) cat(pp)
# Create empty object to store F-statistic and p-values edgewise
permF <- matrix(0, nrow = nrow(tri_pos), ncol = (nrow(fit_ex)-1)*2)
# Permutate inference dataset
pdata <- idata[sample(1:nrow(idata)),]
# Set (or not) parallelization
if(is.null(cores)){
# Apply example model edgewise
permF <- t(sapply(1:nrow(tri_pos), function(x){
lm_form <- as.formula(paste0("mx[,",x,"]",mod))
fit <- anova(lm(lm_form, pdata, ...))
c(rbind(fit$`F value`[1:(nrow(fit)-1)],fit$`Pr(>F)`[1:(nrow(fit)-1)]))
}))
} else{
# Set variables for local function from the main function environment
clusterExport(cl=cl, varlist=vlist, envir=environment())
# Apply example model edgewise
permF <- t(parSapply(cl, 1:nrow(tri_pos), function(x){
lm_form <- as.formula(paste0("mx[,",x,"]",mod))
fit <- anova(lm(lm_form, pdata, ...))
c(rbind(fit$`F value`[1:(nrow(fit)-1)],fit$`Pr(>F)`[1:(nrow(fit)-1)]))
}))
}
# Initial variables for component search
perm_comp <- matrix(1:nnodes, nrow = nnodes, ncol = nrow(fit_ex)-1)
perm_list <- vector("list",nrow(fit_ex)-1)
# Label component for each node and edge (and store strength as well)
if(thr_idx == 1){
# For each independent variable in the LM
for(ii in 1:length(perm_list)){
# Find F-statistics (F > thrF) edges
edges <- which(permF[,ii*2-thr_idx]>thr)
if(length(edges)>0){
# Generate empty object for component label
component <- vector("integer", length(edges))
# Store strength
strength <- permF[edges,ii*2-thr_idx]-thrF
# Label nodes and edge component
for(jj in 1:length(edges)){
# Maximum label to be changed
max_lab <- max(perm_comp[tri_pos[edges[jj],],ii])
min_lab <- min(perm_comp[tri_pos[edges[jj],],ii])
if(sum(component==max_lab)>0) component[which(component==max_lab)] <- min_lab
pre_idx <- which(perm_comp[,ii]==max_lab)
perm_comp[pre_idx,ii] <- component[jj] <- min_lab
}
# Store results
perm_list[[ii]] <- cbind(component,strength)
}
}
} else{
# For each independent variable in the LM
for(ii in 1:length(perm_list)){
# Find significant (p < thrP) edges
edges <- which(permF[,ii*thr_idx]<thr)
if(length(edges)>0){
# Generate empty object for component label
component <- vector("integer", length(edges))
# Store strength
strength <- permF[edges,ii*thr_idx-1]-qf(1-thrP,fit_ex$Df[ii],fit_ex$Df[length(fit_ex$Df)])
# Label nodes and edge component
for(jj in 1:length(edges)){
# Maximum label to be changed
max_lab <- max(perm_comp[tri_pos[edges[jj],],ii])
min_lab <- min(perm_comp[tri_pos[edges[jj],],ii])
if(sum(component==max_lab)>0) component[which(component==max_lab)] <- min_lab
pre_idx <- which(perm_comp[,ii]==max_lab)
perm_comp[pre_idx,ii] <- component[jj] <- min_lab
}
# Store results
perm_list[[ii]] <- cbind(component,strength)
}
}
}
# Save maximum values for the FWE null distribution
for(ii in 1:length(perm_list)){
if(!is.null(perm_list[[ii]])){
null_dist[pp,(2*ii)-1] <- max(table(perm_list[[ii]][,1]))
null_dist[pp,(2*ii)] <- max(aggregate(perm_list[[ii]][,2]~perm_list[[ii]][,1], perm_list[[ii]], sum)[,2])
}
}
}#for(pp in 1:nperm)
if(!is.null(cores)) stopCluster(cl)
if(verbose) cat(".\n")
# Compute FWE for each observed component
# Quantile observed values within the null distribution
fwe_list <- vector("list",nrow(fit_ex)-1)
# Set names
names(fwe_list) <- rownames(fit_ex)[1:(nrow(fit_ex)-1)]
# Compute quantiles
for(ii in 1:length(obs_list)){
if(!is.null(obs_list[[ii]])){
# Number of components FWE
obs_ncomp <- ncompFWE <- table(obs_list[[ii]][,4])
for(cc in 1:length(obs_ncomp)) ncompFWE[cc] <- sum(null_dist[,(2*ii)-1] > obs_ncomp[cc])/nperm
# Components strength
obs_strn <- strnFWE <- aggregate(obs_list[[ii]][,5]~obs_list[[ii]][,4], obs_list[[ii]], sum)
for(cc in 1:nrow(obs_strn)) strnFWE[cc,2] <- sum(null_dist[,(2*ii)] > obs_strn[cc,2])/nperm
# Save results
DF <- as.data.frame(cbind(obs_strn[,1], c(obs_ncomp), c(ncompFWE), obs_strn[,2], strnFWE[,2]))
names(DF) <- c("Component","ncomp","ncompFWE","strn","strnFWE")
rownames(obs_strn) <- c()
fwe_list[[ii]] <- DF
}
}
# Return results
if(nudist){
return(list(components = obs_list, fwe = fwe_list, nudist = null_dist))
} else{
return(list(components = obs_list, fwe = fwe_list))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.