#' Estimate prior from DR
#'
#' Estimate variance decomposition and priors from DR estimates.
#'
#' @param DR the test/retest dual regression estimates, as an array with
#' dimensions \eqn{M \times N \times (L \times V)}, where \eqn{M} is the number
#' of visits (2), \eqn{N} is the number of subjects, \eqn{L} is the number of
#' brain networks, and \eqn{V} is the number of data locations.
#'
#' (\eqn{L} and \eqn{V} are collapsed because they are treated equivalently
#' in the context of calculating the variance decomposition and priors).
#' @param LV A length-two integer vector giving the dimensions \eqn{L} and
#' \eqn{V} to reshape the result. Default: \code{NULL} (do not reshape the
#' result).
#'
#' @return List of two elements: the priors and the variance decomposition.
#'
#' There are two versions of the variance prior: \code{varUB} gives the
#' unbiased variance estimate, and \code{varNN} gives the upwardly-biased
#' non-negative variance estimate. Values in \code{varUB} will need to be
#' clamped above zero before use in \code{BrainMap}.
#'
#' @importFrom fMRItools var_decomp
#' @keywords internal
estimate_prior_from_DR <- function(
DR, LV=NULL){
# Check arguments.
stopifnot(length(dim(DR)) == 3)
nM <- dim(DR)[1] # visits
nN <- dim(DR)[2] # subjects
nLV <- dim(DR)[3] # locations & networks
if (!is.null(LV)) {
stopifnot(is.numeric(LV) && length(LV) == 2)
stopifnot(all(LV > 0) && all(LV == round(LV)))
stopifnot(prod(LV) == nLV)
}
# Variance decomposition
vd <- var_decomp(DR)
# Prior calculation
# The formulas below are correct for M==2; double-check before using with M > 2. (Not currently used for M > 2.)
MSB_divM <- (vd$SSB / (nN-1)) / nM
MSE_divM <- (vd$SSR / ((nM-1)*(nN-1))) / nM
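# For a one-way random-effects decomposition with nM visits per subject:
# E[MSB] = nM*var_between + var_within and E[MSE] = var_within. So
# (MSB - MSE)/nM is unbiased for the between-subject variance (`varUB`),
# while MSB/nM = var_between + var_within/nM is non-negative but upwardly
# biased (`varNN`).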
prior <- list(
mean = vd$grand_mean,
varUB = MSB_divM - MSE_divM,
varNN = MSB_divM
)
# Format `vd`
vd$nM <- vd$nS <- vd$grand_mean <- NULL # Get rid of redundant entries
## Format `prior`: clamp var est above zero.
# prior$varUB <- pmax(0, prior$varUB)
# Format as matrix.
if (!is.null(LV)) {
prior <- lapply(prior, function(x){ matrix(x, nrow=LV[1], ncol=LV[2]) })
vd <- lapply(vd, function(x){ matrix(x, nrow=LV[1], ncol=LV[2]) })
}
# Return
list(prior=prior, var_decomp=vd)
}
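# A minimal usage sketch with hypothetical toy data (shapes only; the values
# are random), illustrating the expected input and output:
# nM <- 2; nN <- 10; nL <- 3; nV <- 50
# DR <- array(rnorm(nM*nN*nL*nV), dim=c(nM, nN, nL*nV))
# x <- estimate_prior_from_DR(DR, LV=c(nL, nV))
# str(x$prior) # mean, varUB, varNN: each an nL x nV matrix
# x$prior$varUB <- pmax(0, x$prior$varUB) # clamp before use in `BrainMap`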
#' Estimate prior from DR estimates (when there are two measurements)
#'
#' Legacy version of \code{\link{estimate_prior_from_DR}}
#'
#' @param DR1,DR2 the test and retest dual regression estimates (\eqn{N \times L \times V})
#'
#' @return List with the mean prior (\code{mean}), the non-negative variance
#' prior (\code{var_nn}), and the unbiased variance prior (\code{var_ub})
#' @keywords internal
estimate_prior_from_DR_two <- function(DR1, DR2){
# Check arguments.
stopifnot(length(dim(DR1)) == length(dim(DR2)))
stopifnot(all(dim(DR1) == dim(DR2)))
N <- dim(DR1)[1]
prior <- list(mean=NULL, var_nn=NULL, var_ub=NULL)
# Mean.
prior$mean <- t(colMeans(DR1 + DR2, na.rm=TRUE) / 2)
# Variance.
SSB <- 2 * colSums(((DR1 + DR2)/2 - rep(t(prior$mean), each=N))^2, na.rm=TRUE)
prior$var_nn <- t(SSB / (N-1)) / 2 # MSB/2
# Unbiased.
# 1. Fastest method.
var_noise <- t( (1/2) * apply(DR1 - DR2, c(2,3), var, na.rm=TRUE) )
prior$var_ub <- prior$var_nn - var_noise/2
# # 2. Previous, equivalent calculation.
# var_tot1 <- apply(DR1, c(2,3), var, na.rm=TRUE)
# var_tot2 <- apply(DR2, c(2,3), var, na.rm=TRUE)
# var_tot <- t((var_tot1 + var_tot2)/2)
# # noise (within-subject) variance
# DR_diff <- DR1 - DR2;
# var_noise <- t((1/2)*apply(DR_diff, c(2,3), var, na.rm=TRUE))
# # signal (between-subject) variance
# prior$var <- var_tot - var_noise
#
# # 3. Another equivalent calculation.
# prior$var <- t(apply(
# abind::abind(DR1, DR2, along=1),
# seq(2, 3),
# function(q){ cov(q[seq(N)], q[seq(N+1, 2*N)], use="complete.obs") }
# ))
# Make negative estimates equal to zero.
prior$var_ub[prior$var_ub < 0] <- 0
prior
}
#' Estimate FC prior
#'
#' @param FC0 The FC estimates from \code{\link{estimate_prior}}.
#' @param nu_adjust Factor by which to adjust estimate of nu. Values < 1 will
#' inflate the prior variance to avoid an over-informative prior on FC.
#' @importFrom matrixStats colVars
#' @keywords internal
estimate_prior_FC <- function(FC0, nu_adjust=1){
nL <- dim(FC0)[3]
stopifnot(nL == dim(FC0)[4])
FC1 <- FC0[1,,,]; FC2 <- FC0[2,,,]
FCavg <- (FC1 + FC2)/2
mean_FC <- apply(FCavg, c(2,3), mean, na.rm=TRUE)
var_FC_between <- apply(FCavg, c(2,3), var, na.rm=TRUE) #this may be an overestimate but that's ok
# mean_FC <- (colMeans(FC1, na.rm=TRUE) + colMeans(FC2, na.rm=TRUE)) / 2
# var_FC_tot <- (apply(FC1, c(2, 3), var, na.rm=TRUE) + apply(FC2, c(2, 3), var, na.rm=TRUE))/2
# #var_FC_within <- 1/2*(apply(FC1-FC2, c(2, 3), var, na.rm=TRUE))
# var_FC_between <- var_FC_tot # var_FC_within #to avoid under-estimating the variance, given that within-subject variance in FC is often high
# var_FC_between[var_FC_between < 0] <- NA
nu_est <- estimate_nu(var_FC_between, mean_FC)
nu_est <- max(nL+4, nu_est*nu_adjust)
list(nu = nu_est,
psi = mean_FC*(nu_est - nL - 1),
mean_empirical = mean_FC,
var_empirical = var_FC_between)
}
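# Note on the parameterization above: for an inverse-Wishart prior IW(psi, nu)
# on an nL x nL matrix, E[IW] = psi/(nu - nL - 1), so psi = mean_FC*(nu - nL - 1)
# matches the prior mean to the empirical mean FC. A quick check with
# hypothetical toy data:
# FC0 <- array(NA, dim=c(2, 20, 4, 4))
# for (mm in 1:2) for (nn in 1:20) {
# FC0[mm,nn,,] <- cov2cor(crossprod(matrix(rnorm(40), 10, 4)))
# }
# pr <- estimate_prior_FC(FC0)
# max(abs(pr$psi / (pr$nu - 4 - 1) - pr$mean_empirical)) # ~0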
#' Cholesky-based FC sampling
#'
#' @param Chol_vals Matrix of Cholesky factorizations (upper triangular values)
#' for all prior sessions (\eqn{(nN \cdot nM) \times nChol})
#' @param p Pivot (reordering) applied to the FC matrices prior to Cholesky
#' factorization
#' @param M Number of samples to draw
#' @param chol_diag Indices of the diagonal upper triangular elements
#' @param chol_offdiag Indices of the off-diagonal upper triangular elements
#' @param Chol_mat_blank An \eqn{nL \times nL} matrix indexing the upper
#' triangular elements
#' @importFrom fMRItools UT2mat
#' @importFrom stats rnorm
#' @keywords internal
Chol_samp_fun <- function(Chol_vals, p, M, chol_diag, chol_offdiag, Chol_mat_blank){
nUT <- ncol(Chol_vals)
nL <- length(chol_diag)
logit <- function(p){log(p/(1-p))}
expit <- function(x){exp(x)/(1+exp(x))}
fishZ <- function(r){(1/2)*log((1+r)/(1-r))}
fishZinv <- function(z){(exp(2*z)-1)/(exp(2*z)+1)}
#grab upper triangle of each Cholesky matrix and logit/Fisher transform values
Chol_vals[,1] <- NA #ignore first element since it is always 1
Chol_vals[,chol_diag] <- logit(Chol_vals[,chol_diag]) #logit-transform diagonal values
Chol_vals[,chol_offdiag] <- fishZ(Chol_vals[,chol_offdiag]) #z-transform off-diagonal values
Chol_vals <- Chol_vals[,-1] #sess x (nUT-1) matrix (first element always equals 1)
#perform SVD
chol_svd <- svd(scale(Chol_vals, scale=FALSE)) # X = u %*% diag(d) %*% t(v)
K <- length(chol_svd$d) #keep all the factors
U <- chol_svd$u
V <- chol_svd$v
D <- diag(chol_svd$d)
#take samples of U
sd <- sqrt(mean(apply(U, 2, var))) #estimate SD of the scores (same across PCs)
U_samp <- matrix(rnorm(M*K, mean=0, sd = sd), nrow=M, ncol=K)
#2. transform back to Cholesky elements
UDV_samp <- U_samp %*% D %*% t(V)
Chol_mean <- colMeans(Chol_vals) #mean that was removed prior to PCA
Chol_samp <- UDV_samp + matrix(Chol_mean, nrow=M, ncol=nUT-1, byrow=TRUE) #add back in mean
Chol_samp <- cbind(1, Chol_samp) #add back in first element (always = 1)
Chol_samp[,chol_diag] <- expit(Chol_samp[,chol_diag]) #apply inverse-logit transformation to diagonal elements
Chol_samp[,chol_offdiag] <- fishZinv(Chol_samp[,chol_offdiag]) #apply inverse-Fisher transformation to off-diagonal elements
Chol_samp[,1] <- 1 #correct the first element to equal 1 again
#3. enforce correlation scale
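# (Each column of the UT Cholesky factor R of a correlation matrix has unit
# sum of squares, since diag(crossprod(R)) = diag(FC) = 1. The inverse
# transformations above do not preserve this exactly, so renormalize each
# column.)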
for(col in 2:nL){
inds_col <- Chol_mat_blank[1:col,col]
#for each sample, compute the sum of squares for the current column
SS_col <- rowSums((Chol_samp[,inds_col])^2)
Chol_samp[,inds_col] <- Chol_samp[,inds_col] / sqrt(SS_col)
}
# #compute the mean and variance over samples
# Chol_samp_mean <- UT2mat(colMeans(Chol_samp), LT=0)
# Chol_samp_var <- UT2mat(colVars(Chol_samp), LT=0)
# Chol_samp_mean[lower.tri(Chol_samp_mean)] <- NA
# Chol_samp_var[lower.tri(Chol_samp_var)] <- NA
#reconstruct corresponding FC matrices, compute mean and variance
#these are provided so the user can assess how well the samples emulate the FC edge-wise pop. mean and variance
Chol_samp_list <- apply(Chol_samp, 1, list)
FC_samp_list <- lapply(Chol_samp_list, function(R){
R <- R[[1]]
R_reorder <- UT2mat(R, LT=0)[,order(p)] #reverse-pivot columns of the Cholesky UT matrix
crossprod(R_reorder) #t(R_reorder) %*% R_reorder
})
result <- list(Chol_samp = Chol_samp,
# Chol_samp_mean = Chol_samp_mean,
# Chol_samp_var = Chol_samp_var,
FC_samp_list = FC_samp_list,
#FC_samp_mean = FC_samp_mean,
#FC_samp_var = FC_samp_var,
chol_svd = chol_svd)
return(result)
}
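# A usage sketch mirroring the per-pivot loop in `estimate_prior` (the toy
# inputs below are hypothetical):
# nL <- 4; nUT <- nL*(nL+1)/2
# Chol_mat_blank <- matrix(0, nL, nL)
# Chol_mat_blank[upper.tri(Chol_mat_blank, diag=TRUE)] <- seq(nUT)
# chol_diag <- diag(Chol_mat_blank)
# chol_offdiag <- Chol_mat_blank[upper.tri(Chol_mat_blank, diag=FALSE)]
# p <- sample(nL) # one random pivot
# Chol_vals <- t(replicate(40, { # 40 pseudo-sessions' pivoted Cholesky factors
# FC <- cov2cor(crossprod(matrix(rnorm(10*nL), 10, nL)))
# R <- chol(FC[p, p])
# R[upper.tri(R, diag=TRUE)]
# }))
# samp <- Chol_samp_fun(Chol_vals, p, M=100, chol_diag, chol_offdiag, Chol_mat_blank)
# dim(samp$Chol_samp) # 100 x nUT: sampled (pivoted) Cholesky factors
# samp$FC_samp_list[[1]] # one reconstructed FC sample (a correlation matrix)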
# estimate_prior_FC_chol <- function(FC0_chol){
#
# # FC0_chol is 2 x n_subjects x Q*(Q+1)/2
#
# FC_chol_avg <- (FC0_chol[1,,] + FC0_chol[2,,])/2
# mean_FC_chol <- apply(FC_chol_avg, 2, mean, na.rm=TRUE)
# var_FC_chol1 <- apply(FC0_chol[1,,], 2, var, na.rm=TRUE)
# var_FC_chol2 <- apply(FC0_chol[2,,], 2, var, na.rm=TRUE)
# var_FC_chol <- (var_FC_chol1 + var_FC_chol2)/2
#
# list(mean_empirical = mean_FC_chol,
# var_empirical = var_FC_chol)
#
# }
#' Estimate prior
#'
#' Estimate prior for Bayesian brain mapping based on fMRI data
#'
#' All fMRI data (entries in \code{BOLD} and \code{BOLD2}, and \code{template}) must
#' be in the same spatial resolution.
#'
#' @param BOLD,BOLD2 Vector of subject-level fMRI data in one of the following
#' formats: CIFTI file paths, \code{"xifti"} objects, GIFTI file paths,
#' \code{"gifti"} objects, NIFTI file paths, \code{"nifti"} objects,
#' or \eqn{V \times T} numeric matrices, where \eqn{V} is the number of data
#' locations and \eqn{T} is the number of timepoints.
#
# If GIFTI or \code{"gifti"}, the input can also be a length two list,
# where the first list element is a length \eqn{N} vector for the left
# hemisphere and the second list element is a length \eqn{N} vector for the
# right hemisphere.
#'
#' If \code{BOLD2} is provided it must be in the same format as \code{BOLD};
#' \code{BOLD} will be the test data and \code{BOLD2} will be the retest data.
#' \code{BOLD2} should be the same length as \code{BOLD} and have the same
#' subjects in the same order. If \code{BOLD2} is not provided, \code{BOLD}
#' will be split in half; the first half will be the test data and the second
#' half will be the retest data.
#' @param template Group-level template: either a group ICA (GICA), or a
#' parcellation.
#'
#' A GICA should be provided as a format compatible with \code{BOLD}, or a
#' (vectorized) numeric matrix (\eqn{V \times Q}). Its columns will be
#' centered.
#'
#' A parcellation must be in CIFTI format for use with CIFTI BOLD data (other
#' formats to be implemented in the future). The parcellation should have the
#' same locations as the BOLD and one column, with integer values indicating
#' the parcel to which each location belongs. Each parcel is modeled as a
#' brain map; instead of the first step of dual regression, the mean
#' timecourse of each parcel is used.
#'
#' @param inds Numeric indices of the networks in \code{template} to include in
#' the prior. If \code{NULL}, use all of the original networks (default).
#'
#' If \code{inds} is provided, the networks not included will be removed after
#' calculating dual regression, not before. This is because removing the networks
#' prior to dual regression would leave unmodelled signals in the data, which
#' could bias the priors.
#' @inheritParams scale_Param
#' @param scale_sm_surfL,scale_sm_surfR,scale_sm_FWHM Only applies if
#' \code{scale=="local"} and \code{BOLD} represents surface data (CIFTI or
#' GIFTI). To smooth the standard deviation estimates used for local scaling,
#' provide the surface geometries along which to smooth as GIFTI geometry files
#' or \code{"surf"} objects, as well as the smoothing FWHM (default: \code{2}).
#'
#' If \code{scale_sm_FWHM==0}, no smoothing of the local standard deviation
#' estimates will be performed.
#'
#' If \code{scale_sm_FWHM>0} but \code{scale_sm_surfL} and
#' \code{scale_sm_surfR} are not provided, the default inflated surfaces from
#' the HCP will be used.
#'
#' To create a \code{"surf"} object from data, see
#' \code{\link[ciftiTools]{make_surf}}. The surfaces must be in the same
#' resolution as the \code{BOLD} data.
#' @param nuisance (Optional) Nuisance matrices to regress from the BOLD data.
#' Should be a list of matrices, with time along the rows and nuisance signals
#' along the columns, where each entry corresponds to a \code{BOLD} session;
#' or, if \code{BOLD2} is provided, should be a length-2 list of such lists
#' with the first sublist corresponding to \code{BOLD} and the second sublist
#' corresponding to \code{BOLD2}. If \code{NULL}, do not remove any nuisance
#' signals.
#'
#' Nuisance regression is performed in a simultaneous regression with any spike
#' regressors from \code{scrub} and DCT bases from \code{hpf}.
#'
#' Note that the nuisance matrices should be provided with timepoints matching
#' the original \code{BOLD} and \code{BOLD2}, regardless of \code{drop_first}.
#' Nuisance matrices will be truncated automatically if \code{drop_first>0}.
#' @param scrub (Optional) Numeric vectors of integers giving the indices
#' of volumes to scrub from the BOLD data. (List the volumes to remove, not the
#' ones to keep.) Should be a list of such vectors, where each entry
#' corresponds to a \code{BOLD} session; or, if \code{BOLD2} is provided,
#' should be a length-2 list of such lists with the first sublist corresponding
#' to \code{BOLD} and the second sublist corresponding to \code{BOLD2}. If
#' \code{NULL} (default), do not scrub.
#'
#' Scrubbing is performed within a nuisance regression by adding a spike
#' regressor to the nuisance design matrix for each volume to scrub.
#'
#' Note that indices are counted beginning with the first index in the
#' \code{BOLD} session, regardless of \code{drop_first}. The indices will be
#' adjusted automatically if \code{drop_first>0}.
#' @param drop_first (Optional) Number of volumes to drop from the start of each
#' BOLD session. Default: \code{0}.
#' @inheritParams TR_param
#' @inheritParams hpf_param
#' @inheritParams GSR_Param
#' @param brainstructures Only applies if the entries of \code{BOLD} are CIFTI
#' file paths. This is a character vector indicating which brain structure(s)
#' to obtain: \code{"left"} (left cortical surface), \code{"right"} (right
#' cortical surface) and/or \code{"subcortical"} (subcortical and cerebellar
#' gray matter). Can also be \code{"all"} (obtain all three brain structures).
#' Default: \code{"all"}.
#' @param resamp_res Only applies if the entries of \code{BOLD} are CIFTI file paths.
#' Resample the data upon reading it in? Default: \code{NULL} (no resampling).
#' @param mask Required if \code{BOLD} are NIFTI file paths or \code{"nifti"}
#' objects, and optional for other formats. For NIFTI data, this is a logical
#' array of the same spatial dimensions as the fMRI data, with \code{TRUE}
#' corresponding to in-mask voxels. For other data, this is a logical vector
#' with the same length as the number of locations in \code{template}, with
#' \code{TRUE} corresponding to in-mask locations.
#' @param keep_S Keep the DR estimates of S? If \code{FALSE} (default), do not save
#' the DR estimates and only return the priors. If \code{TRUE}, the DR
#' estimates of S are returned too. If a single file path, save the DR estimates as
#' an RDS file at that location rather than returning them.
# [TO DO] If a list of two vectors of file paths with the same lengths as
# \code{BOLD}, save the DR estimates as individual files at these locations in
# the appropriate format (CIFTI, NIFTI, or RDS files, depending on \code{BOLD}).
#' @param keep_FC Keep the DR estimates of the FC cor(A)? If \code{FALSE} (default), do not save
#' the DR estimates and only return the priors. If \code{TRUE}, the DR
#' estimates of cor(A) and its Cholesky factor are returned too.
#' If a single file path, save the DR estimates as
#' an RDS file at that location rather than returning them.
# [TO DO] If a list of two vectors of file paths with the same lengths as
# \code{BOLD}, save the DR estimates as individual files at these locations in
# the appropriate format (CIFTI, NIFTI, or RDS files, depending on \code{BOLD}).
#' @param Q2,Q2_max Obtain dual regression estimates after denoising? Denoising is
#' based on modeling and removing nuisance ICs. It may result in a cleaner
#' estimate for smaller datasets, but it may be unnecessary (and time-consuming)
#' for larger datasets.
#'
#' Set \code{Q2} to control denoising: use a positive integer to specify the
#' number of nuisance ICs, \code{NULL} to have the number of nuisance ICs
#' estimated by PESEL, or zero (default) to skip denoising.
#'
#' If \code{is.null(Q2)}, use \code{Q2_max} to specify the maximum number of
#' nuisance ICs that should be estimated by PESEL. \code{Q2_max} must be less
#' than \eqn{T * .75 - Q} where \eqn{T} is the minimum number of timepoints in
#' each fMRI scan and \eqn{Q} is the number of networks in the \code{template}.
#' If \code{NULL} (default), \code{Q2_max} will be set to \eqn{T * .50 - Q},
#' rounded.
#' @param covariates Subjects by variables numeric matrix of covariates to take
#' into account for model estimation. Column names should give the name of each
#' variable. Default: \code{NULL} (no covariates). NOTE: Not implemented yet.
#' @param FC Include the functional connectivity prior? Default: \code{TRUE}.
#' @param FC_nPivots Number of pivots to use in Cholesky-based FC prior
#' estimation. Set to zero to skip Cholesky-based FC prior estimation.
#' Default: \code{100}.
#' @param FC_nSamp Number of FC matrix samples to generate across all pivots.
#' Must be a multiple of \code{FC_nPivots}. Default: \code{50000}.
#' @param varTol Tolerance for variance of each data location. For each scan,
#' locations which do not meet this threshold are masked out of the analysis.
#' Default: \code{1e-6}. Variance is calculated on the original data, before
#' any normalization.
#' @param maskTol For computing the dual regression results for each subject:
#' tolerance for number of locations masked out due to low
#' variance or missing values. If more than this many locations are masked out,
#' a subject is skipped without calculating dual regression. \code{maskTol}
#' can be specified either as a proportion of the number of locations (between
#' zero and one), or as a number of locations (integers greater than one).
#' Default: \code{.1}, i.e. up to 10 percent of locations can be masked out.
#'
#' If \code{BOLD2} is provided, masks are calculated for both scans and then
#' the intersection of the masks is used, for each subject.
#' @param missingTol For computing the variance decomposition across all subjects:
#' tolerance for number of subjects masked out due to low variance or missing
#' values at a given location. If more than this many subjects are masked out,
#' the location's value will be \code{NA} in the priors. \code{missingTol}
#' can be specified either as a proportion of the number of subjects (between
#' zero and one), or as a number of subjects (integers greater than one).
#' Default: \code{.1}, i.e. up to 10 percent of subjects can be masked out
#' at a given location.
#' @param usePar,wb_path Parallelize the DR computations over subjects? Default:
#' \code{FALSE}. Can be the number of cores to use, or \code{TRUE}, which will
#' use the number of available cores minus two. If the input data is in CIFTI
#' format, \code{wb_path} must also be provided.
#' @param verbose Display progress updates? Default: \code{TRUE}.
#'
#' @importFrom stats cov quantile complete.cases
#' @importFrom fMRItools is_1 is_integer is_posNum colCenter unmask_mat infer_format_ifti_vec all_binary
#' @importFrom abind abind
#'
#' @return A list: the \code{prior} and \code{var_decomp} with entries in
#' matrix format; the \code{mask} of locations without prior values due to
#' too many low-variance or missing values; the \code{params} of the function
#' call, such as the type of scaling and detrending performed; the
#' \code{dat_struct}, which can be used to convert \code{prior} and
#' \code{var_decomp} to \code{"xifti"} or \code{"nifti"} objects if the
#' \code{BOLD} format was CIFTI or NIFTI; and the DR results if
#' \code{isTRUE(keep_S)} and/or \code{isTRUE(keep_FC)}.
#'
#' Use \code{summary} to print a description of the prior results, and
#' for CIFTI-format data use \code{plot} to plot the prior mean and variance
#' estimates. Use \code{\link{export_prior}} to save the priors to
#' individual RDS, CIFTI, or NIFTI files (depending on the \code{BOLD} format).
#' @export
#'
#' @examples
#' nT <- 21
#' nV <- 140
#' nQ <- 6
#' mU <- matrix(rnorm(nV*nQ), nrow=nV)
#' mS <- mU %*% diag(seq(nQ, 1)) %*% matrix(rnorm(nQ*nT), nrow=nQ)
#' BOLD <- list(B1=mS, B2=mS, B3=mS)
#' BOLD <- lapply(BOLD, function(x){x + rnorm(nV*nT, sd=.05)})
#' template <- mU
#' estimate_prior(BOLD=BOLD, template=mU, FC_nSamp=2000, usePar=FALSE)
#'
#' \dontrun{
#' estimate_prior(
#' run1_cifti_fnames, run2_cifti_fnames,
#' gICA_cifti_fname, brainstructures="all",
#' scale="global", TR=0.71, Q2=NULL, varTol=10,
#' usePar=FALSE
#' )
#' }
estimate_prior <- function(
BOLD, BOLD2=NULL,
template,
mask=NULL,
inds=NULL,
scale=c("local", "global", "none"),
scale_sm_surfL=NULL,
scale_sm_surfR=NULL,
scale_sm_FWHM=2,
nuisance=NULL,
scrub=NULL,
drop_first=0,
hpf=0,
TR=NULL,
GSR=FALSE,
Q2=0,
Q2_max=NULL,
covariates=NULL,
brainstructures="all",
resamp_res=NULL,
keep_S=FALSE,
keep_FC=FALSE,
FC=TRUE,
FC_nPivots=100,
FC_nSamp=50000,
varTol=1e-6,
maskTol=.1,
missingTol=.1,
usePar=FALSE,
wb_path=NULL,
verbose=TRUE) {
# Check arguments ------------------------------------------------------------
# Simple argument checks.
if (is.null(scale) || isFALSE(scale)) { scale <- "none" }
if (isTRUE(scale)) {
warning(
"Setting `scale='global'`. Use `'global'` or `'local'` ",
"instead of `TRUE`, which has been deprecated."
)
scale <- "global"
}
scale <- match.arg(scale, c("local", "global", "none"))
stopifnot(fMRItools::is_1(scale_sm_FWHM, "numeric"))
if (is.null(hpf)) { hpf <- 0 }
if (is.null(TR)) {
if (hpf==.01) {
message("Setting `hpf=0` because `TR` was not provided. Either provide `TR` or set `hpf=0` to disable this message.")
hpf <- 0
} else if (hpf!=0) {
stop("Cannot apply `hpf` because `TR` was not provided. Either provide `TR` or set `hpf=0`.")
}
} else {
stopifnot(fMRItools::is_posNum(TR))
stopifnot(fMRItools::is_posNum(hpf, zero_ok=TRUE))
}
stopifnot(fMRItools::is_1(GSR, "logical"))
if (!is.null(Q2)) { # Q2_max checked later.
stopifnot(fMRItools::is_integer(Q2) && (Q2 >= 0))
}
if (!is.null(covariates)) {
warning("`covariates` is not implemented yet.")
if (is.data.frame(covariates)) { covariates <- as.matrix(covariates) }
stopifnot(is.numeric(covariates) && is.matrix(covariates))
covariate_names <- colnames(covariates)
if (is.null(covariate_names)) {
stop("Please provide the names of the covariates as the column names.")
}
nC <- ncol(covariates)
} else {
covariate_names <- NULL
nC <- 0
}
stopifnot(fMRItools::is_1(FC, "logical"))
stopifnot(fMRItools::is_1(varTol, "numeric"))
if (varTol < 0) { message("Setting `varTol=0`."); varTol <- 0 }
stopifnot(fMRItools::is_posNum(maskTol, zero_ok=TRUE))
stopifnot(fMRItools::is_posNum(missingTol, zero_ok=TRUE))
stopifnot(fMRItools::is_1(verbose, "logical"))
real_retest <- !is.null(BOLD2)
# `keep_S`
if (is.logical(keep_S)) {
stopifnot(length(keep_S)==1)
} else {
if (is.character(keep_S)) {
stopifnot(length(keep_S)==1)
if (!dir.exists(dirname(keep_S))) { stop('Directory part of `keep_S` does not exist.') }
if (!endsWith(keep_S, ".rds")) { keep_S <- paste0(keep_S, ".rds") }
} else if (is.list(keep_S)) {
stop("Not supported: `keep_S` must be `TRUE`, `FALSE`, or a single file path.")
# [TO DO]
# if (length(keep_S) != 2) {
# stop("If `keep_S` is a list it must have two entries, each being a vector of file paths the same length as `BOLD`.")
# }
# if (length(keep_S[[1]]) != nN || length(keep_S[[2]]) != nN) {
# stop("If `keep_S` is a list it must have two entries, each being a vector of file paths the same length as `BOLD`.")
# }
# if (!all(dir.exists(dirname(do.call(c, keep_S))))) { stop('At least one directory part of `keep_S` does not exist.') }
}
}
# `keep_FC`
if (is.logical(keep_FC)) {
stopifnot(length(keep_FC)==1)
} else {
if (is.character(keep_FC)) {
stopifnot(length(keep_FC)==1)
if (!dir.exists(dirname(keep_FC))) { stop('Directory part of `keep_FC` does not exist.') }
if (!endsWith(keep_FC, ".rds")) { keep_FC <- paste0(keep_FC, ".rds") }
} else if (is.list(keep_FC)) {
stop("Not supported: `keep_FC` must be `TRUE`, `FALSE`, or a single file path.")
# [TO DO]
# if (length(keep_FC) != 2) {
# stop("If `keep_FC` is a list it must have two entries, each being a vector of file paths the same length as `BOLD`.")
# }
# if (length(keep_FC[[1]]) != nN || length(keep_FC[[2]]) != nN) {
# stop("If `keep_FC` is a list it must have two entries, each being a vector of file paths the same length as `BOLD`.")
# }
# if (!all(dir.exists(dirname(do.call(c, keep_FC))))) { stop('At least one directory part of `keep_FC` does not exist.') }
}
}
# `usePar`
if (!isFALSE(usePar)) {
check_parallel_packages()
cores <- parallel::detectCores()
if (isTRUE(usePar)) { nCores <- cores[1] - 2 } else { nCores <- usePar; usePar <- TRUE }
if (nCores < 2) {
usePar <- FALSE
} else {
cluster <- parallel::makeCluster(nCores, outfile="")
doParallel::registerDoParallel(cluster)
}
}
# `BOLD` and `BOLD2` ---------------------------------------------------------
# Determine the format of `BOLD` and `BOLD2`.
format <- infer_format_ifti_vec(BOLD)[1]
if (real_retest) {
format2 <- infer_format_ifti_vec(BOLD2)[1]
if (format2 != format) {
stop("`BOLD` format is ", format, ", but `BOLD2` format is ", format2, ".")
}
}
FORMAT <- get_FORMAT(format)
FORMAT_extn <- switch(FORMAT,
CIFTI=".dscalar.nii",
GIFTI=".func.gii",
NIFTI=".nii",
MATRIX=".rds"
)
check_req_ifti_pkg(FORMAT)
# Ensure `BOLD2` is the same length.
if (real_retest) {
if (length(BOLD) != length(BOLD2)) {
stop("`BOLD2` represents corresponding retest data for `BOLD`, so it must have the same length as `BOLD`.")
}
}
# If BOLD (and BOLD2) is a CIFTI, GIFTI, NIFTI, or RDS file, check that the file paths exist.
if (format %in% c("CIFTI", "GIFTI", "NIFTI", "RDS")) {
missing_BOLD <- !file.exists(BOLD)
if (all(missing_BOLD)) stop('The files in `BOLD` do not exist.')
if (real_retest) {
missing_BOLD2 <- !file.exists(BOLD2)
if (all(missing_BOLD2)) stop('The files in `BOLD2` do not exist.')
# determine pairs with at least one missing scan
missing_BOLD <- missing_BOLD | missing_BOLD2
rm(missing_BOLD2)
if (all(missing_BOLD)) stop('Files in `BOLD` and/or `BOLD2` are missing such that no complete pair of data exists.')
}
if (any(missing_BOLD)) {
if (real_retest) {
warning('There are ', sum(missing_BOLD), ' pairs of `BOLD` and `BOLD2` with at least one non-existent scan. These pairs will be excluded from prior estimation.')
BOLD <- BOLD[!missing_BOLD]
BOLD2 <- BOLD2[!missing_BOLD]
} else {
warning('There are ', sum(missing_BOLD), ' scans in `BOLD` that do not exist. These scans will be excluded from prior estimation.')
BOLD <- BOLD[!missing_BOLD]
}
}
}
nN <- length(BOLD)
# Check `scale_sm_FWHM`
if (scale_sm_FWHM !=0 && FORMAT %in% c("NIFTI", "MATRIX")) {
if (scale_sm_FWHM==2) {
message("Setting `scale_sm_FWHM == 0`.\n")
} else {
if (FORMAT == "NIFTI") {
# [TO DO] make this available
warning( "Setting `scale_sm_FWHM == 0` (Scale smoothing not available for volumetric data.).\n")
} else {
warning( "Setting `scale_sm_FWHM == 0` (Scale smoothing not available for data matrices: use CIFTI/GIFTI files.).\n")
}
}
scale_sm_FWHM <- 0
}
if (!is.null(nuisance)) {
if (!real_retest) {
stopifnot(is.list(nuisance) && length(nuisance)==nN)
} else {
stopifnot(is.list(nuisance) && length(nuisance)==2)
stopifnot(is.list(nuisance[[1]]) && length(nuisance[[1]])==nN)
stopifnot(is.list(nuisance[[2]]) && length(nuisance[[2]])==nN)
# Remake into a length-nN list of length-2 lists
nuisance <- lapply(seq(nN), function(x){ list(nuisance[[1]][[x]], nuisance[[2]][[x]]) })
}
}
if (!is.null(scrub)) {
if (!real_retest) {
stopifnot(is.list(scrub) && length(scrub)==nN)
} else {
stopifnot(is.list(scrub) && length(scrub)==2)
stopifnot(is.list(scrub[[1]]) && length(scrub[[1]])==nN)
stopifnot(is.list(scrub[[2]]) && length(scrub[[2]])==nN)
# Remake into a length-nN list of length-2 lists
scrub <- lapply(seq(nN), function(x){ list(scrub[[1]][[x]], scrub[[2]][[x]]) })
}
}
# [TO DO]: Mysteriously, RDS format is not working with parallel.
if (usePar && format=="RDS") { stop("Parallel computation not working with RDS file input. Working on this!") }
# `template` ---------------------------------------------------------------------
# Convert `template` to a numeric data matrix or array.
template_parc <- FALSE; template_parc_table <- NULL
if (FORMAT == "CIFTI") {
if (is.character(template)) { template <- ciftiTools::read_cifti(template, brainstructures=brainstructures, resamp_res=resamp_res) }
if (ciftiTools::is.xifti(template, messages=FALSE)) {
#for parcellation (instead of ICA)
if ((ncol(template) == 1) && (all(as.matrix(template) == round(as.matrix(template))))) {
template_parc <- TRUE
template_parc_vals <- sort(unique(c(as.matrix(template))))
template_parc_table <- template$meta$cifti$labels[[1]]
if (is.null(template_parc_table)) {
template_parc_table <- data.frame(
Key = template_parc_vals,
Red = 1,
Green = 1,
Blue = 1,
Alpha = 1,
row.names = paste("ParcelKey", template_parc_vals)
)
} else {
# Drop empty levels
template_parc_table <- template_parc_table[template_parc_table$Key %in% template_parc_vals,]
stopifnot(nrow(template_parc_table) > 1)
}
}
xii1 <- ciftiTools::select_xifti(template, 1) # for formatting output
template <- as.matrix(template)
} else {
# Get `xii1` from first data entry.
xii1 <- BOLD[[1]]
if (is.character(xii1)) {
xii1 <- ciftiTools::read_cifti(xii1, brainstructures=brainstructures, resamp_res=resamp_res, idx=1)
}
xii1 <- ciftiTools::convert_xifti(ciftiTools::select_xifti(xii1, 1), "dscalar")
}
stopifnot(is.matrix(template))
} else if (FORMAT == "GIFTI") {
if (is.character(template)) { template <- gifti::readgii(template) }
ghemi <- template$file_meta["AnatomicalStructurePrimary"]
if (!(ghemi %in% c("CortexLeft", "CortexRight"))) {
stop("AnatomicalStructurePrimary metadata missing or invalid for template.")
}
ghemi <- switch(ghemi, CortexLeft="left", CortexRight="right")
template <- do.call(cbind, template$data)
} else if (FORMAT == "NIFTI") {
if (is.character(template)) { template <- RNifti::readNifti(template) }
stopifnot(length(dim(template)) > 1)
} else if (FORMAT == "MATRIX") {
if (is.character(template)) { template <- readRDS(template) }
stopifnot(is.matrix(template))
}
# `inds`.
if (template_parc) {
nQ <- nrow(template_parc_table)
if (!is.null(inds)) {
if (!all(inds %in% template_parc_table$Key)) stop('Invalid entries in inds argument.')
nL <- length(inds)
} else {
inds <- template_parc_table$Key
nL <- nQ
}
inds <- match(inds, template_parc_table$Key)
} else {
nQ <- dim(template)[length(dim(template))]
if (!is.null(inds)) {
if (!all(inds %in% seq(nQ))) stop('Invalid entries in inds argument.')
nL <- length(inds)
} else {
inds <- seq(nQ)
nL <- nQ
}
}
# [TO DO]: NA in template?
if (FC) {
if (FC_nPivots > 0) {
stopifnot(FC_nPivots == round(FC_nPivots))
FC_nSamp2 <- FC_nSamp/FC_nPivots #number of samples per pivot
if (FC_nSamp2 != round(FC_nSamp2)) {
stop("`FC_nSamp` must be a multiple of `FC_nPivots`.")
}
} else {
if (FC_nPivots != 0) { stop("`FC_nPivots` should be a positive integer or zero.") }
}
}
# `mask` ---------------------------------------------------------------------
# Get `mask` as a logical array (NIFTI) or vector (everything else).
# For NIFTI, append NIFTI header from template to `mask`.
# Apply mask to `template`, and if NIFTI, vectorize `template`.
if (FORMAT == "NIFTI") {
if (is.null(mask)) { stop("`mask` is required.") }
if (is.character(mask)) { mask <- RNifti::readNifti(mask); mask <- array(as.logical(mask), dim=dim(mask)) }
if (dim(mask)[length(dim(mask))] == 1) { mask <- array(mask, dim=dim(mask)[-length(dim(mask))]) } # drop trailing singleton dimension
if (is.numeric(mask)) {
message("Coercing `mask` to a logical array.\n")
if (!fMRItools::all_binary(mask)) {
message("Warning: values other than 0 or 1 in mask.\n")
}
mask <- array(as.logical(mask), dim=dim(mask))
}
nI <- dim(mask); nV <- sum(mask)
stopifnot(length(dim(template)) %in% c(2, length(nI)+1))
if (length(dim(template)) == length(nI)+1) {
if (length(dim(template)) != 2) {
stopifnot(all(dim(template)[seq(length(dim(template))-1)] == nI))
}
# Append NIFTI header.
mask <- RNifti::asNifti(array(mask, dim=c(dim(mask), 1)), reference=template)
# Vectorize `template`.
if (all(dim(template)[seq(length(dim(template))-1)] == nI)) {
template <- if (template_parc) {
matrix(template[as.logical(mask)], ncol=1)
} else {
matrix(template[rep(as.logical(mask), nQ)], ncol=nQ)
}
stopifnot(nrow(template) == nV)
}
}
} else { #For non-NIFTI data, mask is not required but can be provided
if (!is.null(mask)) {
if (FORMAT == "GIFTI") {
mask <- as.logical(do.call(cbind, gifti::read_gifti(mask)$data))
} else if (FORMAT == "GIFTI2") {
mask <- as.list(mask)
if (length(mask) != 2) {
stop("`mask` should be a length-2 list of GIFTI data: left hemisphere first, right hemisphere second.")
}
for (ii in 1:2) {
mask[[ii]] <- as.logical(do.call(cbind, gifti::read_gifti(mask[[ii]])$data))
}
mask <- do.call(c, mask)
} else if (FORMAT == "MATRIX") {
stopifnot(is.vector(mask))
stopifnot(is.numeric(mask) || is.logical(mask))
}
if (is.numeric(mask)) {
message("Coercing `mask` to a logical vector.\n")
if (!all_binary(mask)) {
message("Warning: values other than 0 or 1 in mask.\n")
}
mask <- as.logical(mask)
}
nI <- length(mask); nV <- sum(mask)
# Check `template` and `mask` dimensions match.
stopifnot(nrow(template) == nI)
# Apply mask to template.
template <- template[mask,,drop=FALSE]
} else { #end if(!is.null(mask))
nI <- nV <- nrow(template)
}
} #end else (not NIFTI format)
# Center `template` columns.
center_Gcols <- TRUE
if (center_Gcols && (!template_parc)) { template <- fMRItools::colCenter(template) }
# Checks now that the data dimensions have been determined. ------------------
if (!is.null(covariates)) { stopifnot(nrow(covariates) == nN) }
# Print summary of data ------------------------------------------------------
format2 <- if (format == "data") { "numeric matrix" } else { format }
if (verbose) {
cat("Data input format: ", format2, "\n")
cat("Image dimensions: ", paste(nI, collapse=" x "), "\n")
cat("Initial in-mask locations: ", nV, "\n")
if (FORMAT == "GIFTI") {
cat("Cortex Hemisphere: ", ghemi, "\n")
}
if (template_parc) {
cat('Number of parcels: ', nL)
if (nL < nQ) { cat(" (out of ", nQ, " original parcels)") }
cat("\n")
} else {
cat('Number of networks: ', nL)
if (nL < nQ) { cat(" (out of ", nQ, " original networks)") }
cat("\n")
}
cat('Number of training subjects: ', nN, "\n")
cat('Number of covariates: ', nC, "\n")
if (FC) {
if (FC_nPivots == 0) {
cat('\nIncluding parametric FC prior (Cholesky-based FC prior skipped).\n')
} else {
cat(paste0('\nIncluding Cholesky-based FC prior with ',
FC_nSamp, " samples (", FC_nPivots, " random pivots x ",
FC_nSamp2, " samples per pivot).\n"))
}
}
}
# Process each scan ----------------------------------------------------------
nM <- 2
# Initialize Cholesky pivots for Chol-based FC prior ---------------------
if (FC) {
if(FC_nPivots > 0){
pivots <- Chol_samp <- Chol_svd <- vector('list', length=FC_nPivots)
FC_nSamp2 <- round(FC_nSamp/FC_nPivots) #number of samples per pivot
for(pp in 1:FC_nPivots){
pivots[[pp]] <- sample(1:nL, nL, replace = FALSE)
}
}
} #end setup for FC prior estimation
if (usePar) {
check_parallel_packages()
# Loop over subjects.
`%dopar%` <- foreach::`%dopar%`
q <- foreach::foreach(ii = seq(nN), .packages=c("ciftiTools", "fMRItools", "BayesBrainMap")) %dopar% {
if (FORMAT=="CIFTI" || FORMAT=="GIFTI") {
# Load the workbench.
if (is.null(wb_path)) {
stop("`wb_path` is required for parallel computation.")
}
requireNamespace("ciftiTools")
ciftiTools::ciftiTools.setOption("wb_path", wb_path)
}
# Initialize output.
out <- list(DR=array(NA, dim=c(nM, 1, nL, nV)))
if (FC) {
out$FC <- array(NA, dim=c(nM, 1, nL, nL))
} #end setup for FC prior estimation
out$sigma_sq <- array(NA, dim=c(nM, 1, nV))
# Dual regression.
if(verbose) { cat(paste0(
'\nSubject ', ii,' of ', nN, ".\n"
)) }
if (real_retest) { B2 <- BOLD2[[ii]] } else { B2 <- NULL }
DR_ii <- try(dual_reg2(
BOLD[[ii]], BOLD2=B2,
format=format,
template=template, template_parc_table=template_parc_table,
mask=mask,
keepA=FC,
GSR=GSR,
scale=scale,
scale_sm_surfL=scale_sm_surfL, scale_sm_surfR=scale_sm_surfR,
scale_sm_FWHM=scale_sm_FWHM,
nuisance=nuisance[[ii]],
scrub=scrub[[ii]], drop_first=drop_first,
hpf=hpf, TR=TR,
Q2=Q2, Q2_max=Q2_max,
brainstructures=brainstructures, resamp_res=resamp_res,
varTol=varTol, maskTol=maskTol,
verbose=verbose
))
# Add results if this subject was not skipped.
# (Subjects are skipped if too many locations are masked out.)
if (is.null(DR_ii)) {
if(verbose) { cat(paste0(
'\tSubject ', ii,' was skipped (too many masked locations).\n'
)) }
} else if (inherits(DR_ii, "try-error")) {
message(paste0(
'\tSubject ', ii,' was skipped (error).\n'
))
if (ii==1) { message(DR_ii); stop("Error on first subject. Check data?") }
} else {
out$DR[1,,,] <- DR_ii$test$S[inds,]
out$DR[2,,,] <- DR_ii$retest$S[inds,]
if(FC) {
out$FC[1,,,] <- cov(DR_ii$test$A[,inds])
out$FC[2,,,] <- cov(DR_ii$retest$A[,inds])
#out$FC_chol[1,,] <- chol(out$FC[1,,,])[upper.tri(out$FC[1,,,], diag=TRUE)]
#out$FC_chol[2,,] <- chol(out$FC[2,,,])[upper.tri(out$FC[2,,,], diag=TRUE)]
}
#save residual variance for rescaling
out$sigma_sq[1,,] <- DR_ii$test$sigma_sq
out$sigma_sq[2,,] <- DR_ii$retest$sigma_sq
}
out
}
# Aggregate.
DR0 <- abind::abind(lapply(q, `[[`, "DR"), along=2)
if (FC) {
FC0 <- abind::abind(lapply(q, `[[`, "FC"), along=2)
#FC0_chol <- abind::abind(lapply(q, `[[`, "FC_chol"), along=2)
}
sigma_sq0 <- abind::abind(lapply(q, `[[`, "sigma_sq"), along=2)
doParallel::stopImplicitCluster()
} else {
# Initialize output.
DR0 <- array(NA, dim=c(nM, nN, nL, nV)) # measurements by subjects by components by locations
if(FC) {
FC0 <- array(NA, dim=c(nM, nN, nL, nL)) # for functional connectivity prior
#FC0_chol <- array(NA, dim=c(nM, nN, nL*(nL+1)/2))
}
sigma_sq0 <- array(NA, dim=c(nM, nN, nV))
for (ii in seq(nN)) {
if(verbose) { cat(paste0(
'\nSubject ', ii,' of ', nN, '.\n'
)) }
if (real_retest) { B2 <- BOLD2[[ii]] } else { B2 <- NULL }
DR_ii <- try(dual_reg2(
BOLD[[ii]], BOLD2=B2,
format=format,
template=template, template_parc_table=template_parc_table,
mask=mask,
keepA=FC,
GSR=GSR,
scale=scale,
scale_sm_surfL=scale_sm_surfL, scale_sm_surfR=scale_sm_surfR,
scale_sm_FWHM=scale_sm_FWHM,
nuisance=nuisance[[ii]],
scrub=scrub[[ii]], drop_first=drop_first,
hpf=hpf, TR=TR,
Q2=Q2, Q2_max=Q2_max,
brainstructures=brainstructures, resamp_res=resamp_res,
varTol=varTol, maskTol=maskTol,
verbose=verbose
))
# Add results if this subject was not skipped.
# (Subjects are skipped if error or too many locations are masked out.)
if (is.null(DR_ii)) {
if(verbose) { cat(paste0(
'\tSubject ', ii,' was skipped (too many masked locations).\n'
)) }
} else if (inherits(DR_ii, "try-error")) {
cat(paste0(
'\tSubject ', ii,' was skipped (error).\n'
))
if (ii==1) { message(DR_ii); stop("Error on first subject. Check data?") }
} else {
DR0[1,ii,,] <- DR_ii$test$S[inds,]
DR0[2,ii,,] <- DR_ii$retest$S[inds,]
if(FC) {
FC0[1,ii,,] <- cov(DR_ii$test$A[,inds])
FC0[2,ii,,] <- cov(DR_ii$retest$A[,inds])
if(!all(round(diag(FC0[1,ii,,]),6) == 1)) stop('var(A) should be 1 but it is not')
if(!all(round(diag(FC0[2,ii,,]),6) == 1)) stop('var(A) should be 1 but it is not')
#FC0_chol[1,ii,] <- chol(FC0[1,ii,,])[upper.tri(FC0[1,ii,,], diag=TRUE)]
#FC0_chol[2,ii,] <- chol(FC0[2,ii,,])[upper.tri(FC0[2,ii,,], diag=TRUE)]
}
sigma_sq0[1,ii,] <- DR_ii$test$sigma_sq
sigma_sq0[2,ii,] <- DR_ii$retest$sigma_sq
}
}
# Conserve memory.
rm(DR_ii)
}
# Aggregate results, and compute priors. ----------------------------------
# Mask out locations for which too many subjects do not have data
# because of missing values or low variance.
nVm <- nV # `nVm` will be the number of locations after masking with `mask2`.
if (missingTol < 1) { missingTol <- missingTol * nN }
# Assume is.na(DR0[mm,,,]) is the same for all mm
# Assume is.na(DR0[,,cc,]) is the same for all cc
NA_counts <- colSums(is.na(DR0[1,,1,]))
mask2 <- NA_counts < missingTol
use_mask2 <- !all(mask2)
if (use_mask2) {
if (all(!mask2)) { stop(
"No locations meet the minimum number of subjects with data (",
round(missingTol, digits=3), "). Check `maskTol` and `missingTol`."
) }
DR0 <- DR0[,,,mask2,drop=FALSE]
nVm <- sum(mask2)
sigma_sq0 <- sigma_sq0[,,mask2]
}
# Note that `NA` values may still exist in `DR0`.
#Average `sigma_sq0` across subjects and test/retest.
sigma_sq0 <- colMeans(sigma_sq0, dims=2, na.rm=TRUE)
# Vectorize components and locations
DR0 <- array(DR0, dim=c(nM, nN, nL*nVm))
if (verbose) { cat("\nCalculating spatial prior.\n") }
# Estimate the mean and variance priors.
# Also obtain the variance decomposition.
x <- estimate_prior_from_DR(DR0, c(nL, nVm))
prior <- lapply(x$prior, t)
var_decomp <- lapply(x$var_decomp, t)
rm(x)
#rescale mean and variance of S to standardize residual var
#rescaling residuals by \sigma_v implies that s_v rescaled in the same way
#this is the same as rescaling mean(s_v) and var(s_v)
rescale <- sqrt(sigma_sq0)/mean(sqrt(sigma_sq0), na.rm=TRUE)
rescale <- matrix(rescale, nrow=length(sigma_sq0), ncol=nL)
prior$mean <- prior$mean / rescale #scale mean(S)
prior[2:3] <- lapply(prior[2:3], function(x) return(x / (rescale^2)) ) #scale var(S)
var_decomp <- lapply(var_decomp, function(x) return(x / (rescale^2) ) ) #scale var(S)
# Unmask the data matrices (relative to `mask2`, not `mask`).
if (use_mask2) {
prior <- lapply(prior, fMRItools::unmask_mat, mask=mask2)
var_decomp <- lapply(var_decomp, fMRItools::unmask_mat, mask=mask2)
}
# Estimate FC prior
if(FC){
if (verbose) { cat("\nCalculating parametric FC prior.\n") }
prior$FC <- estimate_prior_FC(FC0) #estimate IW parameters
#for Cholesky-based FC prior
FC_samp_list <- NULL #collect FC samples to compute mean/var at end
if(FC_nPivots > 0){
if (verbose) { cat("\nGenerating samples for Cholesky-based FC prior.\n") }
#do Cholesky-based FC prior sampling
FC_samp_logdet <- NULL
FC_samp_cholinv <- vector('list', length = FC_nPivots)
#setup
nUT <- nL*(nL+1)/2 #number of upper triangular elements
Chol_mat_blank <- matrix(0, nL, nL)
Chol_mat_blank[upper.tri(Chol_mat_blank, diag=TRUE)] <- 1:nUT #indices of UT elements
chol_diag <- diag(Chol_mat_blank) #which Cholesky UT elements are on the diagonal
chol_offdiag <- Chol_mat_blank[upper.tri(Chol_mat_blank, diag=FALSE)] #which Cholesky UT elements are off the diagonal
#for each pivot: (steps 1-4 performed by Chol_samp function)
#1. perform Cholesky on each session, transform values to real line
#2. vectorize and perform SVD (save D, V and sd(U) for each pivot to facilitate additional sampling)
#3. draw FC_nSamp2 samples, construct Cholesky elements, and reverse transform
#4. rescale values to correspond to a correlation matrix
#5. for each sample, compute log determinant and inverse
# a) log|X| = 2*sum(log(diag(L))), where L is the upper or lower triangular Cholesky matrix
# b) a'X^(-1)a can be written b'b, where b = R_p^(-T)*P*a, where R_p is the upper triangular Cholesky matrix
count_pivot_fails <- 0
pivot_failures <- c()
for(pp in 1:FC_nPivots){
# count errors in chol
counter_env <- new.env()
counter_env$count <- 0
#perform Cholesky decomposition on each matrix in FC0 --> dim(FC0) = c(nM, nN, nL, nL)
Chol_p <- apply(FC0, 1:2, function(x, pivot){ # dim = nChol x nM x nN
xp <- x[pivot,pivot]
# chol will return an error when there are NAs in FC0 or when there is any rank deficiency in one of the sessions
# if error, return NA instead
tryCatch({
chol_xp <- chol(xp)
chol_xp[upper.tri(chol_xp, diag = TRUE)]
}, error = function(e) {
counter_env$count <- counter_env$count + 1
rep(NA_real_, length(xp[upper.tri(xp, diag = TRUE)]))
})
}, pivot = pivots[[pp]])
if (counter_env$count > 0) {
count_pivot_fails <- count_pivot_fails + 1
pivot_failures <- c(pivot_failures, counter_env$count)
}
#rbind across sessions to form a matrix
Chol_mat_p <- rbind(t(Chol_p[,1,]), t(Chol_p[,2,])) # dim = nM*nN x nChol
# remove all rows with NA before calling Chol_samp_fun
Chol_mat_p <- Chol_mat_p[stats::complete.cases(Chol_mat_p), ]
#take samples
Chol_samp_pp <- Chol_samp_fun(Chol_mat_p, p=pivots[[pp]], M=FC_nSamp2,
chol_diag, chol_offdiag, Chol_mat_blank) #returns a nSamp2 x nChol matrix
Chol_samp[[pp]] <- Chol_samp_pp$Chol_samp
Chol_svd[[pp]] <- Chol_samp_pp$chol_svd
FC_samp_list <- c(FC_samp_list, Chol_samp_pp$FC_samp_list)
# 5(a) compute the log(det(FC)) for each pivoted Cholesky sample
logdet_p <- 2 * rowSums(log(Chol_samp[[pp]][,chol_diag])) #log(det(X)) = 2*sum(log(diag(R)))
FC_samp_logdet <- c(FC_samp_logdet, logdet_p)
# 5(b) compute the inverse of each pivoted Cholesky sample
# If chol_inv is the inverse of the pivoted UT cholesky matrix,
# then chol_inv[op,] %*% t(chol_inv[op,]) gives inv(FC), where op = order(pivot)
op <- order(pivots[[pp]])
FC_samp_cholinv[[pp]] <- t(apply(Chol_samp[[pp]], 1, function(x){
x_mat <- UT2mat(x, LT=0) #form into a matrix (upper triangular)
x_mat_inv <- backsolve(x_mat, diag(nL)) #take the inverse of an UT matrix fast
#x_mat_inv_reo <- x_mat_inv[op,] #if we reverse the pivot -- no longer upper triangular! so we will need to do this later.
x_mat_inv[upper.tri(x_mat_inv, diag=TRUE)] #the inverse of an UT matrix is also upper triangular
}, simplify=TRUE)) #this is extremely fast
} #end loop over pivots
#compute mean across FC samples
M_all <- FC_nPivots*FC_nSamp2 #total number of samples
FC_samp_mean <- Reduce("+", FC_samp_list)/M_all #mean of FC
FC_samp_var <- Reduce("+", lapply(FC_samp_list, function(x){ (x - FC_samp_mean)^2 }))/M_all #var of FC
# Compute the maximum eigenvalue of each FC^(-1) (same as 1 / min eigenvalue of each FC sample)
FC_samp_maxeig <- sapply(FC_samp_list, function(x){
vals <- eigen(x, only.values = TRUE)$values
return(1/min(vals))
})
prior$FC_Chol <- list(Chol_samp = Chol_samp, #pivoted Cholesky factors for every sample
FC_samp_logdet = FC_samp_logdet, #log determinant values for every sample
FC_samp_cholinv = FC_samp_cholinv, #pivoted Cholesky inverses for every sample
FC_samp_maxeig = FC_samp_maxeig, #maximum eigenvalue of inverse FC samples
FC_samp_mean = FC_samp_mean, #mean of FC samples
FC_samp_var = FC_samp_var, #var of FC samples
Chol_svd = Chol_svd,
pivots = pivots) #need to use these along with FC_samp_cholinv to determine inv(FC)
if (length(pivot_failures) > 0) {
mean_failures_per_failed_pivot <- mean(pivot_failures)
min_failures_per_failed_pivot <- min(pivot_failures)
max_failures_per_failed_pivot <- max(pivot_failures)
} else {
mean_failures_per_failed_pivot <- 0
min_failures_per_failed_pivot <- 0
max_failures_per_failed_pivot <- 0
}
if (verbose) {
cat("\n--- Cholesky Error Summary ---\n")
cat("Number of pivots with any failures:", count_pivot_fails, "/", FC_nPivots, "\n")
cat("Failures per failed pivot - mean:", mean_failures_per_failed_pivot, "| range:", min_failures_per_failed_pivot, "to", max_failures_per_failed_pivot, "\n")
}
} #end Cholesky-based FC prior estimation (the failure summary stays inside this block, since its objects only exist when FC_nPivots > 0)
}
# [TO DO]: replace with fMRItools::unmask_mat or a vector version
unmask_vec <- function(vec, mask) {
vec2 <- rep(NA, length(mask))
vec2[mask] <- vec
vec2
}
# Format result ---------------------------------------------------
if (use_mask2) {
sigma_sq0 <- unmask_vec(sigma_sq0, mask2)
}
# Keep DR estimate of S
if (!isFALSE(keep_S)) {
DR0 <- array(DR0, dim=c(nM, nN, nL, nVm)) # Undo vectorize
if (use_mask2) {
DR0temp <- array(NA, dim=c(dim(DR0)[seq(3)], length(mask2)))
DR0temp[,,,mask2] <- DR0
DR0 <- DR0temp
}
if (is.character(keep_S)) {
saveRDS(DR0, keep_S)
keep_S <- FALSE # no longer need it.
} else if (!isTRUE(keep_S)) {
warning("`keep_S` should be `TRUE`, `FALSE`, or a file path. Using `FALSE`.")
keep_S <- FALSE
}
}
if (!keep_S) { rm(DR0) }
# Keep DR estimate of FC
if (FC) {
if (!isFALSE(keep_FC)) {
if (is.character(keep_FC)) {
saveRDS(FC0, keep_FC) # in this case we don't save the Cholesky factors
keep_FC <- FALSE # no longer need it.
} else if (!isTRUE(keep_FC)) {
warning("`keep_FC` should be `TRUE`, `FALSE`, or a file path. Using `FALSE`.")
keep_FC <- FALSE
}
}
if (!keep_FC) rm(FC0)
}
tparams <- list(
FC=FC, FC_nPivots=FC_nPivots, FC_nSamp=FC_nSamp,
num_subjects=nN, num_visits=nM,
inds=inds,
GSR=GSR, scale=scale,
scale_sm_FWHM=scale_sm_FWHM,
hpf=hpf, TR=TR,
Q2=Q2, Q2_max=Q2_max,
covariate_names=covariate_names,
brainstructures=brainstructures, resamp_res=resamp_res,
varTol=varTol, maskTol=maskTol, missingTol=missingTol,
pseudo_retest=!real_retest
)
# If masking was performed, and if verbose, report the NA count in priors.
# If no masking was performed, remove unnecessary metadata.
if (use_mask2) {
if (verbose) {
cat("Template locations removed (filled with `NA`) due to varTol & missingTol:", sum(!mask2), "\n")
}
} else {
mask2 <- NULL
}
if (template_parc && FORMAT=="CIFTI") {
xii1 <- ciftiTools::convert_xifti(xii1, "dscalar")
}
dat_struct <- switch(FORMAT,
CIFTI = ciftiTools::newdata_xifti(xii1, 0),
GIFTI = list(hemisphere=ghemi),
NIFTI = NULL,
MATRIX = NULL
)
# Results list.
result <- list(
prior=prior,
var_decomp=var_decomp,
sigma_sq0=sigma_sq0,
mask_input=mask,
mask=mask2,
params=tparams,
dat_struct=dat_struct,
template_parc_table=template_parc_table
)
# Add DR if applicable.
if (keep_S) { result$S <- DR0 }
if (FC && keep_FC) { result$FC <- FC0 }
# Return results.
class(result) <- paste0("prior.", tolower(FORMAT))
result
}
#' @rdname estimate_prior
#'
estimate_prior.cifti <- function(
BOLD, BOLD2=NULL,
template, inds=NULL,
scale=c("local", "global", "none"),
scale_sm_surfL=NULL, scale_sm_surfR=NULL, scale_sm_FWHM=2,
nuisance=NULL,
scrub=NULL,
drop_first=0,
hpf=0, TR=NULL,
GSR=FALSE,
Q2=0, Q2_max=NULL,
brainstructures="all", resamp_res=resamp_res,
keep_S=FALSE, keep_FC=FALSE,
FC=TRUE,
varTol=1e-6, maskTol=.1, missingTol=.1,
usePar=FALSE, wb_path=NULL,
verbose=TRUE) {
estimate_prior(
BOLD=BOLD, BOLD2=BOLD2,
template=template, inds=inds,
scale=scale, scale_sm_surfL=scale_sm_surfL, scale_sm_surfR=scale_sm_surfR,
scale_sm_FWHM=scale_sm_FWHM,
nuisance=nuisance, scrub=scrub, drop_first=drop_first,
hpf=hpf, TR=TR,
GSR=GSR,
Q2=Q2, Q2_max=Q2_max,
brainstructures=brainstructures, resamp_res=resamp_res,
keep_S=keep_S, keep_FC=keep_FC,
FC=FC,
varTol=varTol, maskTol=maskTol, missingTol=missingTol,
usePar=usePar, wb_path=wb_path,
verbose=verbose
)
}
#' @rdname estimate_prior
#'
estimate_prior.gifti <- function(
BOLD, BOLD2=NULL,
template, inds=NULL,
scale=c("local", "global", "none"),
scale_sm_surfL=NULL, scale_sm_surfR=NULL, scale_sm_FWHM=2,
nuisance=NULL,
scrub=NULL,
drop_first=0,
hpf=0, TR=NULL,
GSR=FALSE,
Q2=0, Q2_max=NULL,
brainstructures="all",
keep_S=FALSE, keep_FC=FALSE,
FC=TRUE,
varTol=1e-6, maskTol=.1, missingTol=.1,
usePar=FALSE, wb_path=NULL,
verbose=TRUE) {
estimate_prior(
BOLD=BOLD, BOLD2=BOLD2,
template=template, inds=inds,
scale=scale, scale_sm_surfL=scale_sm_surfL, scale_sm_surfR=scale_sm_surfR,
scale_sm_FWHM=scale_sm_FWHM,
nuisance=nuisance, scrub=scrub, drop_first=drop_first,
hpf=hpf, TR=TR,
GSR=GSR,
Q2=Q2, Q2_max=Q2_max,
brainstructures=brainstructures,
keep_S=keep_S, keep_FC=keep_FC,
FC=FC,
varTol=varTol, maskTol=maskTol, missingTol=missingTol,
usePar=usePar, wb_path=wb_path,
verbose=verbose
)
}
#' @rdname estimate_prior
#'
estimate_prior.nifti <- function(
BOLD, BOLD2=NULL,
template, inds=NULL,
scale=c("local", "global", "none"),
nuisance=NULL,
scrub=NULL,
drop_first=0,
hpf=0, TR=NULL,
GSR=FALSE,
Q2=0, Q2_max=NULL,
mask=NULL,
keep_S=FALSE, keep_FC=FALSE,
FC=TRUE,
varTol=1e-6, maskTol=.1, missingTol=.1,
usePar=FALSE, wb_path=NULL,
verbose=TRUE) {
estimate_prior(
BOLD=BOLD, BOLD2=BOLD2,
template=template, inds=inds,
scale=scale,
nuisance=nuisance, scrub=scrub, drop_first=drop_first,
hpf=hpf, TR=TR,
GSR=GSR,
Q2=Q2, Q2_max=Q2_max,
mask=mask,
keep_S=keep_S, keep_FC=keep_FC,
FC=FC,
varTol=varTol, maskTol=maskTol, missingTol=missingTol,
usePar=usePar, wb_path=wb_path,
verbose=verbose
)
}