#' helper function to generate confounds txt file for inclusion as additional regressors
#'
#' @details This function checks for whether an l1 confounds file already exists in the expected location.
#' If it does not, the l1 confounds file will be generated according to the $confound_settings$l1_confound_regressors
#' variable. If that is NULL, no confounds will be created.
#'
#' Confounds are generated by looking for motion parameters based on $confound_settings$motion_params_file and
#' $confound_settings$confound_input_file. These are integrated, then columns that are requested in
#' $confound_settings$l1_confound_regressors are written to a text file in the analysis subfolder for each subject
#'
#' Likewise, any run-level exclusions are tested according to $confound_settings$exclude_run. This should be a
#' quoted expression that can tested against the confounds data.frame such that a single TRUE/FALSE is returned
#' indicating whether a run is excluded (TRUE) or retained (FALSE) for higher-level analyses.
#'
#' @return a list containing: confounds=<location of l1 confounds file>, exclude_run=<TRUE/FALSE denoting run exclusion,
#' exclude_data=<location of file containing columns used to calculate run exclusion>
#' @param run_df a single-row data.frame containing information about the run whose confounds should be calculated
#' @param id The subject id
#' @param session The session number
#' @param run_number The run number
#' @param gpa a \code{glm_pipeline_arguments} object containing pipeline specification
#' @param demean whether to demean confounds after calculation
#' @keywords internal
#' @importFrom dplyr n
#' @importFrom checkmate assert_data_frame test_null assert_integerish assert_class
#' @importFrom data.table setnames fread
get_l1_confounds <- function(run_df = NULL, id = NULL, session = NULL, run_number = NULL, gpa, demean=TRUE) {
# For now, we have a fork in the inputs here. If a user specifies id, session, and run_number separately, then use those
# If a single-row data.frame is passed in, use that, and update its fields. The latter is our default in finalize_pipeline_configuration
if (!is.null(run_df)) {
checkmate::assert_data_frame(run_df)
return_run_df <- TRUE
id <- run_df$id
session <- run_df$session
run_number <- run_df$run_number
} else {
return_run_df <- FALSE
# lookup run information
run_df <- gpa$run_data %>%
dplyr::filter(id == !!id & session == !!session & run_number == !!run_number)
}
# populate drop_volumes in run_df if not present
if (!"drop_volumes" %in% names(run_df)) run_df$drop_volumes <- gpa$drop_volumes
if (checkmate::test_null(id)) stop("get_l1_confounds requires a specific id for lookup")
checkmate::assert_integerish(session, null.ok=TRUE)
if (is.null(session)) session <- 1
checkmate::assert_integerish(run_number, lower=1, null.ok = FALSE)
checkmate::assert_class(gpa, "glm_pipeline_arguments")
lg <- lgr::get_logger("glm_pipeline/l1_setup")
lg$set_threshold(gpa$lgr_threshold)
# these columns should be populated to run_df for every case
calculation_columns <- c(
"run_nifti", "run_volumes", "orig_volumes", "first_volume",
"last_volume", "truncate_volumes", "l1_confound_file", "exclude_run"
)
# use defaults if missing in run_df (ensure same return for every input)
calculation_defaults <- data.frame(
orig_volumes = NA_integer_, first_volume = NA_integer_, last_volume = NA_integer_,
truncate_volumes = NA_integer_, l1_confound_file = NA_character_, exclude_run = FALSE, run_volumes = NA_integer_
)
run_df <- populate_defaults(run_df, calculation_defaults)
# structure to return if this lookup fails (predictable elements so that caller can handle them)
if (isTRUE(return_run_df)) {
empty_set <- run_df # unmodified copy of run_df
} else {
empty_set <- list(
l1_confound_file = NA_character_, l1_confounds_df = data.frame(),
exclude_run = FALSE, exclude_data = data.frame(), truncation_data = data.frame()
)
}
if (nrow(run_df) > 1L) {
print(run_df)
lg$error("Multiple matches for a single run in get_l1_confounds.")
return(empty_set)
} else if (nrow(run_df) == 0L) {
lg$error("Unable to locate a record in gpa$run_data for id %s, session %s, run_number %s.", id, session, run_number)
return(empty_set)
}
# Park confound files in an analysis-level subfolder, not a particular model, since they are re-used across models.
# Note: normalizePath will fail to evaluate properly if directory does not exist
analysis_outdir <- get_output_directory(id=id, session=session, gpa=gpa, create_if_missing = TRUE, what="sub")
# Determine whether we should be returning information about l1 confound regressors
# and whether this information has already been calculated
generate_l1_confounds <- FALSE
if (is.null(gpa$confound_settings$l1_confound_regressors) && is.null(gpa$confound_settings$spike_volumes)) {
# no confounds requested
expected_l1_confound_file <- NA_character_
} else {
expected_l1_confound_file <- file.path(analysis_outdir, paste0("run", run_number, "_l1_confounds.txt"))
if (!file.exists(expected_l1_confound_file)) {
lg$debug("Expected confounds file does not exist: %s.", expected_l1_confound_file)
generate_l1_confounds <- TRUE
}
}
# default empty data.frames for exclusion and truncation (in case of NULL exclude or truncate criteria)
exclude_data <- data.frame()
truncation_data <- data.frame()
l1_cached_df <- read_df_sqlite(gpa = gpa, id = id, session = session, run_number = run_number, table = "l1_run_calculations")
# determine whether we should be returning information about run exclusions
# and whether this information has already been calculated
generate_run_exclusion <- FALSE
if (is.null(gpa$confound_settings$exclude_run)) {
# no basis for exclusion (all runs okay)
exclude_run <- FALSE
} else {
exclude_data <- read_df_sqlite(gpa = gpa, id = id, session = session, run_number = run_number, table = "l1_exclusion_data")
if (!is.null(l1_cached_df)) {
exclude_run <- as.logical(l1_cached_df$exclude_run)
} else {
lg$debug("No record of run exclusion exists in database: %s.", gpa$output_locations$sqlite_db)
exclude_run <- NA #default to missing
generate_run_exclusion <- TRUE
}
}
# Determine whether we should be returning information about run truncation and whether this information has already been calculated.
# Note that truncation has to be calculated alongside timing events since truncate expressions often involve timing
generate_run_truncation <- FALSE
if (!is.null(gpa$confound_settings$truncate_run)) {
truncation_data <- read_df_sqlite(gpa = gpa, id = id, session = session, run_number = run_number, table = "l1_truncation_data")
if (is.null(truncation_data)) {
lg$debug("No record of run truncation data exists in database: %s.", gpa$output_locations$sqlite_db)
generate_run_truncation <- TRUE
}
}
# If run exclusion, l1 confounds, and truncation data exist, just use the precalculated information
if (!(generate_l1_confounds || generate_run_exclusion || generate_run_truncation)) {
if (!is.na(expected_l1_confound_file)) lg$debug("Returning extant file: %s in get_l1_confounds", expected_l1_confound_file)
# populate cached values for run if not recalculating anything
run_df$exclude_run <- exclude_run
run_df$l1_confound_file <- expected_l1_confound_file
if (is.null(l1_cached_df)) {
msg <- "Not re-generating confound information, but l1_cached_df is NULL. Cannot proceed."
lg$error(msg)
stop(msg)
} else {
# SQLite cannot support logical/boolean types, need to convert 0/1 outcome
l1_cached_df$exclude_run <- as.logical(l1_cached_df$exclude_run)
}
# add/replace any cached calculations.
# TODO: the logic of exclude_run and l1_confound above seem redundant. Likewise, the 'is.null(l1_cached_df)'
# above seems like it could be removed/factored out.
run_df[, calculation_columns] <- l1_cached_df[, calculation_columns]
if (isTRUE(return_run_df)) {
return(run_df)
} else {
return(list(
l1_confound_file = expected_l1_confound_file,
l1_confounds_df = data.table::fread(expected_l1_confound_file, data.table = FALSE),
exclude_run = exclude_run, exclude_data = exclude_data, truncation_data = truncation_data
))
}
}
# read external confounds file (contains all volumes)
confound_df <- read_df_sqlite(gpa = gpa, id = id, session = session, run_number = run_number, table = "l1_confound_inputs")
if (is.null(confound_df) && isTRUE(run_df$confound_input_file_present[1])) {
lg$debug("Generating l1 confounds for id: %s, session: %s, run_number: %s", id, session, run_number)
cfile <- get_mr_abspath(run_df, "confound_input_file")[1]
lg$debug("Reading confound file: %s", cfile)
confound_df <- tryCatch(data.table::fread(cfile, data.table=FALSE, na.strings=gpa$confound_settings$na_strings),
error = function(e) {
lg$error("Failed to read confound file: %s with error %s", cfile, as.character(e))
return(NULL)
})
if (!is.null(gpa$confound_settings$confound_input_colnames) && !is.null(confound_df)) {
if (length(gpa$confound_settings$confound_input_colnames) != ncol(confound_df)) {
lg$warn(
"Mismatch in number of columns in confound file: %s relative to $confound_settings$confound_input_colnames",
run_df$confound_input_file[1]
)
} else {
# only set names in confound_df if the number of columns matches
data.table::setnames(confound_df, gpa$confound_settings$confound_input_colnames)
}
}
# cache original confounds to db
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number,
data = confound_df, table = "l1_confound_inputs", immediate=TRUE
)
}
#read motion parameters file (all volumes)
motion_df <- read_df_sqlite(gpa = gpa, id = id, session = session, run_number = run_number, table = "l1_motion_parameters")
if (is.null(motion_df) && isTRUE(run_df$motion_params_present[1])) {
lg$debug("Generating motion params for id: %s, session: %s, run_number: %s", id, session, run_number)
mfile <- get_mr_abspath(run_df, "motion_params_file")[1]
lg$debug("Reading motion file: %s", mfile)
# Note: Avoid demeaning columns within generate_motion_regressors so that calculated regressors do not
# change scale prior to testing the run exclusion criteria below. Demeaning can be applied safely thereafter.
# Note: do not specify drop_volumes or last_volume at this phase -- we want original (full length) motion params
motion_df <- tryCatch({
generate_motion_regressors(
mfile,
col.names = gpa$confound_settings$motion_params_colnames,
demean=FALSE, na.strings=gpa$confound_settings$na_strings, lg=lg
)}, error = function(e) {
lg$error("Failed to read motion file: %s with error %s", run_df$motion_params_file[1], as.character(e))
return(NULL)
})
# cache motion parameters to db
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number, data = motion_df,
table = "l1_motion_parameters", immediate=TRUE
)
}
# combine motion and confound files (all volumes)
if (is.null(motion_df) && is.null(confound_df)) {
lg$info(
"Neither confounds nor motion parameters are available for id: %s, session: %s, run_number: %d",
id, session, run_number
)
return(empty_set)
} else if (is.null(motion_df)) {
all_confounds <- confound_df
} else if (is.null(confound_df)) {
all_confounds <- motion_df
} else {
if (nrow(motion_df) != nrow(confound_df)) {
lg$error("Number of rows in motion_df is: %d and in confound_df is %d. Cannot combine", nrow(motion_df), nrow(confound_df))
return(empty_set)
} else {
# prefer confound_df as authoritative in cases where motion_df has overlapping columns
overlap_names <- intersect(names(motion_df), names(confound_df))
if (length(overlap_names) > 0L) {
lg$info(
"Motion parameters have overlapping columns with confounds file: %s. Preferring confounds to motion params",
run_df$confound_input_file[1L]
)
lg$info("Overlap: %s", overlap_names)
data.table::setnames(motion_df, old = overlap_names, new = paste0(overlap_names, ".mot"))
}
all_confounds <- dplyr::bind_cols(confound_df, motion_df)
}
}
# add volume and time columns to confounds in case these are helpful in exclusion or truncation expressions
all_confounds <- all_confounds %>%
mutate(volume = 1:n(), time = (volume - 1) * run_df$tr[1L])
# handle NA values in confounds -> convert to 0
has_na <- sapply(all_confounds, anyNA)
if (any(has_na)) {
lg$warn("NA values found in confounds file: %s", cfile)
lg$warn("These will be converted to 0s before evaluating exclude_run and writing l1_confound_file: %s", expected_l1_confound_file)
all_confounds[, has_na] <- as.data.frame(apply(all_confounds[, has_na], 2, function(x) {
x[is.na(x)] <- 0
return(x)
}))
}
# retain columns used in calculating run truncation
if (isTRUE(generate_run_truncation) && !is.null(gpa$confound_settings$truncate_run)) {
truncation_data <- all_confounds[, intersect(gpa$confound_settings$run_truncation_columns, names(all_confounds)), drop = FALSE]
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number, data = truncation_data,
table = "l1_truncation_data", immediate = TRUE
)
}
# At this point, truncation_data should be populated. Perform trucation on run_nifti if needed and update run_df with final volume settings
run_df <- truncate_runs(run_df, gpa, analysis_outdir, truncation_data, lg)
# truncate confounds to match
all_confounds <- all_confounds[run_df$first_volume:run_df$last_volume, , drop = FALSE]
motion_df <- motion_df[run_df$first_volume:run_df$last_volume, , drop = FALSE]
# calculate whether to retain or exclude this run
if (isTRUE(generate_run_exclusion) && !is.null(gpa$confound_settings$exclude_run)) {
if (!all(gpa$confound_settings$run_exclusion_columns %in% names(all_confounds))) {
lg$warn("Missing exclusion columns for subject: %s, session: %s", run_df$id[1L], run_df$session[1L])
lg$warn("Column: %s", setdiff(gpa$confound_settings$run_exclusion_columns, names(all_confounds)))
lg$warn("We will exclude this run from analysis until this is resolved!")
exclude_run <- TRUE
} else {
# evaluate run exclusion expression
exclude_run <- tryCatch(with(all_confounds, eval(parse(text = gpa$confound_settings$exclude_run))),
error = function(e) {
lg$error(
"Problem evaluating run exclusion for subject: %s, session: %s, run_number: %s, expr: %s",
id, session, run_number,
gpa$confound_settings$exclude_run
)
lg$error("Defaulting to exclusion of run.")
return(TRUE)
}
)
if (length(exclude_run) > 1L) {
lg$error("Run exclusion expression %s returned %d results!", gpa$confound_settings$exclude_run, length(exclude_run))
lg$error("Modify the expression so that it returns a single TRUE/FALSE.")
lg$error("Defaulting to exclusion of run.")
exclude_run <- TRUE
}
}
# write the exclusion basis to the database
exclude_data <- all_confounds[, intersect(gpa$confound_settings$run_exclusion_columns, names(all_confounds)), drop = FALSE]
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number, data = exclude_data,
table = "l1_exclusion_data", immediate=TRUE
)
}
# add run exclusion column to data.frame
run_df$exclude_run <- exclude_run
# check for missing confound columns
if (!all(gpa$confound_settings$l1_confound_regressors %in% names(all_confounds))) {
lg$warn("Missing confound columns for subject: %s, session: %s", run_df$id[1L], run_df$session[1L])
lg$warn("Column: %s", setdiff(gpa$confound_settings$l1_confound_regressors, names(all_confounds)))
}
# trim l1 confounds to only those requested
all_confounds <- all_confounds[, intersect(gpa$confound_settings$l1_confound_regressors, names(all_confounds))]
num_cols <- sapply(all_confounds, class) %in% c("integer", "numeric")
if (any(!num_cols)) {
lg$warn("Confound file contains non-numeric columns. Not sure what will happen!")
lg$warn("Column: %s", names(all_confounds)[!num_cols])
}
# demean confound regressors, if requested (usually a good idea)
if (isTRUE(demean)) {
lg$debug("Demeaning columns of l1 confounds matrix: %s", expected_l1_confound_file)
all_confounds[, num_cols] <- as.data.frame(apply(all_confounds[, num_cols], 2, function(x) {
x - mean(x, na.rm = TRUE)
}))
}
# incorporate spike regressors if requested (not used in conventional AROMA)
spikes <- compute_spike_regressors(motion_df, gpa$confound_settings$spike_volumes, lg = lg)
if (!is.null(spikes)) all_confounds <- cbind(all_confounds, spikes)
lg$debug("Writing l1 confounds to file: %s", expected_l1_confound_file)
write.table(all_confounds, file = expected_l1_confound_file, row.names = FALSE, col.names = FALSE)
run_df$l1_confound_file <- expected_l1_confound_file
# cache final confounds to db
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number,
data = all_confounds, table = "l1_confounds", immediate=TRUE
)
# cache run_df calculations to db
insert_df_sqlite(gpa,
id = id, session = session, run_number = run_number,
data = run_df[, calculation_columns, drop = FALSE], table = "l1_run_calculations", immediate = TRUE
)
if (isTRUE(return_run_df)) {
return(run_df)
} else {
return(list(
l1_confound_file = expected_l1_confound_file,
l1_confounds_df = all_confounds,
exclude_run = exclude_run, exclude_data = exclude_data, truncation_data = truncation_data
))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.