fmri.pipeline: Analysis of fMRI Data on HPC clusters

Documented in fsl_l1_model

#' This function specifies a first-level GLM model in FSL based on a build_design_matrix
#'   bdm object.
#'
#' @param d_obj a single-subject design matrix object generated by build_design_matrix
#' @param gpa a gpa (glm_pipeline_arguments) object generated by setup_glm_pipeline
#' @param model_name a string indicating the model name within \code{gpa} to setup. If
#'   you wish to setup multiple models, this is handled upstream in setup_l1_models.R
#' @param run_nifti an optional character vector of NIfTI filenames used in l1 analysis.
#'   Prefer to pass these via the build_design_matrix object in $run_nifti
#'
#' @importFrom checkmate assert_class assert_string assert_character assert_file_exists
#' @importFrom lgr get_logger
#' @importFrom parallel clusterApply stopCluster makeForkCluster
#' @author Michael Hallquist
#' @export
#'
fsl_l1_model <- function(
  id=NULL, session=NULL, l1_confound_files=NULL, d_obj, gpa,
  model_name=NULL, run_nifti=NULL, nvoxels=NULL, execute_feat=FALSE) {

  checkmate::assert_scalar(id, null.ok = FALSE)
  checkmate::assert_scalar(session, null.ok = FALSE)
  checkmate::assert_character(l1_confound_files, null.ok = TRUE)
  checkmate::assert_class(d_obj, "bdm")
  checkmate::assert_class(gpa, "glm_pipeline_arguments")
  checkmate::assert_string(model_name) # single string
  if (is.null(nvoxels)) { nvoxels <- rep(5e4, length(run_nifti)) } #arbitrarily use 50k voxels in fsf

  lg <- lgr::get_logger("glm_pipeline/l1_setup")
  lg$set_threshold(gpa$lgr_threshold)

  if (!is.null(d_obj$run_nifti)) {
    lg$debug("Using internal NIfTI files (run_nifti) within d_obj for Feat level 1 setup")
    run_nifti <- d_obj$run_nifti
  }

  if (!is.null(l1_confound_files)) {
    stopifnot(length(l1_confound_files) == length(run_nifti))
  } else {
    l1_confound_files <- NA_character_ # to make data.frame() call work for empty input
  }

  stopifnot(length(run_nifti) == length(d_obj$run_volumes)) #need these to align
  checkmate::assert_character(run_nifti, null.ok=FALSE)
  checkmate::assert_file_exists(run_nifti) #all exist

  stopifnot(model_name %in% names(gpa$l1_models$models))

  fsf_template <- readLines(system.file("feat_lvl1_nparam_template.fsf", package = "fmri.pipeline"))

  fsl_l1_output_dir <- get_output_directory(
    id = id, session = session, l1_model = model_name,
    gpa = gpa, glm_software = "fsl", what = "l1"
  )

  l1_contrasts <- gpa$l1_models$models[[model_name]]$contrasts #contrast matrix for this model
  regressor_names <- gpa$l1_models$models[[model_name]]$regressors #names of regressors in design matrix for this model

  # TODO: this is not implemented and may not be necessary
  if (dir.exists(fsl_l1_output_dir) &&
    file.exists(file.path(fsl_l1_output_dir, "feat_l1_inputs.rds")) &&
    isFALSE(gpa$glm_settings$fsl$force_l1_creation)) {
    lg$info("%s exists. Skipping l1 fsf setup in fsl_l1_model().", fsl_l1_output_dir)
    subj_l1_spec <- readRDS(file.path(fsl_l1_output_dir, "feat_l1_inputs.rds"))
  }

  lg$info("Create l1 fsl_l1_output_dir: %s", fsl_l1_output_dir)
  dir.create(fsl_l1_output_dir, showWarnings=FALSE) #one directory up from a given run

  feat_l1_df <- data.frame(
    id = id, session = session, run_number = d_obj$runs_to_output, run_volumes = d_obj$run_volumes,
    run_nifti = run_nifti, l1_model = model_name, l1_confound_file = l1_confound_files, to_run = as.logical(NA)
  )
  feat_info <- list() #to hold status of feat runs
  all_l1_feat_fsfs <- c()

  #FSL computes first-level models on individual runs
  for (rr in seq_along(run_nifti)) {
    lg$info("Processing FSL L1 model setup for NIfTI: %s", run_nifti[rr])
    this_template <- fsf_template # start with default copy of template for this run

    l1_feat_fsf <- file.path(fsl_l1_output_dir, paste0("FEAT_LVL1_run", feat_l1_df$run_number[rr], ".fsf"))
    l1_feat_dir <- file.path(fsl_l1_output_dir, paste0("FEAT_LVL1_run", feat_l1_df$run_number[rr]))
    feat_info[[rr]] <- get_feat_status(feat_dir = l1_feat_dir, feat_fsf = l1_feat_fsf, lg = lg)

    # skip re-creation of FSF and do not run below unless force_l1_creation==TRUE
    if (file.exists(l1_feat_fsf) && isFALSE(gpa$glm_settings$fsl$force_l1_creation)) {
      lg$info("Skipping existing feat fsf file: %s", l1_feat_fsf)
      next
    }

    # handle inclusion of confound regressors
    # turn off confounds if l1_confound_regressors is NULL, l1_confound_files[rr] is NA or does not exist
    disable_confounds <- TRUE # default to TRUE
    if (!is.null(gpa$confound_settings$l1_confound_regressors)) {
      # TODO: consider treating confound regressors as a feature of a given l1 model, rather than enforcing across all models
      l1_confound_file <- l1_confound_files[rr]
      if (!file.exists(l1_confound_file)) {
        lg$warn(
          "Cannot find l1_confound_file for id: %d, session: %d, run_number: %d, file: %s",
          id, session, feat_l1_df$run_number[rr], l1_confound_file
        )
      } else {
        this_template <- gsub(".CONFOUNDS.", l1_confound_file, this_template, fixed = TRUE)
        disable_confounds <- FALSE
      }
    }

    # turn off confounds, if needed (missing l1 confounds file, or no confounds requested)
    if (isTRUE(disable_confounds)) { # disable confounds
      this_template <- gsub("set fmri(confoundevs) 1", "set fmri(confoundevs) 0", this_template, fixed = TRUE) # disable
      l1 <- grep("# Confound EVs text file for analysis 1", this_template, fixed = TRUE)
      l2 <- grep("set confoundev_files(1) \".CONFOUNDS.\"", this_template, fixed = TRUE)
      this_template <- this_template[-1 * c(l1, l2)] # drop confound files lines
    }

    # search and replace within fsf file for appropriate sections
    # .OUTPUTDIR. is the feat output location
    # .NVOL. is the number of volumes in the run
    # .FUNCTIONAL. is the fmri data to process (sans extension)
    # .CONFOUNDS. is the confounds file for GLM
    # .TR. is the sequence TR in seconds

    this_template <- gsub(".OUTPUTDIR.", l1_feat_dir, this_template, fixed = TRUE)
    this_template <- gsub(".NVOL.", d_obj$run_volumes[rr], this_template, fixed=TRUE)
    this_template <- gsub(".FUNCTIONAL.", gsub(".nii(.gz)*$", "", run_nifti[rr]), this_template, fixed=TRUE)
    this_template <- gsub(".TR.", d_obj$tr, this_template, fixed=TRUE)
    this_template <- gsub(".NVOXELS.", nvoxels[rr], this_template, fixed=TRUE)

    # handle custom L1 FSF syntax
    this_template <- add_custom_feat_syntax(this_template, gpa$additional$feat_l1_args, lg)

    if (isTRUE(gpa$use_preconvolve)) {
      lg$info("Using preconvolved regressors in Feat level 1 analysis")
      # generate ev syntax

      #add common ingredients for preconvolved regressors
      regressors <- lapply(regressor_names, function(rname) {
        t_file <- tryCatch(
          d_obj$timing_files$convolved[glue("run_number{feat_l1_df$run_number[rr]}"), rname],
          error = function(e) {
            lg$error(glue("Could not find timing file for run: {feat_l1_df$run_number[rr]}, regressor: {rname} in {fsl_l1_output_dir}"))
            return(NULL)
          }
        )

        if (!checkmate::test_file_exists(t_file)) {
          msg <- glue("Cannot find timing file: {t_file}")
          lg$error(msg)
          stop(msg)
        }

        list(
          name = rname, waveform = "custom_1", convolution = "none", tempfilt = 1,
          timing_file = t_file
        )
      })

      lg$debug("fsl_generate_fsf_lvl1_ev_syntax")
      ev_syn <- fsl_generate_fsf_lvl1_ev_syntax(regressors)

      #creation of l1 contrast matrices, including the diagonal contrasts, now abstracted to finalize_pipeline_configuration.R
      #thus, l1_contrasts is already a contrast matrix ready to be passed to the fsl_generate_fsf_contrast_syntax function
      lg$debug("fsl_generate_fsf_contrast_syntax")
      cmat_syn <- fsl_generate_fsf_contrast_syntax(l1_contrasts)

      #combine all syntax
      this_template <- c(this_template, ev_syn, cmat_syn)
    } else {
      #TODO -- add back in standard 3-column FSL timing
      lg$error("No support for FSL-internal convolved yet")
      stop("cannot use FSL internal timing files right this moment...")
    }

    lg$info("Writing l1 fsf file: %s", l1_feat_fsf)
    cat(this_template, file=l1_feat_fsf, sep="\n")

    all_l1_feat_fsfs <- c(all_l1_feat_fsfs, l1_feat_fsf)
  }

  # combined feat lookups with other columns
  feat_info_df <- data.table::rbindlist(feat_info)
  stopifnot(nrow(feat_info_df) == nrow(feat_l1_df))
  feat_l1_df <- dplyr::bind_cols(feat_l1_df, feat_info_df)
  feat_l1_df$to_run <- !feat_l1_df$feat_complete

  # If execute_feat is TRUE, execute feat on each fsf files at this stage,
  # using an 8-node socket cluster (since we have 8 runs).
  # If execute_feat is FALSE, just create the fsf files but don't execute the analysis
  if (isTRUE(execute_feat) && length(all_l1_feat_fsfs) > 0L) {
    nnodes <- min(length(all_l1_feat_fsfs), parallel::detectCores())
    lg$info("Starting fork cluster with %d workers", nnodes)
    cl_fork <- parallel::makeForkCluster(nnodes=nnodes)
    runfeat <- function(fsf) {
      runname <- basename(fsf)
      run_fsl_command(paste("feat", fsf), stdout=file.path(dirname(fsf), paste0("feat_stdout_", runname)),
        stderr=file.path(dirname(fsf), paste0("feat_stderr_", runname)))
    }

    lg$info("Executing all subject feat files with clusterApply")
    parallel::clusterApply(cl_fork, all_l1_feat_fsfs, runfeat)
    parallel::stopCluster(cl_fork)
  }

  return(feat_l1_df)

}