R/generate_spm_mat.R

Defines functions generate_spm_mat

Documented in generate_spm_mat

#' Uses an object from build_design_matrix to generate a corresponding SPM GLM design matrix.
#'
#' @param bdm The object (of class 'bdm') generated by build_design_matrix. In particular, the function
#'            uses the \code{$design} and \code{$run_niftis} elements to extract relevant regressors and
#'            information about the NIfTI files to analyze.
#' @param ts_files A character vector of nuisance text files -- one per run -- to be added as covariates in
#'            SPM's multi_reg field.
#' @param output_dir The path where SPM scripts and outputs should go for this design matrix. The path can be
#'            relative or absolute, but will be converted to absolute internally using normalizePath().
#' @param hpf The high-pass filter cutoff (in seconds) used to remove low frequencies prior to analysis.
#'            Default: 100
#' @param hrf_derivs Whether to include derivatives of the regressors in the model to capture HRF variation.
#'            Options are: "none", "time" (temporal derivative), "time_dispersion" (temporal and dispersion derivatives).
#'            Default: "none"
#' @param nifti_tmpdir Where to place uncompressed NIfTIs for analysis (SPM doesn't handle .nii.gz)
#' @param cleanup_tmp Whether to remove uncompressed .nii files after completing this function
#' @param condition_contrasts Whether to setup contrasts for each condition. In multi-run data, these contrasts will
#'            represent the condition average across all runs (e.g., [ .5, .5 ] for 2 runs). Default: TRUE
#' @param unit_contrasts Whether to estimate a unit-height contrast for every regressor in the design.
#'            This essentially includes a diagonal matrix in the contrast specification. Default: FALSE
#' @param include_block_contrasts Whether to estimate contrasts for the run-specific intercepts in the design.
#'            Note that this qualifies \code{condition_contrasts} and \code{unit_contrasts}, but does nothing on its own.
#' @param effects_of_interest_F Whether to include a single F test contrast with all regressors of interest. Useful
#'            in adjusting VOIs for nuisance regressors. Default: TRUE
#' @param spm_execute_setup Whether to run the GLM setup script in MATLAB, creating SPM.mat. Default: FALSE
#' @param spm_execute_glm Whether to run the GLM after creating SPM.mat. This could take a while! Default: FALSE
#' @param spm_execute_contrasts Whether to compute contrasts after GLM is complete. Depends on \code{spm_execute_glm}. Default: FALSE
#' @param concatenate_runs Whether to convert multi-run data into a single concatenated session. This will execute spm_fmri_concatenate
#'            after the GLM setup is complete. This script computes run-specific whitening and high-pass filters while keeping
#'            a single concatenated time series. This is useful as a preamble to DCM, which often uses concatenated time series. Default: FALSE
#' @param generate_qsub Whether to create a qsub PBS script for running MATLAB scripts for design matrix creation and estimation. Default: TRUE
#' @param execute_qsub Whether to submit the PBS script create by \code{generate_qsub} to the cluster using the qsub command. Default: FALSE
#' @param spm_path The path to an spm12 directory. This will be included in MATLAB scripts to ensure that spm is found.
#'
#' @importFrom R.matlab readMat
#'
#' @details
#'
#' This function is intended to setup a first-level (subject) fMRI GLM analysis in SPM12. It accepts an object from build_design_matrix
#' and uses the contents of this object to setup timing and HRF settings (including parametric modulation) for all regressors.
#'
#' At this point, the function is not very flexible in terms of allowing custom tweaks to each field. It is mostly intended to setup
#' the necessary SPM structures to support DCM, which depends on an SPM.mat file. I discovered the spm12r package recently,
#' which has a lot of flexibility for setting up a first-level model. In the future, we may wish to adapt the code below to use
#' this instead. \url{https://cran.rstudio.com/web/packages/spm12r/vignettes/fmri_task_processing.html}
#'
#' @return A list containing the MATLAB syntax/scripts for GLM setup, execution, and contrast estimation
#' @author Michael Hallquist
#' 
#' @export
generate_spm_mat <- function(bdm, ts_files=NULL, output_dir="spm_out",
                             hpf=100, hrf_derivs="none", nifti_tmpdir=tempdir(),
                             cleanup_tmp=FALSE, condition_contrasts=TRUE,
                             unit_contrasts=TRUE, effects_of_interest_F=TRUE,
                             spm_execute_setup=FALSE, spm_execute_glm=FALSE,
                             spm_execute_contrasts=FALSE, concatenate_runs=FALSE, generate_qsub=TRUE,
                             execute_qsub=FALSE, spm_path="/gpfs/group/mnh5174/default/lab_resources/spm12") {

  #for concatenate runs, need to setup a single SPM mat (one session), run GLM setup, then call spm_fmri_concatenate('SPM.mat', nscans)
  #where the latter is a vector run volumes ($run_volumes in BDM)
  
  spm_syntax <- list()

  if (is.null(spm_path)) { message("No spm_path provided. The scripts will assume that spm is already in the MATLAB path.") }
  if (!dir.exists(spm_path)) { stop("Provided spm_path does not exist: ", spm_path) }
  
  if (!inherits(bdm, "bdm")) { stop("generate_spm_mat requires an object of class 'bdm' from build_design_matrix.") }

  if (spm_execute_glm && !spm_execute_setup) {
    message("Because spm_execute_glm is TRUE, I will set spm_execute_setup to TRUE, too.")
    spm_execute_setup <- TRUE #running the GLM depends on first setting up the SPM.mat object
  }

  if (spm_execute_contrasts && !spm_execute_setup) {
    message("Because spm_execute_contrasts is TRUE, I will set spm_execute_setup to TRUE, too.")
    spm_execute_setup <- TRUE #running the contrasts depends on first setting up the SPM.mat object
  }
    
  if (spm_execute_contrasts && !spm_execute_glm) {
    message("Because spm_execute_contrasts is TRUE, I will set spm_execute_glm to TRUE, too.")
    spm_execute_glm <- TRUE #running the contrasts depends on first estimating the model
  }

  if (execute_qsub && !generate_qsub) {
    message("Because execute_qsub is TRUE, I will set generate_qsub to TRUE, too.")
    generate_qsub <- TRUE
  }
  
  #extract key ingredients from bdm object  
  if (concatenate_runs) {
    design <- bdm$design_concat #already has events concatenated into one run
  } else {
    design <- bdm$design
  }
  
  run_niftis <- bdm$run_niftis
  tr <- bdm$tr
  
  if (inherits(design, "list")) {
    #in case of 1-run input, convert 1D list to an array with 1 row and nregressors columns
    design <- array(design, dim=c(1, length(design)), dimnames=list(c("run1"), names(design)))
  }

  if (is.null(tr)) { stop("You must specify a TR in seconds") }
  if (!concatenate_runs && length(run_niftis) != nrow(design)) { stop("Length of run_niftis argument is not equal to rows in design") }
  
  stopifnot(all(file.exists(run_niftis)))

  if (!is.null(ts_files)) {
    stopifnot(all(file.exists(ts_files)))
    stopifnot(length(ts_files) == length(run_niftis))
  }

  if (!dir.exists(output_dir)) { dir.create(output_dir) }
  output_dir <- normalizePath(output_dir) #convert to absolute path to avoid any ambiguity
  
  nruns <- dim(design)[1]
  nregressors <- dim(design)[2]
  
  #NB. RNifti::niftiHeader is far faster than readNIfTI with read_data=FALSE
  run_headers <- lapply(run_niftis, function(x) { RNifti::niftiHeader(x) })
  run_lengths <- sapply(run_headers, function(x) { x$dim[5] }) #first element is just 3 versus 4 dimensions

  #spm only works with .nii files, not .nii.gz
  gzipped <- grepl(".nii.gz$", run_niftis)
  dir.create(nifti_tmpdir, showWarnings=FALSE)
  run_niftis[gzipped] <- sapply(run_niftis[gzipped], function(x) {
    tmpout <- tempfile(fileext=".nii", tmpdir=nifti_tmpdir)
    system(paste0("gunzip -c ", x, " > ", tmpout))
    return(tmpout)
  })

  spm_preamble <- c(
    ifelse(is.null(spm_path), "", paste0("addpath('", spm_path, "');")),
    "spm('defaults', 'fmri');",
    "spm_jobman('initcfg');",
    ""    
  )

  baseobj <- paste0("matlabbatch{1}.spm.stats.fmri_spec")

  if (hrf_derivs == "none") {
    hrf_string <- paste0(baseobj, ".bases.hrf = struct('derivs', [0 0]);")
  } else if (hrf_derivs == "time") {
    hrf_string <- paste0(baseobj, ".bases.hrf = struct('derivs', [1 0]);")
  } else if (hrf_derivs == "time_dispersion") {
    hrf_string <- paste0(baseobj, ".bases.hrf = struct('derivs', [1 1]);")
  } else { stop("don't understand hrf_derivs argument: ", hrf_derivs) }

  m_string <- c(
    paste0("matlabbatch = []; %initialize empty structure"),
    paste0(baseobj, ".dir = {'", output_dir, "'};"),
    "",
    "% timing",
    paste0(baseobj, ".timing.units = 'secs';"),
    paste0(baseobj, ".timing.RT = ", tr, ";"),
    paste0(baseobj, ".timing.fmri_t = 20; % microtime resolution"),
    paste0(baseobj, ".timing.fmri_t0 = 1; % alignment within TR"),
    #"% factorial",
    paste0(baseobj, ".fact = struct('name', {}, 'levels', {});"),
    "% basis functions",
    hrf_string,
    "% volterra",
    paste0(baseobj, ".volt = 1;"),
    "% global",
    paste0(baseobj, ".global = 'None';"),
    "% mthresh",
    paste0(baseobj, ".mthresh = 0.8000;"),
    "% mask",
    paste0(baseobj, ".mask = {''};"),
    "% cvi",
    paste0(baseobj, ".cvi = 'AR(1)';")
  )

  for (rr in 1:nruns) {
    if (concatenate_runs) {
      run_vol_concat <- cumsum(run_lengths)
      stopifnot(nruns==1) #should be using a concat design that sees only one run
      scans_string <- c()
      for (nn in 1:length(run_niftis)) {
        scans_string <- c(
          scans_string,
          paste0("% Run ", nn, " scans"),
          paste0("Nt = ", run_lengths[nn], ";"),
          paste0("Nt_start = ", ifelse(nn > 1, run_vol_concat[nn-1], 0), ";"),
          "for i = 1:Nt",
          paste0("  ", baseobj, ".sess(", rr, ").scans{i+Nt_start,1} = [ '", run_niftis[nn], "' ',' num2str(i) ];"),
          "end",
          ""
        )
      }
    } else {
      scans_string <- c(
        paste0("Nt = ", run_lengths[rr], ";"),
        "for i = 1:Nt",
        paste0("  ", baseobj, ".sess(", rr, ").scans{i,1} = [ '", run_niftis[rr], "' ',' num2str(i) ];"),
        "end"
      )
    }

    m_string <- c(m_string,
      paste0(baseobj, ".sess(", rr, ").scans = {};"),
      scans_string,
      paste0(baseobj, ".sess(", rr, ").multi = {''};"),
      paste0(baseobj, ".sess(", rr, ").regress = struct('name', {}, 'val', {});"),
      paste0(baseobj, ".sess(", rr, ").hpf = ", hpf, ";")
    )

    #add additional time series for this run
    if (!is.null(ts_files)) {
      m_string <- c(m_string, paste0(baseobj, ".sess(", rr, ").multi_reg = { '", ts_files[rr], "' };") ) } else {
        m_string <- c(m_string, paste0(baseobj, ".sess(", rr, ").multi_reg = { '' };"))
      }

    #collect regressors that are aligned to the same event
    rmat <- design[rr,]

    event_alignment <- sapply(rmat, function(x) { attr(x, "event") })
    uevents <- unique(event_alignment)

    ulist <- list()

    for (u in uevents) {
      ureg <- rmat[which(event_alignment == u)]

      parametric <- sapply(ureg, function(x) { ifelse(all(abs(x[,"value"]) - 1.0 < 1e-5), FALSE, TRUE) })
      uvalues <- sapply(ureg, function(r) { r[,"value"] })
      which_1 <- apply(uvalues, 2, function(x) { all(abs(x - 1.0) < 1e-3, na.rm=TRUE) })
      which_na <- apply(uvalues, 1, function(r) { any(is.na(r)) }) # missing values for any event (row)?

      #SPM does not like NAs or NaNs in parametric modulators.
      #It always generates both the event-ness regressor (based on the onsets vector) and the parametric modulator (modulated by pmod)
      #Therefore, we need to handle NAs in the modulators. If I were using SPM for a GLM, I might just handle the convolution myself and hand off a convolved signal.
      #But because the immediate goal here is to have a design matrix for DCM, I will remove the NA trials first -- for both event-ness and modulator --
      #And put a separate 'eventness' regressor for trials when an event occurred, but there is no modulation value
      
      if (length(ureg) > 1L) {
        #determine eventness and parametric modulator(s)
        utimes <- sapply(ureg, function(r) { r[,"onset"] })
        stopifnot(all(na.omit(apply(utimes, 1, function(x) { sd(x) }) < 1e-3))) #make sure that all times are the same (within rounding error)

        if (sum(which_1) > 1L) {
          stop("More than one unit-height regressor for event: ", u)
        } else if (sum(which_1) == 0L) {
          stop("No unit-height regressor for event: ", u)
        }

        ulist[[u]][["event"]] <- ureg[[ names(ureg)[which_1] ]][!which_na,,drop=FALSE]
        ulist[[u]][ names(ureg)[!which_1] ] <- lapply(ureg[ names(ureg)[!which_1] ], function(reg) { reg[!which_na,,drop=FALSE] })
      } else {
        ulist[[u]][["event"]] <- ureg[[1]][!which_na,,drop=FALSE] #just copy this across -- only one regressor on this alignment
        if (parametric[1]) { warning("Only one regressor with alignment: ", u, ", but it appears to be parametric. SPM will add the eventness regressor, too.") }
      }

      #add eventness regressor for only missing trials, if needed
      if (any(which_na)) {
        ulist[[paste0(u, "_nopmod")]][["event"]] <- ureg[[ names(ureg)[which_1] ]][which_na,,drop=FALSE]
      }
      
    }

    for (cc in 1:length(ulist)) {
      #event-only lists have length 1
      if (length(ulist[[cc]]) > 1L) {
        pmod_string <- c()
        for (pm in 2:length(ulist[[cc]])) {
          pmod_string <- c(pmod_string, paste0(baseobj, ".sess(", rr, ").cond(", cc, ").pmod(", pm-1, ") = struct('name', {'", names(ulist[[cc]])[pm], "-pmod'}, 'param', { [ ",
            paste(ulist[[cc]][[pm]][,"value"], collapse=", "), " ] }, 'poly', { 1 });"))
        }
      } else {
        pmod_string <- paste0(baseobj, ".sess(", rr, ").cond(", cc, ").pmod = struct('name', {}, 'param', {}, 'poly', {});")
      }
      
      m_string <- c(m_string,
        paste0(baseobj, ".sess(", rr, ").cond(", cc, ").name = '", names(ulist)[cc], "';"),
        paste0(baseobj, ".sess(", rr, ").cond(", cc, ").onset = [ ", paste(ulist[[cc]][["event"]][,"onset"], collapse=", "), " ];"),
        paste0(baseobj, ".sess(", rr, ").cond(", cc, ").duration = [ ", paste(ulist[[cc]][["event"]][,"duration"], collapse=", "), " ];"),
        paste0(baseobj, ".sess(", rr, ").cond(", cc, ").tmod = 0;"),
        pmod_string,
        paste0(baseobj, ".sess(", rr, ").cond(", cc, ").orth = 1;"),
        "",
        ""
      )

    }
  }

  #Generate syntax for executing SPM GLM setup
  exec_string <- c(
    spm_preamble,
    ifelse (file.exists(file.path(output_dir, "SPM.mat")), paste0("delete('", file.path(output_dir, "SPM.mat"), "');"), ""), #remove SPM.mat if it exists
    paste0("run('", file.path(output_dir, "glm_design_batch.m"), "');"), #source the settings for this GLM
    "% RUN DESIGN MATRIX JOB",
    paste0("spm_jobman('run',matlabbatch);")
  )

  if (concatenate_runs) {
    exec_string <- c(exec_string, paste0("spm_fmri_concatenate( [ '", output_dir, "' filesep 'SPM.mat'], [ ", paste(bdm$run_volumes, collapse=", "), " ]);"))
  }
  
  #write GLM setup to file
  cat(m_string, file=file.path(output_dir, "glm_design_batch.m"), sep="\n")
  cat(exec_string, file=file.path(output_dir, "setup_glm_design.m"), sep="\n")
  spm_syntax[["glm_design"]] <- m_string
  spm_syntax[["exec_glm_setup"]] <- exec_string

  if (spm_execute_setup) {
    system(paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "setup_glm_design.m"), "');exit;\""))
  } else {
    message("To run GLM setup script, execute this command: ", paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "setup_glm_design.m"), "');exit;\""))
  }

  #SPM model estimation setup
  baseobj <- paste0("matlabbatch{1}.spm.stats.fmri_est")
  m_string <- c(
    spm_preamble,
    paste0("matlabbatch = []; %initialize empty structure"),
    "% ESTIMATE MODEL",
    "",
    paste0(baseobj, ".spmmat = { [ '", output_dir, "' filesep 'SPM.mat']};"),
    "% write_residuals",
    paste0(baseobj, ".write_residuals = 0;"),
    "% method",
    paste0(baseobj, ".method.Classical = 1;"),
    "% RUN MODEL ESTIMATION JOB",
    "spm_jobman('run',matlabbatch);"
  )

  #write estimation to file
  cat(m_string, file=file.path(output_dir, "run_glm.m"), sep="\n")
  spm_syntax[["run_glm"]] <- m_string  

  if (spm_execute_glm) {
    system(paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "run_glm.m"), "');exit;\""))
  } else {
    message("To estimate GLM after design matrix setup, execute this command: ", paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "run_glm.m"), "');exit;\""))
  }

  spm_contrast_cmds <- generate_spm_contrasts(output_dir, condition_contrasts, unit_contrasts,
    effects_of_interest_F, spm_path, execute=spm_execute_contrasts)
  
#  if (spm_execute_contrasts) {
#    system(paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "estimate_glm_contrasts.m"), "');exit;\""))
#  } else {
#    message("To estimate contrasts after design matrix setup, execute this command: ", paste0("module load matlab/R2017b; matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "estimate_glm_contrasts.m"), "');exit;\""))
#  }     
  
  if (generate_qsub) {
    qsub_job <- c(
      "#!/usr/bin/env sh",
      "",
      paste0("#PBS -l walltime=4:00:00"),
      "#PBS -A mnh5174_a_g_hc_default",
      "#PBS -j oe",
      "#PBS -m n", #no email
      paste0("#PBS -l nodes=1:ppn=1:himem"), #just 1 core needed for SPM
      "#PBS -W group_list=mnh5174_collab",
      "#PBS -l pmem=8gb", #make sure each process has enough memory
      "",
      "export G=/gpfs/group/mnh5174/default #our group allocation",
      "module use $G/sw/modules",
      "",
      "module load matlab/R2017b",
      "module load r/3.6.0",
      paste0("cd '", output_dir, "'"),
      paste0("matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "setup_glm_design.m"), "');exit;\""),
      paste0("matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "run_glm.m"), "');exit;\""),
      spm_contrast_cmds$extract_cmd,
      spm_contrast_cmds$setup_cmd,
      paste0("matlab -nodisplay -nosplash -r \"run('", file.path(output_dir, "estimate_glm_contrasts.m"), "');exit;\""),
      ifelse(cleanup_tmp, paste0("rm -f ", paste(run_niftis[gzipped], collapse=" ")), "")
    )

    cat(qsub_job, file=file.path(output_dir, "execute_spm_qsub.bash"), sep="\n")
    spm_syntax[["qsub_job"]] <- qsub_job    
  }

  if (execute_qsub) {
    system(paste0("qsub ", file.path(output_dir, "execute_spm_qsub.bash")))
  }
  
  if (cleanup_tmp && !generate_qsub) {
    if (any(gzipped)) {
      sapply(run_niftis[gzipped], function(x) { unlink(x) })
    }
  }
  
  return(spm_syntax)
}
PennStateDEPENdLab/dependlab documentation built on April 10, 2024, 5:15 p.m.