R/fmri_utility_fx.R

Defines functions path_sanitize fmri.design cleanup_regressor visualize_design_matrix concat_design_runs fir1Bandpass detrendts runmean fmri.stimulus convolve_regressor place_dmat_on_time_grid get_collin_events add_baseline_regressors

Documented in add_baseline_regressors cleanup_regressor concat_design_runs convolve_regressor detrendts fir1Bandpass fmri.design fmri.stimulus get_collin_events path_sanitize place_dmat_on_time_grid runmean visualize_design_matrix

#' Helper function to append baseline regressors to the convolved design matrix.
#'   These are especially useful for GLMs that use concatenated data for multiple runs,
#'   where run-specific intercepts and drift terms are needed.
#'
#' @param dmat_convolved The design matrix containing convolved regressors for all events,
#'   created by \code{build_design_matrix}
#' @param baseline_coef_order The polynomial order of baseline regressors to be added
#' @param baseline_parameterization The parameterization of baseline regressors.
#'   Default is "Legendre". Alternative is "orthogonal.polynomials".
#'
#' @return A modified version of \code{dmat_convolved} that contains the baseline regressors
#'   requested by the user.
#' @importFrom orthopolynom legendre.polynomials
#' @keywords internal
add_baseline_regressors <- function(dmat_convolved, baseline_coef_order = -1L, baseline_parameterization = "Legendre") {

  # return unchanged copy if no baseline regressors requested
  if (baseline_coef_order < 0) {
    return(dmat_convolved)
  }

  # add baseline terms to convolved design matrices
  dmat_convolved <- lapply(dmat_convolved, function(r) {
    n <- names(r)
    if (baseline_parameterization == "Legendre") {
      # consistent with AFNI approach, use Legendre polynomials, which are centered at zero to allow for appropriate baseline
      unnormalized_p_list <- orthopolynom::legendre.polynomials(baseline_coef_order, normalized = FALSE)
      # Evaluate polynomial between -1 and 1, which is where its desirable centered behavior exists. Although the functions are
      #   centered at 0, evaluating them on a given grid may lead to slight deviations from mean=0. Thus, center perfectly.
      baseline <- orthopolynom::polynomial.values(polynomials = unnormalized_p_list, x = seq(-1, 1, length.out = nrow(r)))
      baseline[2:length(baseline)] <- lapply(baseline[2:length(baseline)], function(v) {
        v - mean(v)
      }) # don't center constant!

      names(baseline) <- paste0("base", 0:baseline_coef_order)
      d <- cbind(r, baseline)
    } else if (baseline_parameterization == "orthogonal_polynomials") {
      # Compute polynomials that are orthogonal to design effects
      # This may be somewhat dubious. For example, a linear trend in a task regressor
      #   would be preserved because it is given preference over baseline
      d <- data.frame(fmri::fmri.design(r, order = baseline_coef_order))
      names(d) <- c(n, paste0("base", 0:baseline_coef_order))
    } else {
      stop("Don't know how to handle baseline_parameterization:", baseline_parameterization)
    }

    return(d)
  })

  return(dmat_convolved)
}

#' Small helper function to calculate collinearity diagnostics on the events in the design
#'   prior to convolution. This can help to diagnose problems with parametric modulators that
#'   correlate excessively, for example.
#'
#' @param dmat A design matrix list generated by \code{build_design_matrix} that contains
#'   a set of regressors for each run, where run is indicated by the rows of \code{dmat}
#'
#' @keywords internal
get_collin_events <- function(dmat) {
  # compute collinearity diagnostics on the unconvolved signals
  collin_diag <- apply(dmat, 1, function(run) {
    # Custom regressors are not guaranteed to have ntrials as the dimension. 
    # Remove them from raw diagnostics since they're not on the same trial grid.

    #rlengths <- sapply(run, length)
    custom_reg <- grep("^custom_", names(run))
    if (length(custom_reg) > 0L) { run <- run[-1*custom_reg]}

    #check correlations among regressors for trial-wise estimates

    #get the union of all trial numbers across regressors
    utrial <- sort(unique(unlist(lapply(run, function(regressor) { regressor[, "trial"]}))))

    #initialize a trial x regressor matrix
    cmat <- matrix(NA, nrow=length(utrial), ncol=length(run), dimnames=list(trial=utrial, regressor=names(run)))

    #populate the relevant values for each regressor at the appropriate trials
    #need to convert the trial column to character so that the lookup goes to the dimnames, not numeric row positions
    for (i in seq_along(run)) {
      cmat[as.character(run[[i]][, "trial"]), names(run)[i]] <- run[[i]][, "value"]
    }

    cmat <- as.data.frame(cmat) #convert to a data.frame

    #remove any constant columns (e.g., task indicator regressor for clock) so that VIF computation is sensible
    zerosd <- sapply(cmat, sd, na.rm=TRUE)
    cmat_noconst <- cmat[, !is.na(zerosd) & zerosd != 0.0, drop=FALSE]

    if (ncol(cmat_noconst) == 0L) {
      #if only task indicator regressors are included, then they cannot
      # be collinear before convolution because there is no variability
      return(list(r=NA_real_, vif=NA_real_))
    } else {
      # drop warnings about SD of 0 since we want it to populate NAs there.
      corvals <- suppressWarnings(cor(cmat, use="pairwise.complete.obs"))
      vif_mat <- data.frame(cbind(dummy=rnorm(nrow(cmat_noconst)), cmat_noconst)) #add dummy constant for vif
      vif_form <- as.formula(paste("dummy ~ 1 +", paste(names(cmat_noconst), collapse=" + ")))

      var_infl <- tryCatch(car::vif(lm(vif_form, data = vif_mat)),
        error = function(e) {
          rep(NA, ncol(cmat_noconst))
        }
      ) # return NA if failure
      return(list(r=corvals, vif=var_infl))
    }
  })

  return(collin_diag)
}


#' Function to convert dmat (runs x regressor list) to a time-oriented representation.
#' This yields a list of runs where each element is data.frame of volumes x regressors
#'
#' @param dmat A runs x regressors 2-d list where each element is a matrix containing
#'          onset, duration, and value for a signal
#' @param convolve If \code{TRUE} (default), convolve the time-oriented signals with an HRF
#' @param run_timing A vector of cumulative start times for each run in a multi-run dataset
#' @param bdm_args A list of arguments passed to build_design_matrix, as well as a few fields added
#'          during the initial argument parsing. See build_design_matrix for details. Should contain:
#'          \itemize{
#'            \item convolve_wi_run TRUE/FALSE
#'            \item run_volumes Numeric vector of run length
#'            \item normalizations Character vector of HRF normalizations for each regressor.
#'              Options are "none", "durmax_1", or "evtmax_1".
#'            \item add_derivs A logical vector (\code{TRUE/FALSE}) of regressors whose temporal derivatives should
#'              be included. Temporal derivatives are only applied if \code{convolve} is \code{TRUE}.
#'            \item convmax_1 A logical vector (\code{TRUE/FALSE}) denoting whether to rescale max height to 1 after convolution
#'            \item high_pass The cutoff frequency (in Hz) used for high-pass filtering. If \code{NULL}, no filtering is applied.
#'            \item tr The repetition time (sometimes called TR) in seconds
#'            \item hrf_parameters The parameters for the double-gamma HRF
#'          }
#' @param lg An lgr logger object used for logging messages
#' @details Note that any volumes dropped from the beginning of each run should already be reflected in the timings
#'   of regressors in \code{dmat}. This prevents us from needing to have a drop_volumes implementation inside convolve_regressor,
#'   which is confusing anyhow. Likewise, run_timing should reflect the post-drop cumulative volumes.
#' @author Michael Hallquist
#' @keywords internal
place_dmat_on_time_grid <- function(dmat, convolve=TRUE, run_timing=NULL, bdm_args, lg = NULL) {

  if (isTRUE(bdm_args$convolve_wi_run)) {
    #create an HRF-convolved version of the list

    #i is run, j is regressor
    dmat_convolved <- lapply(1:dim(dmat)[1L], function(i) {
      run_convolve <- lapply(1:dim(dmat)[2L], function(j) {
        reg <- dmat[[i, j]] # regressor j for a given run i
        attr(reg, "reg_name") <- dimnames(dmat)[[2L]][j] # tag regressor with a name attribute so that return is named properly

        if (nrow(reg) == 0L) {
          if (isTRUE(bdm_args$keep_empty_regressors)) {
            empty <- matrix(rep(0, bdm_args$run_volumes[i]), ncol = 1)
            colnames(empty) <- attr(reg, "reg_name")
            return(empty) # keep all-zeros regressor
          } else {
            return(NULL)
          }
        } else {
          convolve_regressor(
            n_vols = bdm_args$run_volumes[i], reg = reg, tr = bdm_args$tr,
            normalization = bdm_args$normalizations[j], rm_zeros = bdm_args$rm_zeros[j],
            center_values = bdm_args$center_values, convmax_1 = bdm_args$convmax_1[j],
            demean_convolved = FALSE, high_pass = bdm_args$high_pass, convolve = convolve,
            ts_multiplier = bdm_args$ts_multiplier[[j]][[i]],
            hrf_parameters = bdm_args$hrf_parameters, lg=lg
          )
        }
      })
      
      # drop null events before combining into data.frame
      run_convolve <- run_convolve[sapply(run_convolve, function(x) !is.null(x))]
      df <- as.data.frame(run_convolve, check.names = FALSE) #pull into a data.frame with n_vols rows and nregressors cols (convolved)
      #names(df) <- dimnames(dmat)[[2L]]
      return(df)
    })
  } else {
    # Issue with convolution of each run separately is that the normalization and mean centering are applied within-run.
    # In the case of EV, for example, this will always scale the regressor in terms of relative differences in value within
    # run, but will fail to capture relative differences across run (e.g., if value tends to be higher in run 8 than run 1).

    dmat_sumruns <- lapply(1:dim(dmat)[2L], function(reg) {
      thisreg <- dmat[, reg]
      concattiming <- do.call(rbind, lapply(seq_along(thisreg), function(run) {
        timing <- thisreg[[run]]
        timing[, "onset"] <- timing[, "onset"] + ifelse(run > 1, run_timing[run-1], 0)
        timing
      }))

      #concat runs of the ts_multiplier within the current regressor
      concat_ts_multiplier <- do.call(c, bdm_args$ts_multiplier[[reg]])

      #convolve concatenated events with hrf
      all.convolve <- convolve_regressor(n_vols=sum(bdm_args$run_volumes), reg=concattiming, tr=bdm_args$tr,
                         normalization=bdm_args$normalizations[reg], rm_zeros = bdm_args$rm_zeros[reg],
                         center_values=bdm_args$center_values, convmax_1=bdm_args$convmax_1[j],
                         demean_convolved = FALSE, high_pass=bdm_args$high_pass, convolve=convolve,
                         ts_multiplier=concat_ts_multiplier,
                         hrf_parameters = bdm_args$hrf_parameters, lg=lg)

      #now, to be consistent with code below (and elsewhere), split back into runs
      splitreg <- split(all.convolve, do.call(c, sapply(seq_along(bdm_args$run_volumes), function(x) { rep(x, bdm_args$run_volumes[x]) })))

      return(splitreg)
    })

    #now have a list of length(regressors) with length(runs) elements.
    #need to reshape into a list of data.frames where each data.frame is a run with regressors
    dmat_convolved <- lapply(seq_along(dmat_sumruns[[1L]]), function(run) {
      df <- data.frame(lapply(dmat_sumruns, "[[", run))
      names(df) <- dimnames(dmat)[[2L]]
      return(df)
    })
  }

  #If we are convolving regressors, also consider whether to add temporal derivatives
  #handle the addition of temporal dervitives
  if (convolve && any(bdm_args$add_derivs)) {
    which_vars <- which(dimnames(dmat)[[2]] %in% names(bdm_args$signals)[bdm_args$add_derivs])
    message("Adding temporal derivatives for ", paste(dimnames(dmat)[[2]][which_vars], collapse=", "), ", orthogonalized against design matrix")

    dmat_convolved <- lapply(dmat_convolved, function(run) {
      X <- as.matrix(run) #need as a matrix for lm call

      X_derivatives <- do.call(cbind, lapply(which_vars, function(col) {
        dx <- c(0, diff(X[, col]))

        ##orthogonalize wrt design
        return(residuals(lm(dx ~ X)))
      }))

      colnames(X_derivatives) <- paste0("d_", colnames(X)[which_vars])

      cbind(run, X_derivatives) #return design matrix with derivatives added
    })
  }

  return(dmat_convolved)
}


#' This function convolves a regressor with a normalized HRF whose peak amplitude is 1.0
#'
#' It extends \code{fmri.stimulus} by allowing for two normalization approaches (building on AFNI dmUBLOCK):
#'   1) "evtmax_1": pre-convolution HRF max=1.0 normalization of each stimulus regardless of duration: identical to dmUBLOCK(1)
#'   2) "durmax_1": pre-convolution HRF max=1.0 normalization for long events (15+ sec) -- height of HRF is modulated by duration
#'        of event: identical to dmUBLOCK(0)
#'
#' @param n_vols The number of volumes (scans) to be output in the convolved regressor
#' @param reg A matrix containing the trial, onset, duration, and value for each event
#' @param tr The repetition time in seconds
#' @param normalization The HRF normalization method used: "none", "durmax_1", or "evtmax_1"
#' @param rm_zeros Whether to remove zeros from events vector prior to convolution. Generally a good idea since we 
#'    typically center values prior to convolution, and retaining zeros will lead them to be non-zero after mean centering.
#' @param center_values Whether to demean values vector before convolution. Default \code{TRUE}.
#' @param convmax_1 Whether to rescale the convolved regressor to a maximum height of 1.
#' @param demean_convolved Whether to demean the regressor after convolution (default: \code{TRUE})
#' @param high_pass The cutoff frequency (in Hz) used for high-pass filtering. If \code{NULL}, no filtering is applied.
#' @param convolve If \code{TRUE}, the regressor is convolved with the HRF. If \code{FALSE},
#'          the regressor values are simply aligned onto the time grid without convolution
#'          based on the corresponding onsets, durations, and values.
#' @param ts_multiplier A vector that is n_vols in length that will be multiplied against the stimulus vector before convolution.
#' @param hrf_parameters. A named vector of parameters passed to \code{fmri.stimulus} that control the shape of the double gamma HRF.
#'          Default: \code{c(a1 = 6, a2 = 12, b1 = 0.9, b2 = 0.9, cc = 0.35)}.
#' @param lg An lgr object used for logging messages
#'
#' @author Michael Hallquist
#' @keywords internal
convolve_regressor <- function(n_vols, reg, tr=1.0, normalization="none", rm_zeros=TRUE,
                                   center_values=TRUE, convmax_1=FALSE, demean_convolved=FALSE,
                                   high_pass=NULL, convolve=TRUE, ts_multiplier=NULL,
                                   hrf_parameters=c(a1 = 6, a2 = 12, b1 = 0.9, b2 = 0.9, cc = 0.35), lg=NULL) {

  if (is.null(lg)) {
    lg <- lgr::get_logger()
  }

  #reg should be a matrix containing, minimally: trial, onset, duration, value
  stopifnot(is.matrix(reg))

  if (is.null(tr) || !is.numeric(tr)) {
    msg <- glue("tr must be a number (in seconds), but was: %s", tr)
    lg$error(msg)
    stop(msg)
  }

  # check for the possibility that the onset of an event falls after the number of good volumes in the run
  # if so, this should be omitted from the convolution altogether
  which_high <- (reg[, "onset"] / tr) >= n_vols

  if (any(which_high, na.rm=TRUE)) {
    if (isTRUE(convolve)) {
      lg$warn("At least one event onset falls on or after last volume of run. Omitting this from model.")
      lg$warn("%s", capture.output(print(reg[which_high, ])))
    }

    r_orig <- reg
    reg <- reg[!which_high, , drop = FALSE] #this loses attributes, need to copy them over for code to work as expected
    attr(reg, "event") <- attr(r_orig, "event")
    attr(reg, "reg_name") <- attr(r_orig, "reg_name")
  }

  hrf_max <- NULL #only used for durmax_1 normalization

  normeach <- FALSE #signals evtmax_1 scaling
  if (!convolve) {
    normalize_hrf <- FALSE #irrelevant when we are not convolving
  } else if (normalization == "evtmax_1") {
    normalize_hrf <- TRUE
    normeach <- TRUE
  } else if (normalization == "durmax_1") {
    normalize_hrf <- TRUE

    # this is my hacky way to figure out the peak value for durmax_1 setup
    # obtain an estimate of the peak HRF height of a long event convolved with the HRF at these settings of a1, b1, etc.
    hrf_boxcar <- fmri.stimulus(
      n_vols = 300 / tr, values = 1.0, times = 100, durations = 100, tr = tr,
      demean = FALSE, # don't mean center for computing max height
      a1 = hrf_parameters["a1"], a2 = hrf_parameters["a2"], b1 = hrf_parameters["b1"],
      b2 = hrf_parameters["b2"], cc = hrf_parameters["cc"]
    )
    hrf_max <- max(hrf_boxcar)
  } else if (normalization == "none") {
    normalize_hrf <- FALSE
  } else { stop("unrecognized normalization: ", normalization) }

  #cleanup NAs and zeros in the regressor before proceeding with placing it onto the time grid
  cleaned <- cleanup_regressor(reg[, "onset"], reg[, "duration"], reg[, "value"], rm_zeros = rm_zeros)
  times <- cleaned$times; durations <- cleaned$durations; values <- cleaned$values

  if (length(times) == 0L) {
    lg$warn("No non-zero events for regressor to be convolved. Returning all-zero result for fMRI GLM.")
    ret <- matrix(0, nrow=n_vols, ncol=1)
    colnames(ret) <- attr(reg, "reg_name")
    return(ret)
  }

  # Handle mean centering of parametric values prior to convolution
  # This is useful when one wishes to dissociate variance due to parametric modulation versus stimulus occurrence
  # Don't demean a constant regressor such as an event regressor (all values = 1)
  if (length(values) > 1L && !all(is.na(values)) && center_values && sd(values, na.rm=TRUE) > 1e-5) {
    values <- values - mean(values, na.rm=TRUE)
  }

  # Split regressor into separate events prior to convolution
  # In the case of evtmax_1 normalization, normalize the HRF for the event to max height of 1 prior
  #   to multiplying against the event value/height.
  # In the case of durmax_1 normalization, normalize the HRF to a height of 1 for long events (~15s).
  if (isTRUE(normalize_hrf)) {
    
    #for each event, convolve it with hrf, normalize, then sum convolved events to get full timecourse
    normed_events <- sapply(seq_along(times), function(i) {
      #obtain unit-convolved duration-modulated regressor to define HRF prior to modulation by parametric regressor
      stim_conv <- fmri.stimulus(
        n_vols = n_vols, values = 1.0, times = times[i], durations = durations[i], tr = tr, demean = FALSE,
        center_values = FALSE, convolve = convolve, ts_multiplier = ts_multiplier,
        a1 = hrf_parameters["a1"], a2 = hrf_parameters["a2"], b1 = hrf_parameters["b1"], b2 = hrf_parameters["b2"], cc = hrf_parameters["cc"]
      )

      if (normeach) {
        if (times[i] + durations[i] > (n_vols*tr - 20)) {
          # When the event occurs at the end of the time series and is the only event (i.e., as in evtmax_1),
          # the HRF never reaches its peak. The further it is away from the peak, the stranger the stim_conv/max(stim_conv)
          # scaling will be -- it can lead to odd between-event scaling in the regressor.

          # Update Sep2019: it turns out that anything convolved regressor that has not achieved its peak by the end of the run
          #   -- even if it fits within the time interval -- can be rescaled improperly. A general solution is to place this
          #   event in the middle of the interval, convolve it with the HRF, then use *that* height as the normalization factor.
          # Apply this alternative correction to any event that begins or is 'on' in the last 20 seconds.
          msg <- glue(
            "Event occurs at the tail of the run. Onset: {round(times[i], 2)}, Offset: {round(times[i] + durations[i], 2)}, Run duration: {n_vols*tr}. ",
            "Using HRF peak from center of run for evtmax_1 regressor to avoid strange scaling. ",
            "Please check that the end of your convolved regressors matches your expectation."
          )
          lg$debug(msg)

          mid_vol <- n_vols*tr/2
          stim_at_center <- fmri.stimulus(n_vols=n_vols, values=1.0, times=mid_vol, durations=durations[i], tr=tr, demean=FALSE, 
            center_values=FALSE, convolve = convolve, ts_multiplier=ts_multiplier,
            a1=hrf_parameters["a1"], a2=hrf_parameters["a2"], b1=hrf_parameters["b1"], b2=hrf_parameters["b2"], cc=hrf_parameters["cc"])

          # Rescale HRF to a max of 1.0 for each event, regardless of duration -- EQUIVALENT TO dmUBLOCK(1). 
          # Use the peak of the HRF aligned to center of time interval.
          stim_conv <- stim_conv/max(stim_at_center)
        } else {
          # Rescale HRF to a max of 1.0 for each event, regardless of duration -- EQUIVALENT TO dmUBLOCK(1).
          stim_conv <- stim_conv/max(stim_conv)
        }
      } else if (normalize_hrf == TRUE && normeach == FALSE) {
        stim_conv <- stim_conv/hrf_max #rescale HRF to a max of 1.0 for long event -- EQUIVALENT TO dmUBLOCK(0)
      }

      stim_conv <- stim_conv*values[i] #for each event, multiply by parametric regressor value
    })

    tc_conv <- apply(normed_events, 1, sum) #sum individual HRF regressors for combined time course
  } else {
    #handle unnormalized convolution
    tc_conv <- fmri.stimulus(n_vols=n_vols, values=values, times=times, durations=durations, tr=tr, demean=FALSE,
      center_values=FALSE, convolve = convolve, ts_multiplier = ts_multiplier,
      a1=hrf_parameters["a1"], a2=hrf_parameters["a2"], b1=hrf_parameters["b1"], b2=hrf_parameters["b2"], cc=hrf_parameters["cc"])
  }

  #force the regressor to be a 1-column matrix so that apply calls below work
  tc_conv <- matrix(tc_conv, ncol=1)

  #apply high-pass filter after convolution if requested
  if (!is.null(high_pass)) {
    tc_conv <- apply(tc_conv, 2, function(col) {
      fir1Bandpass(col, TR=tr, low=high_pass, high=1/tr/2, plotFilter=FALSE, forward_reverse=TRUE, padx=1, detrend=1)
    })
  }

  # Allow for the convolved regressor to be rescaled to have a maximum height of 1.0. This normalizes
  # the range of the regressor across runs and subjects such that the betas for the regressor are on
  # similar scales, just with a change of gain... experimenting with this currently given SCEPTIC DAUC
  # regressors which tend to be highly skewed, potentially driven by behavioral parameter scaling challenges.
  # Note that this interacts with the normeach setting such that for normeach=FALSE (durmax_1), the height
  # of 1.0 will reflect a combination of the parameter and the duration. The interpretation is somewhat
  # cleaner under normeach=TRUE (evtmax_1) since the HRF has height of 1.0 for each stimulus, and then
  # rescaling to max 1.0 after convolution with the parametric value will change the relative heights of
  #the stimuli solely according to the parameter.
  if (convmax_1) {
    tc_conv <- tc_conv/max(tc_conv)
  }

  #grand mean center convolved regressor
  if (demean_convolved) {
    tc_conv <- apply(tc_conv, 2, function(col) { col - mean(col, na.rm=TRUE) })
  }

  #name the matrix columns appropriately
  colnames(tc_conv) <- attr(reg, "reg_name")
  return(tc_conv)
}

#' Convolve a regressor with a hemodynamic response function for fMRI analysis.
#'
#' Extended from \code{fmri} package to allow for continuous-valued regressor,
#' which is passed using the values parameter.
#'
#' The function also supports mean centering of parametric regressor prior to convolution
#' to dissociate it from stimulus occurrence (when event regressor also in model)
#'
#' @param n_vols The number of volumes (scans) to be output in the convolved regressor
#' @param onsets A vector of times (in scans) specifying event onsets
#' @param durations A vector of durations (in seconds) for each event
#' @param values A vector of parametric values used as regressor heights prior to convolution
#' @param times A vector of times (in seconds) specifying event onsets. If times are passed in, the onsets
#'   argument (which is in scans) is ignored. That is, \code{onsets} and \code{times} are mutually exclusive.
#' @param center_values Whether to demean values vector before convolution
#' @param rm_zeros Whether to remove zeros from events vector prior to convolution. Generally a good idea since we typically 
#'   center values prior to convolution, and retaining zeros will lead them to be non-zero after mean centering.
#' @param convolve Whether to convolve the regressor with the HRF. If FALSE, a time series of events, durations, and heights is returned.
#' @param tr The repetition time (sometimes called TR) in seconds
#' @param ts_multiplier A time series of length \code{n_vols} that is multiplied against the stimulus before convolution.
#' @param demean Whether to demean the regressor after convolution. Default: TRUE
#' @param convmax_1 Whether to rescale the convolved regressor to a maximum height of 1.
#' @param a1 The a1 parameter of the double gamma
#' @param a2 The a2 parameter of the double gamma
#' @param b1 The b1 parameter of the double gamma
#' @param b2 The b2 parameter of the double gamma
#' @param cc The cc parameter of the double gamma
#' @param conv_method Method for convolving HRF with stimulus. Either "r" or "cpp". The "r" method uses an FFT-based
#'   internal convolution with convolve(x, y, conj=TRUE). The "cpp" method uses an internal C++ function with a
#'   loop-based convolution over the vectors. Unfortunately, at present, the C++ approach is noticeably slower since
#'   it does not use FFT to obtain the filter.
#' @param microtime_resolution The number of bins between TRs used for calculating regressor values in continuous time
#'
#' @importFrom stats approx convolve
#' @export
fmri.stimulus <- function(n_vols=1, onsets=c(1), durations=c(1), values=c(1), times=NULL, center_values=FALSE,
                          rm_zeros=TRUE, convolve=TRUE, tr=2, ts_multiplier=NULL, demean=TRUE, convmax_1=FALSE,
                          a1 = 6, a2 = 12, b1 = 0.9, b2 = 0.9, cc = 0.35, conv_method="r", microtime_resolution=20) {

  #double gamma function
  double_gamma_hrf <- function(x, a1=6, a2=12, b1=0.9, b2=0.9, cc=0.35) {
    d1 <- a1 * b1
    d2 <- a2 * b2
    c1 <- ( x/d1 )^a1
    c2 <- cc * ( x/d2 )^a2
    res <- c1 * exp(-(x-d1)/b1) - c2 * exp(-(x-d2)/b2)
    res
  }

  double_gamma_glover <- function(t, n1 = 6, t1=0.9, a2=0.35, n2=12, t2=0.9) {
    gamma1 <- t^n1 * exp(-t/t1)
    gamma2 <- t^n2 * exp(-t/t2)
    c1 <- max(gamma1)
    c2 <- max(gamma2)
    res <- gamma1/c1 - a2*gamma2/c2
    return(res)
  }

  # xx <- double_gamma_hrf(1:30)
  # yy <- double_gamma_glover(1:30)
  # plot(xx, type="b")
  # lines(yy, type="b", col="blue")
  # summary(xx-yy)
  
  # def original_glover(t, n1=6., t1=0.9, a2=0.35, n2=12., t2=0.9):
  #   """ As seen in Glover 1999, page 420. Using values for the auditory cortex (canonical Glover).
  #   Variables are named after those in the paper. 
  #   """
  # gamma1 = t ** n1 * np.exp(-t / t1)
  # gamma2 = t ** n2 * np.exp(-t / t2)
  # 
  # c1 = gamma1.max()
  # c2 = gamma2.max()
  # 
  # return gamma1 / c1 - a2 * gamma2 / c2
  # 
  
  #verify that ts_multiplier is n_vols in length
  if (!is.null(ts_multiplier)) { stopifnot(is.vector(ts_multiplier) && length(ts_multiplier) == n_vols) }

  if (!is.null(times)) {
    cleaned <- cleanup_regressor(times, durations, values, rm_zeros = rm_zeros)
    times <- cleaned$times; durations <- cleaned$durations; values <- cleaned$values
  } else {
    cleaned <- cleanup_regressor(onsets, durations, values, rm_zeros = rm_zeros)
    onsets <- cleaned$times; durations <- cleaned$durations; values <- cleaned$values
  }

  #handle mean centering of parametric values prior to convolution
  #this is useful when one wishes to dissociate variance due to parametric modulation versus stimulus occurrence
  if (!all(is.na(values)) && isTRUE(center_values) && !all(abs(values - 1.0) < 1e-5)) {
    values <- values - mean(values, na.rm=TRUE)
  }

  if (is.null(times)) {
    #onsets are specified in terms of scans (i.e., use onsets argument)
    microtime_resolution <- 1
  } else {
    #verify that there are events to be modeled in the time vector
    if (length(times) == 0L) {
      warning("No non-zero events for regressor to be convolved. Returning all-zero result for fMRI GLM.")
      return(rep(0, n_vols))
    } else if (all(times >= n_vols*tr)) {
      #all events fall outside of the modeled time window
      return(rep(0, n_vols))
    }

    #upsample time grid by a factor (default 20) to get best estimate of hrf at each volume
    onsets <- times/tr*microtime_resolution
    durations <- durations/tr*microtime_resolution
    tr <- tr/microtime_resolution
    n_vols <- n_vols*microtime_resolution

    if (!is.null(ts_multiplier)) {
      #upsample ts_multiplier using linear interpolation
      ts_multiplier <- approx(ts_multiplier, n=n_vols)$y
    }

  }
  number_of_onsets <- length(onsets)

  if (length(durations) == 1) {
    durations <- rep(durations, number_of_onsets)
  } else if (length(durations) != number_of_onsets)  {
    msg <- glue("Length of durations vector ({length(durations)}) does not match the number of onsets ({number_of_onsets})!")
    lg$error
    stop(msg)
  }

  if (length(values) == 1) {
    #use the same regressor height (usually 1.0) for all onsets
    values <- rep(values, number_of_onsets)
  } else if (length(values) != number_of_onsets) {
    stop("Length of values vector does not match the number of onsets!")
  }

  if (any(onsets >= n_vols)) {
    #remove any events that begin outside the modeled time window
    badonsets <- onsets >= n_vols
    onsets <- onsets[!badonsets]
    durations <- durations[!badonsets]
    values <- values[!badonsets]
    number_of_onsets <- number_of_onsets - sum(badonsets)
  }

  stimulus <- rep(0, n_vols)

  for (i in 1:number_of_onsets) {
    for (j in onsets[i]:(onsets[i]+durations[i]-1)) {
      stimulus[j] <- values[i]
    }
  }

  #always truncate the stimulus back down to the target output length
  #if a stimulus starts after the last modeled timepoint or extends into it, stimulus
  #will become longer than expected, causing a convolution error. For now, be quiet about it,
  #but perhaps we should let the user know when he/she specifies a time vector that extends into
  #unmodeled timepoints
  stimulus <- stimulus[1:n_vols]

  #if we have a time-based modulator of the signal, multiply it against the stimulus vector before convolution
  if (!is.null(ts_multiplier)) {
    stimulus <- stimulus * ts_multiplier
  }

  #  zero pad stimulus vector with 20 TRs on left and right to avoid bounding/edge effects in convolve
  stimulus <- c(rep(0,20*microtime_resolution),stimulus,rep(0,20*microtime_resolution))

  if (conv_method=="cpp") {
    regressor <- convolve_double_gamma(stimulus, a1, a2, b1/tr, b2/tr, cc)/microtime_resolution
  } else if (conv_method=="r") {
    #compute the double-gamma HRF for one event, scale to be as long as time series
    #this goes from last-to-first volume since this is reversed in convolution
    hrf_values <- double_gamma_hrf(((40*microtime_resolution)+n_vols):1, a1, a2, b1/tr, b2/tr, cc)
    #hrf_values <- double_gamma_glover(((40*microtime_resolution)+n_vols):1, n1=a1, t1=b1/tr, a2=cc, n2=a1, t2=b2/tr)
    regressor <- convolve(stimulus, hrf_values)/microtime_resolution
  }

  regressor <- regressor[-(1:(20*microtime_resolution))][1:n_vols]
  regressor <- regressor[unique((microtime_resolution:n_vols)%/%microtime_resolution)*microtime_resolution]
  dim(regressor) <- c(n_vols/microtime_resolution,1)

  #rescale regressor to maximum height of 1.0 for scaling similarity across instances (see hrf_convolve_normalize for details)
  #only applicable to convolve regressors
  if (convmax_1 && convolve) { regressor <- regressor/max(regressor) }

  if (!convolve) {
    # just return the box car without convolving by HRF
    # remove zero padding
    stimulus <- stimulus[-(1:(20 * microtime_resolution))][1:n_vols]
    
    # subset the elements of the upsampled grid back onto the observed TRs
    stimulus <- stimulus[unique((microtime_resolution:n_vols)%/%microtime_resolution)*microtime_resolution]
    return(stimulus)
  } else if (demean) {
    regressor - mean(regressor, na.rm=TRUE)
  } else {
    regressor
  }
}

#' compute a moving average smooth over a time series (here, a vector of RTs)
#'
#' used to fit smoothed RTs (\code{clock_model} object).
#' Function is an adapted version of \code{runmean} from the \code{caTools} package.
#' @keywords internal
runmean <- function(x, k=5) {
  n <- length(x)
  k2 <- k%/%2
  k1 <- k - k2 - 1
  y <- c(sum(x[1:k]), diff(x, k))
  y <- cumsum(y)/k
  y <- c(rep(0, k1), y, rep(0, k2))

  idx1 <- 1:k1
  idx2 <- (n - k2 + 1):n

  #use mean of available data at tails
  for (i in idx1) y[i] <- mean(x[1:(i + k2)])
  for (i in idx2) y[i] <- mean(x[(i - k1):n])

  return(y)
}

#' Detrend a time series up to quadratic trend. Used by fir1Bandpass prior to filtering
#'
#' @param x A time series to be detrended
#' @param order The polynomial order used for detrending. 0=demean; 1=linear; 2=quadratic
#' @export
detrendts <- function(x, order=0) {
  #order 0=demean; order 1=linear; order 2=quadratic
  lin <- seq_along(x)

  if (order == 0) {
    residuals(lm(x ~ 1))
  } else if (order == 1) {
    residuals(lm(x ~ 1 + lin))
  } else if (order == 2) {
    quad <- lin^2
    residuals(lm(x ~ 1 + lin + quad))
  } else {
    stop("order not supported:", order)
  }
}

#' Apply a FIR-1 bandpass filter a signal. Can low- or high-pass filter by specifying 0 for low or >= Nyquist for high.
#'
#' @param x The time series to be filtered
#' @param TR The sampling frequency in seconds
#' @param low The lower filter cutoff in Hz. Fluctuations below this frequency will be filtered out
#' @param high The upper filter cutoff in Hz. Fluctuations above this frequency will be filtered out
#' @param n The order of the filter coefficients. Should probably leave this alone in general
#'
#' @keywords internal
#' @importFrom signal fir1 filtfilt freqz
#' @export
fir1Bandpass <- function(x, TR=2.0, low=.009, high=.08, n=250, plotFilter=FALSE, forward_reverse=TRUE, padx=0, detrend=1) {
  #check for all NA
  if (all(is.na(x))) return(x)

  #n refers to filter order. 250 does quite well with typical signals
  Fs <- 1/TR
  nyq <- Fs/2

  #enforce filter upper bound at 1.0 (nyquist)
  if (high/nyq > 1.0) { high <- nyq }

  #coefficients are specified in the normalized 0-1 range.
  fir1_coef <- fir1(n, c(low/nyq, high/nyq), type="pass")

  if (plotFilter) print(freqz(fir1_coef, Fs=Fs))

  origLen <- length(x)

  #handle detrending (almost always a good idea to demean, if not detrend, for fourier series to be valid!)
  if (!is.null(detrend) && detrend >= 0)
    x <- detrendts(x, order=detrend)

  #zero-pad data, if requested
  x <- c(x, rep(0*x, padx))

  #as the order of the filter exceeds the length of the time series,
  #some sort of phase distortion is introduced.
  #forward+reverse filtering cleans it up
  if (forward_reverse) xfilt <- filtfilt(fir1_coef, x)
  else xfilt <- filter(fir1_coef, x)

  return(xfilt[1:origLen])
}


#' Concatenate design matrices for each run to form a single design with unique baselines per run (ala AFNI)
#'
#' @param d A design matrix object created by \code{build_design_matrix}. The $design_convolved element will be used for concatenation.
#' @param convolved If \code{TRUE} (the default), concatenate the convolved design matrix. If \code{FALSE}, use the unconvolved.
#' @importFrom dplyr bind_rows
#' @export
concat_design_runs <- function(d, convolved=TRUE) {

  if (!is.list(d) || is.null(d$design_convolved)) { stop("Cannot identify $design_convolved element of object") }

  to_concat <- if (convolved) { d$design_convolved } else { d$design_unconvolved }
  d_allruns <- do.call(bind_rows, lapply(seq_along(to_concat), function(r) {
    thisrun <- to_concat[[r]]
    basecols <- grepl("base", names(thisrun))
    ##note that this will rename repeated names into something like ev and ev.1, which is good
    names(thisrun) <- gsub("base", paste0("run", r, "base"), names(thisrun))
    thisrun
  }))

  d_allruns[which(is.na(d_allruns), arr.ind=TRUE)] <- 0

  d_allruns <- as.matrix(d_allruns) #needed for lm
  attr(d_allruns, "run_names") <- names(to_concat) #keep the run names around for tracking
  if (length(to_concat) > 1L) {
    run_boundaries <- c(1, cumsum(sapply(to_concat[seq_along(to_concat) - 1], nrow) + 1)) #keep the run names around for tracking
    names(run_boundaries) <- names(to_concat)
  } else {
    run_boundaries <- 1 #just first volume
    names(run_boundaries) <- names(to_concat)[1]
  }

  attr(d_allruns, "run_boundaries") <- run_boundaries
  d_allruns
}


#' Visualize design matrix, including event onset times and run boundaries
#'
#' @param d a concatenated design matrix created by \code{build_design_matrix} and passed to \code{concat_design_runs}
#' @param outfile a filename used to export the design matrix using \code{ggsave}
#' @param run_boundaries a named vector of positions in the time series where a run boundary occurs (used for plotting)
#' @param events a named list of vectors containing the times of each event
#' @param include_baseline whether to display the baseline regressors in the design.
#' @importFrom ggplot2 ggplot aes geom_line theme_bw facet_grid geom_vline ggsave scale_color_brewer scale_color_discrete guides guide_legend
#' @importFrom tidyr gather
#' @export
visualize_design_matrix <- function(d, outfile=NULL, run_boundaries=NULL, events=NULL, include_baseline=FALSE) {

  #this needs to come before removal of baseline because attributes are dropped at subset
  if (is.null(run_boundaries) && !is.null(attr(d, "run_boundaries"))) {
    #check whether run_boundaries are attached to the convolved design as an attribute and use this
    run_boundaries <- attr(d, "run_boundaries")
    run_names <- attr(d, "run_names")
  } else {
    if (is.null(names(run_boundaries))) {
      run_names <- paste0("run", seq_along(run_boundaries))
    } else {
      run_names <- names(run_boundaries)
    }
  }

  if (!include_baseline) {
    d <- d[ ,!grepl("(run[0-9]+)*base", colnames(d)), drop = FALSE]
  } else {
    d <- d[, !grepl("(run[0-9]+)*base0", colnames(d)), drop = FALSE] #always remove constant
  }

  #print(round(cor(d), 3))
  d <- as.data.frame(d)
  d$volume <- seq_len(nrow(d))
  d.m <- d %>% gather(key="variable", value="value", -volume)
  g <- ggplot(d.m, aes(x=volume, y=value)) + geom_line(size=1.2) + theme_bw(base_size=15) + facet_grid(variable ~ ., scales="free_y")

  if (!is.null(run_boundaries)) {
    rundf <- data.frame(run=run_names, boundary=run_boundaries)
    g <- g + geom_vline(data=rundf, aes(xintercept=boundary, color=run), size=1.3) + scale_color_discrete("Run") + #scale_color_brewer("Run", palette="Blues")
      #theme(legend.key = element_rect(size = 2), legend.key.size = unit(1.5, 'lines'))
      guides(color=guide_legend(keywidth=0.3, keyheight=1.5, default.unit="lines")) #not beautifully spaced, but come back to this later...
  }

  colors <- c("black", "blue", "red", "orange") #just a hack for color scheme right now

  if (!is.null(events)) {
    for (i in seq_along(events)) {
      eventdf <- data.frame(name=names(events)[i], positions=events[[i]])
      g <- g + geom_vline(data=eventdf, aes(xintercept=events, color=name)) + scale_color_brewer("Event", palette="Greens")
    }
  }

  if (!is.null(outfile)) {
    ggsave(filename=outfile, plot=g, width=21, height=9)
  }

  return(invisible(g))
}

#' Small utility function to handle any NAs in the regressor specification.
#' Also, optionally, removes values of 0 from the regressor prior to convolution.
#' The latter is usually a good idea to avoid a non-event (zero) becoming an event
#' after mean centering of a parametric regressor. Convolving a zero-height regressor
#' with an HRF will maintain all zeros, whereas a non-zero value (resulting from mean centering)
#' will not.
#'
#' @param times Vector of times (in seconds) for regressor events
#' @param durations Vector of durations (in seconds) for regressor events
#' @param values Vector of values (parametric heights) for regressor events
#' @param rm_zeros Logical indicating whether to remove zero-valued events from regressor
#'
#' @author Michael Hallquist
#' @keywords internal
cleanup_regressor <- function(times, durations, values, rm_zeros=TRUE) {
  #always remove any NAs from any of the input vectors
  napos <- which(is.na(times) | is.na(durations) | is.na(values))
  if (length(napos) > 0L) {
    times <- times[-1*napos]
    values <- values[-1*napos]
    durations <- durations[-1*napos]
  }

  #remove zeros from events vector to avoid these becoming non-zero values in the hrf convolution after grand mean centering
  #for example, for negative RPEs, if one does not occur, it will have a value of zero. But after grand mean centering below, this
  #will now be treated as an "event" by the HRF convolution since the amplitude is no longer zero.
  if (rm_zeros) {
    zeros <- which(values==0.0)
    if (length(zeros) > 0L) {
      times <- times[-1*zeros]
      values <- values[-1*zeros]
      durations <- durations[-1*zeros]
    }
  }

  return(list(times=times, durations=durations, values=values))
}

#' fmri.design function from \code{fmri} R package. Brought into this package to avoid dependency on \code{fmri} package,
#'   especially the \code{gsl} package dependency in \code{aws}, which is finicky on clusters if the GSL module isn't available.
#' 
#' @param stimulus The regressor of interest
#' @param order The polynomial order of baseline/drift terms to incorporate in the design matrix
#' @param cef confound regressors to add
#' @param verbose if TRUE, output detailed progress about each step in the design setup
#' @keywords internal
fmri.design <- function(stimulus, order = 2, cef = NULL, verbose = FALSE) {
  checkmate::assert_logical(verbose, len = 1L)

  ## create matrices and make consistency checks
  if (is.list(stimulus)) {
    nstimulus <- length(stimulus)
    dims <- dim(stimulus[[1]])
    if (!is.null(dims)) {
      slicetiming <- TRUE
      nslices <- dims[2]
      scans <- dims[1]
      stims <- array(0, c(scans, nstimulus, nslices))
      for (j in 1:nstimulus) {
        if (!all(dim(stimulus[[j]]) == dims)) stop("Inconsistent dimensions in stimulus list")
        stims[, j, ] <- as.matrix(stimulus[[j]])
      }
    }
  } else {
    slicetiming <- FALSE
    stims <- as.matrix(stimulus)
    dims <- dim(stims)
    nstimulus <- dims[2]
    scans <- dims[1]
    nslices <- 1
    dim(stims) <- c(dims, nslices)
  }
  if (!is.null(cef)) {
    cef <- as.matrix(cef)
    if (dim(stims)[1] != dim(cef)[1]) {
      stop("Length of stimuli ", dim(stimulus)[1], " does not match the length of confounding effects ", dim(cef)[1])
    }
    neffects <- dim(cef)[2]
  } else {
    neffects <- 0
  }


  ## create empty design matrix and fill first columns with Stimulus

  dz <- c(scans, nstimulus + neffects + order + 1, nslices)
  z <- array(0, dz)
  if (verbose) cat("fmriDesign: Adding stimuli to design matrix\n")
  z[, 1:nstimulus, ] <- stims

  ## this is the mean effect
  if (verbose) cat("fmriDesign: Adding mean effect to design matrix\n")
  z[, neffects + nstimulus + 1, ] <- 1
  ## now confounding effects
  if (neffects != 0) {
    if (verbose) cat("fmriDesign: Adding", neffects, "confounding effect(s) to design matrix\n")
    z[, (nstimulus + 1):(nstimulus + neffects), ] <- cef
  }

  ## now the polynomial trend (make orthogonal to stimuli)
  if (order != 0) {
    if (verbose) cat("fmriDesign: Adding polynomial trend of order", order, "to design matrix\n")
    for (k in 1:nslices) {
      ortho <- t(stims[, , k]) %*% stims[, , k]
      hz <- numeric(nstimulus)
      for (i in (neffects + nstimulus + 2):(neffects + nstimulus + order + 1)) {
        z[, i, k] <- (1:scans)^(i - nstimulus - neffects - 1)
        z[, i, k] <- z[, i, k] / mean(z[, i, k])
      }
    }
  }
  if (dim(z)[3] == 1) dim(z) <- dim(z)[1:2]
  
  return(z)
}


#' Sanitize a filename by removing directory paths and invalid characters
#'
#' `path_sanitize()` removes the following:
#' - [Control characters](https://en.wikipedia.org/wiki/C0_and_C1_control_codes)
#' - [Reserved characters](https://web.archive.org/web/20230126161942/https://kb.acronis.com/content/39790)
#' - Unix reserved filenames (`.` and `..`)
#' - Trailing periods and spaces (invalid on Windows)
#' - Windows reserved filenames (`CON`, `PRN`, `AUX`, `NUL`, `COM1`, `COM2`,
#'   `COM3`, COM4, `COM5`, `COM6`, `COM7`, `COM8`, `COM9`, `LPT1`, `LPT2`,
#'   `LPT3`, `LPT4`, `LPT5`, `LPT6`, LPT7, `LPT8`, and `LPT9`)
#' The resulting string is then truncated to [255 bytes in length](https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits)
#' @param filename A character vector to be sanitized.
#' @param replacement A character vector used to replace invalid characters.
#' @seealso <https://www.npmjs.com/package/sanitize-filename>, upon which this
#'   function is based.
#' @keywords internal
#' @details 
#'   Adapted slightly from fs package. Added parentheses as illegal characters given FSL evaluates these in unquoted filenames
#' @examples
#' # potentially unsafe string
#' str <- "~/.\u0001ssh/authorized_keys"
#' path_sanitize(str)
#'
#' path_sanitize("..")
path_sanitize <- function(filename, replacement = "") {
  illegal <- "[/\\?<>\\:*|\":()]"
  control <- "[[:cntrl:]]"
  reserved <- "^[.]+$"
  windows_reserved <- "^(con|prn|aux|nul|com[0-9]|lpt[0-9])([.].*)?$"
  windows_trailing <- "[. ]+$"

  filename <- gsub(illegal, replacement, filename)
  filename <- gsub(control, replacement, filename)
  filename <- gsub(reserved, replacement, filename)
  filename <- gsub(windows_reserved, replacement, filename, ignore.case = TRUE)
  filename <- gsub(windows_trailing, replacement, filename)

  # TODO: this substr should really be unicode aware, so it doesn't chop a
  # multibyte code point in half.
  filename <- substr(filename, 1, 255)
  if (replacement == "") {
    return(filename)
  }
  path_sanitize(filename, "")
}
UNCDEPENdLab/fmri.pipeline documentation built on April 3, 2025, 3:21 p.m.