R/dmcFit.R

Defines functions calculateBinProbabilities costValueFunction gs calculateCostValueGS cs calculateCostValueCS calculateCostValueSPE calculateCostValueRMSE minimizeCostValue mean.dmcfit_subject dmcFitSubjectDE dmcFitSubject dmcFitDE dmcFit

Documented in calculateBinProbabilities calculateCostValueCS calculateCostValueGS calculateCostValueRMSE calculateCostValueSPE dmcFit dmcFitDE dmcFitSubject dmcFitSubjectDE mean.dmcfit_subject

#' @title dmcFit
#'
#' @description Fit theoretical data generated from dmcSim to observed data by
#' minimizing the root-mean-square error ("RMSE") between a weighted combination
#' of the CAF and CDF functions using optim (Nelder-Mead). Alternative cost functions
#' include squared percentage error ("SPE"), and g-squared statistic ("GS").
#'
#' @param resOb Observed data (see flankerData and simonTask for data format) and the function dmcObservedData to create
#' the required input from either an R data frame or external *.txt/*.csv files
#' @param nTrl Number of trials to use within dmcSim.
#' @param startVals Starting values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., startVals = list(amp = 20, tau = 200,
#' drc = 0.5, bnds = 75, resMean = 300, resSD = 30, aaShape = 2, spShape = 3, spBias = 0, sigm = 4, bndsRate=0, bndsSaturation=0)).
#' @param minVals Minimum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., minVals = list(amp = 0, tau = 5, drc = 0.1,
#' bnds = 20, bndsRate=0, bndsSaturation=0, resMean = 200, resSD = 5, aaShape = 1, spShape = 2, spBias = -20, sigm = 1)).
#' @param maxVals Maximum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., maxVals = list(amp = 40, tau = 300, drc = 1.0,
#' bnds = 150, bndsRate=1, bndsSaturation=500, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 20, sigm = 10))
#' @param fixedFit Fix parameter to starting value. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., fixedFit = list(amp = F, tau = F, drc = F,
#' bnds = F, bndsRate=T, bndsSaturation=T, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = T, sigm = T))
#' NB. Value if fixed at startVals.
#' @param freeCombined If fitting 2+ datasets at once, which parameters are allowed to vary between both
#' fits (default = all parameters fixed between the two fits e.g. parameter = F).
#' This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., freeCombined = list(amp = F,
#' tau = F, drc = F, bnds = F, bndsRate=F, bndsSaturation=F, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = F, sigm = F))
#' @param fitInitialGrid TRUE/FALSE
#' @param fitInitialGridN 10 linear steps between parameters min/max values (reduce if searching more than ~2/3 initial parameters)
#' @param fixedGrid Fix parameter for initial grid search. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., fixedGrid = list(amp = T, tau = F, drc = T,
#' bnds = T, bndsRate=T, bndsSaturation=T, resMean = T, resSD = T, aaShape = T, spShape = T, spBias = T, sigm = T)). As a default, the initial gridsearch
#' only searches the tau space.
#' @param nCAF The number of CAF bins.
#' @param nDelta The number of delta bins.
#' @param pDelta An alternative option to nDelta (tDelta = 1 only) by directly specifying required percentile values (vector of values 0-100)
#' @param tDelta The type of delta calculation (1=direct percentiles points, 2=percentile bounds (tile) averaging)
#' @param deltaErrors TRUE/FALSE Calculate delta bins for error trials
#' @param costFunction The cost function to minimise: root mean square error ("RMSE": default),
#' squared percentage error ("SPE"), or likelihood-ratio chi-square statistic ("GS")
#' @param rtMax The limit on simulated RT (decision + non-decisional components)
#' @param spDist The starting point (sp) distribution (0 = constant, 1 = beta, 2 = uniform)
#' @param drOnset The starting point of controlled drift rate (i.e., "target" information) relative to automatic ("distractor" incormation) (> 0 ms)
#' @param drDist The drift rate (dr) distribution type (0 = constant, 1 = beta, 2 = uniform)
#' @param drShape The drift rate (dr) shape parameter
#' @param drLim The drift rate (dr) range
#' @param bndsRate Collapsing bounds rate. 0 (default) = fixed bounds
#' @param bndsSaturation Collapsing bounds saturation
#' @param printInputArgs TRUE (default) /FALSE
#' @param printResults TRUE/FALSE (default)
#' @param optimControl Additional control parameters passed to optim (see optim details section)
#' @param numCores Number of cores to use
#'
#' @return dmcfit returns an object of class "dmcfit" with the following components:
#'
#' \item{sim}{Individual trial data points (RTs for all trial types e.g., correct/error trials) and activation vectors from the simulation}
#' \item{summary}{Condition means for reaction time and error rate}
#' \item{caf}{Conditional Accuracy Function (CAF) data per bin}
#' \item{delta}{DataFrame with distributional delta analysis data correct trials (Bin, meanComp, meanIncomp, meanBin, meanEffect)}
#' \item{delta_errs}{DataFrame with distributional delta analysis data incorrect trials (Bin, meanComp, meanIncomp, meanBin, meanEffect)}
#' \item{par}{The fitted model parameters + final cost value of the fit}
#'
#' @examples
#' \donttest{
#' # Code below can exceed CRAN check time limit, hence donttest
#' # Example 1: Flanker data from Ulrich et al. (2015)
#' fit <- dmcFit(flankerData) # only initial search tau
#' plot(fit, flankerData)
#' summary(fit)
#'
#' # Example 2: Simon data from Ulrich et al. (2015)
#' fit <- dmcFit(simonData) # only initial search tau
#' plot(fit, simonData)
#' summary(fit)
#'
#' # Example 3: Flanker data from Ulrich et al. (2015) with non-default
#' # start vals and some fixed values
#' fit <- dmcFit(flankerData,
#'   startVals = list(drc = 0.6, aaShape = 2.5),
#'   fixedFit = list(drc = TRUE, aaShape = TRUE)
#' )
#'
#' # Example 4: Simulated Data (+ve going delta function)
#' dat <- createDF(nSubjects = 20, nTrl = 500, design = list("Comp" = c("comp", "incomp")))
#' dat <- addDataDF(dat,
#'   RT = list(
#'     "Comp_comp" = c(510, 100, 100),
#'     "Comp_incomp" = c(540, 130, 85)
#'   ),
#'   Error = list(
#'     "Comp_comp" = c(4, 3, 2, 1, 1),
#'     "Comp_incomp" = c(20, 4, 3, 1, 1)
#'   )
#' )
#' datOb <- dmcObservedData(dat, columns = c("Subject", "Comp", "RT", "Error"))
#' plot(datOb)
#' fit <- dmcFit(datOb, nTrl = 5000)
#' plot(fit, datOb)
#' summary(fit)
#'
#' # Example 5: Fitting 2+ datasets within all common parameters values
#' fit <- dmcFit(list(flankerData, simonData), nTrl=1000)
#' plot(fit[[1]], flankerData)
#' plot(fit[[2]], simonData)
#' summary(fit)
#'
#' # Example 6: Fitting 2+ datasets within some parameters values varying
#' fit <- dmcFit(list(flankerData, simonData), freeCombined=list(amp=TRUE, tau=TRUE), nTrl=1000)
#' summary(fit) # NB. amp/tau values different, other parameter values equal
#' }
#'
#'
#' @export
dmcFit <- function(resOb,
                   nTrl = 100000,
                   startVals = list(),
                   minVals = list(),
                   maxVals = list(),
                   fixedFit = list(),
                   freeCombined = list(),
                   fitInitialGrid = TRUE,
                   fitInitialGridN = 10, # reduce if grid search 3/4+ parameters
                   fixedGrid = list(), # default only initial search tau
                   nCAF = 5,
                   nDelta = 19,
                   pDelta = vector(),
                   tDelta = 1,
                   deltaErrors = FALSE,
                   spDist = 1,
                   drOnset = 0,
                   drDist = 0,
                   drShape = 3,
                   drLim = c(0.1, 0.7),
                   bndsRate = 0,
                   bndsSaturation = 0,
                   rtMax = 5000,
                   costFunction = "RMSE",
                   printInputArgs = TRUE,
                   printResults = FALSE,
                   optimControl = list(),
                   numCores = 2) {

  if (is(resOb, "dmcob")) {
    resOb <- list(resOb)
  }

  # default parameter space
  defaultStartVals    <- list(amp = 20, tau = 200, drc = 0.5, bnds = 75,  bndsRate=0, bndsSaturation=0,   resMean = 300, resSD = 30,  aaShape = 2, spShape = 3, spBias = 0,   sigm = 4)
  defaultMinVals      <- list(amp = 0,  tau = 5,   drc = 0.1, bnds = 20,  bndsRate=0, bndsSaturation=0,   resMean = 200, resSD = 5,   aaShape = 1, spShape = 2, spBias = -20, sigm = 1)
  defaultMaxVals      <- list(amp = 40, tau = 300, drc = 1.0, bnds = 150, bndsRate=1, bndsSaturation=500, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 20,  sigm = 10)
  defaultFixedFit     <- list(amp = F,  tau = F,   drc = F,   bnds = F,   bndsRate=T, bndsSaturation=T,   resMean = F,   resSD = F,   aaShape = F, spShape = F, spBias = T,   sigm = T)
  defaultFixedGrid    <- list(amp = T,  tau = F,   drc = T,   bnds = T,   bndsRate=T, bndsSaturation=T,   resMean = T,   resSD = T,   aaShape = T, spShape = T, spBias = T,   sigm = T)
  defaultFreeCombined <- list(amp = F,  tau = F,   drc = F,   bnds = F,   bndsRate=F, bndsSaturation=F,   resMean = F,   resSD = F,   aaShape = F, spShape = F, spBias = F,   sigm = F)

  startVals <- modifyList(defaultStartVals, startVals)
  # startVals <- lapply(startVals, function(x) ifelse(x == 0, .Machine$double.xmin, x))
  if (!fitInitialGrid) {
    startVals <- unlist(startVals)
  }

  minVals      <- modifyList(defaultMinVals, minVals)
  maxVals      <- modifyList(defaultMaxVals, maxVals)
  fixedFit     <- modifyList(defaultFixedFit, fixedFit)
  fixedGrid    <- modifyList(defaultFixedGrid, fixedGrid)
  freeCombined <- modifyList(defaultFreeCombined, freeCombined)

  # parScale <- unlist(startVals) / min(unlist(startVals) + 1)
  # hard-code parscale to approx above, but avoid 0's?
  parScale <- list(amp = 20, tau = 200, drc = 0.5, bnds = 75, bndsRate=0.5, bndsSaturation=200, resMean = 300, resSD = 30, aaShape = 2, spShape = 3, spBias = 20, sigm = 4)

  prms <- replicate(length(resOb), startVals, simplify = FALSE)

  # default optim control parameters
  defaultOptimControl <- list(parscale = c(parScale[!as.logical(fixedFit)], parScale[as.logical(freeCombined)]), maxit = 500)
  optimControl <- modifyList(defaultOptimControl, optimControl)

  # change nDelta to pDelta if pDelta not empty
  if (length(pDelta) != 0) {
    nDelta <- pDelta
    tDelta <- 1
  }

  # check observed data contains correct number of delta/CAF bins
  for (i in 1:length(resOb)) {
    if (length(nDelta) == 1) {
      if (nrow(resOb[[i]]$delta) != nDelta) {
        stop(paste0("Number of delta bins in observed data set ", i, " and nDelta bins are not equal!"))
      }
    } else if (length(nDelta) > 1) {
      if (nrow(resOb[[i]]$delta) != length(nDelta)) {
        stop(paste0("Number of delta bins in observed data set ", i, " and nDelta bins are not equal!"))
      }
    }
    if (nrow(resOb[[i]]$caf) != nCAF) {
      stop(paste0("Number of CAF bins in observed data set ", i, " and nCAF bins are not equal!"))
    }
  }

  # which cost function?
  calculateCostValue <- costValueFunction(costFunction)

  if (is.character(costFunction)) {
    if (tolower(costFunction) %in% c("cs", "gs")) {
      # add additional probabilities to observed data required for cs/gs cost functions
      for (i in 1:length(resOb)) {
        resOb[[i]] <- calculateBinProbabilities(resOb[[i]])
      }
    }
  }

  # fit procedure initial grid
  if (fitInitialGrid) {
    minValsGrid <- minVals
    maxValsGrid <- maxVals
    startValsGrid <- startVals

    # which parameters to search within grid
    if (TRUE %in% fixedGrid) {
      minValsGrid[fixedGrid == T] <- startValsGrid[fixedGrid == T]
      maxValsGrid[fixedGrid == T] <- startValsGrid[fixedGrid == T]
    }

    startValsGrid <- Map(seq, minValsGrid, maxValsGrid, length.out = fitInitialGridN)
    startValsGrid <- dplyr::distinct(expand.grid(Map(unique, startValsGrid)))
    message("Searching initial parameter gridspace: N = ", nrow(startValsGrid), ", with ", nTrl, " trials per simulation.")

    # CRAN maintainer personal communication: THIS IS NOT THE WAY TO DO IT!
    # CRAN submissions can only use maximum of two cores unless user requests more!
    # R check limits number of cores to 2 (https://stackoverflow.com/questions/50571325/r-cran-check-fail-when-using-parallel-functions)
    # chk <- tolower(Sys.getenv("_R_CHECK_LIMIT_CORES_", ""))
    # if (nzchar(chk) && (chk == "true")) {
    #   num_cores <- 2L
    # } else {
    #   num_cores <- parallel::detectCores() / 2
    # }

    # Older code (pre June 2020) used doSNOW progress bar
    # R CMD check --as-cran -->  doSNOW warning “superseded packages”
    # Use pbapply for progress bar
    # https://stackoverflow.com/questions/58473626/r-doparallel-progress-bar-to-monitor-finished-jobs

    pCostValue <- function(i) {
      resTh <- dmcSim(
        amp = startValsGrid$amp[i], tau = startValsGrid$tau[i], drc = startValsGrid$drc[i], bnds = startValsGrid$bnds[i],
        resMean = startValsGrid$resMean[i], resSD = startValsGrid$resSD[i], aaShape = startValsGrid$aaShape[i],
        spShape = startValsGrid$spShape[i], spBias = startValsGrid$spBias[i], sigm = startValsGrid$sigm[i],
        spLim = c(-startValsGrid$bnds[i], startValsGrid$bnds[i]), spDist = spDist, bndsRate = startValsGrid$bndsRate[i],
        bndsSaturation = startValsGrid$bndsSaturation[i], drOnset = drOnset, drDist = drDist, drShape = drShape, drLim = drLim,
        rtMax = rtMax, nTrl = nTrl, nDelta = nDelta, pDelta = pDelta, tDelta = tDelta, deltaErrors = deltaErrors, nCAF = nCAF,
        printInputArgs = TRUE, printResults = FALSE
      )
      cost <- 0
      for (i in 1:length(resOb)) {
        cost <- cost + calculateCostValue(resTh, resOb[[i]])
      }
      return(cost)
    }

    cl <- parallel::makeCluster(numCores)
    invisible(parallel::clusterExport(cl = cl, varlist = c("dmcSim", "calculateCostValue"), envir = environment()))

    # calculate initial cost values across grid starting values and find min
    costValue <- pbapply::pblapply(cl = cl, X = 1:nrow(startValsGrid), FUN = pCostValue)
    startVals <- startValsGrid[which.min(costValue), ]

    parallel::stopCluster(cl)
  }

  # optimize
  fit <- optim(
    par = as.numeric(c(startVals[!as.logical(fixedFit)], startVals[as.logical(freeCombined)])),
    fn = minimizeCostValue,
    costFunction = calculateCostValue, prms = prms, fixedFit = fixedFit, freeCombined = freeCombined, resOb = resOb,
    nTrl = nTrl, nDelta = nDelta, pDelta = pDelta, tDelta = tDelta, deltaErrors = deltaErrors, nCAF = nCAF,
    spDist = spDist, drOnset = drOnset, drDist = drDist,
    drShape = drShape, drLim = drLim, rtMax = rtMax, minVals = minVals, maxVals = maxVals,
    printInputArgs = printInputArgs, printResults = printResults,
    method = "Nelder-Mead", control = optimControl
  )

  # number of fitted parameters
  nFreeParametersUnique <- sum(as.logical(fixedFit) == FALSE)
  nFreeParametersTotal <- sum(as.logical(fixedFit) == FALSE) + sum(as.logical(freeCombined == TRUE))

  for (i in 1:length(resOb)) {
    prms[[i]][!as.logical(fixedFit)] <- fit$par[c(1:nFreeParametersUnique)]
  }
  if (length(resOb) >= 2) {
    for (i in 2:length(resOb)) {
      prms[[i]][as.logical(freeCombined)] <- fit$par[-c(1:nFreeParametersUnique)]
    }
  }

  # bounds check
  for (i in 1:length(prms)) {
    prms[[i]] <- pmax(unlist(prms[[i]]), unlist(minVals))
    prms[[i]] <- pmin(unlist(prms[[i]]), unlist(maxVals))
  }
  for (i in 1:length(resOb)) {
    tmp_minVals <- unlist(minVals[!as.logical(fixedFit)])
    tmp_maxVals <- unlist(maxVals[!as.logical(fixedFit)])
    tmp_prms <- unlist(prms[[i]][!as.logical(fixedFit)])
    if (any(tmp_prms == tmp_minVals) || any(tmp_prms == tmp_maxVals)) {
      warning(paste0("Parameter estimates at minVals/maxVals bounds for data set ", i, "!"))
    }
  }
  for (i in 1:length(resOb)) {
    prms[[i]] <- as.list(prms[[i]])
  }

  dmcfit <- list()
  for (i in 1:length(resOb)) {
    dmcfit[[i]] <- dmcSim(
      amp = prms[[i]]$amp, tau = prms[[i]]$tau, drc = prms[[i]]$drc, bnds = prms[[i]]$bnds,
      resMean = prms[[i]]$resMean, resSD = prms[[i]]$resSD, aaShape = prms[[i]]$aaShape,
      spDist = spDist, spBias = prms[[i]]$spBias, spShape = prms[[i]]$spShape, spLim = c(-prms[[i]]$bnds, prms[[i]]$bnds),
      sigm = prms[[i]]$sigm, bndsRate = prms[[i]]$bndsRate, bndsSaturation = prms[[i]]$bndsSaturation, drOnset = drOnset,
      drDist = drDist, drShape = drShape, drLim = drLim, rtMax = rtMax, nTrl = nTrl, nDelta = nDelta, pDelta = pDelta,
      tDelta = tDelta, deltaErrors = deltaErrors, nCAF = nCAF, printResults = TRUE
    )
  }

  # fitted parameters
  for (i in 1:length(resOb)) {
    dmcfit[[i]]$nDataSets <- length(resOb)
    dmcfit[[i]]$par <- prms[[i]]
    dmcfit[[i]]$par["cost"] <- calculateCostValue(dmcfit[[i]], resOb[[i]])
  }

  if (!is.function(costFunction)) {
    if (tolower(costFunction) %in% c("cs", "gs")) {
      for (i in 1:length(dmcfit)) {
        dmcfit[[i]]$par["df"] <- (2 * (12 - 1)) - nFreeParametersTotal # 2 conditions with 6 bins - N fitted parameters
      }
    }
    if (tolower(costFunction) == "gs") {
      for (i in 1:length(dmcfit)) {
        dmcfit[[i]]$par["BIC"] <- dmcfit[[i]]$par$cost + (nFreeParametersTotal * log(sum(resOb[[i]]$data$outlier == 0)))
      }
    }
  }

  dmcfit <- lapply(dmcfit, `class<-`, value = "dmcfit")
  if (length(dmcfit) == 1) {
    return(dmcfit[[1]])
  } else {
    class(dmcfit) <- "dmcfits"
    return(dmcfit)
  }

}


#' @title dmcFitDE
#'
#' @description Fit theoretical data generated from dmcSim to observed data by
#' minimizing the root-mean-square error (RMSE) between a weighted combination
#' of the CAF and CDF functions using the R-package DEoptim. Alternative cost functions
#' include squared percentage error ("SPE"), and g-squared statistic ("GS").
#'
#' @param resOb Observed data (see flankerData and simonTask for data format)
#' @param nTrl The number of trials to use within dmcSim.
#' @param minVals Minimum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, sigm (e.g., minVals = list(amp = 10, tau = 5, drc = 0.1,
#' bnds = 20, bndsRate=0, bndsSaturation=0, resMean = 200, resSD = 5, aaShape = 1, spShape = 2, spBias = -20, sigm = 1)).
#' @param maxVals Maximum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, sigm (e.g., maxVals = list(amp = 40, tau = 300, drc = 1.0,
#' bnds = 150, bndsRate=1, bndsSaturation=500, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 20, sigm = 10))
#' @param fixedFit Fix parameter to starting value. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., fixedFit = list(amp = F, tau = F, drc = F,
#' bnds = F, bndsRate=T, bndsSaturation=T, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = T, sigm = T))
#' NB. Value if fixed at startVals.
#' @param freeCombined If fitting 2+ datasets at once, which parameters are allowed to vary between both
#' fits (default = all parameters fixed between the two fits e.g. parameter = F).
#' This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., freeCombined = list(amp = F,
#' tau = F, drc = F, bnds = F, bndsRate=F, bndsSaturation=F, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = F, sigm = F))
#' @param nCAF The number of CAF bins.
#' @param nDelta The number of delta bins.
#' @param pDelta An alternative option to nDelta (tDelta = 1 only) by directly specifying required percentile values (vector of values 0-100)
#' @param tDelta The type of delta calculation (1=direct percentiles points, 2=percentile bounds (tile) averaging)
#' @param deltaErrors TRUE/FALSE Calculate delta bins for error trials
#' @param costFunction The cost function to minimise: root mean square error ("RMSE": default),
#' squared percentage error ("SPE"), or likelihood-ratio chi-square statistic ("GS")
#' @param bndsRate 0 (default) = fixed bnds
#' @param bndsSaturation bndsSaturation
#' @param rtMax The limit on simulated RT (decision + non-decisional components)
#' @param spDist The starting point distribution (0 = constant, 1 = beta, 2 = uniform)
#' @param drOnset The starting point of controlled drift rate (i.e., "target" information) relative to automatic ("distractor" information) (> 0 ms)
#' @param drDist The drift rate (dr) distribution type (0 = constant, 1 = beta, 2 = uniform)
#' @param drShape The drift rate (dr) shape parameter
#' @param drLim The drift rate (dr) range
#' @param bndsRate Collapsing bounds rate. 0 (default) = fixed bounds
#' @param bndsSaturation Collapsing bounds saturation
#' @param deControl Additional control parameters passed to DEoptim (see DEoptim.control)
#' @param numCores Number of cores to use
#'
#' @return dmcfit returns an object of class "dmcfit" with the following components:
#'
#' \item{sim}{Individual trial data points (RTs for all trial types e.g., correct/error trials) and activation vectors from the simulation}
#' \item{summary}{Condition means for reaction time and error rate}
#' \item{caf}{Conditional Accuracy Function (CAF) data per bin}
#' \item{delta}{DataFrame with distributional delta analysis data correct trials (Bin, meanComp, meanIncomp, meanBin, meanEffect)}
#' \item{delta_errs}{Optional: DataFrame with distributional delta analysis data incorrect trials (Bin, meanComp, meanIncomp, meanBin, meanEffect)}
#' \item{par}{The fitted model parameters + final cost value of the fit}
#'
#' @examples
#' \donttest{
#' # The code below can exceed CRAN check time limit, hence donttest
#' # NB. The following code when using numCores = 2 (default) takes approx 20 minutes on
#' # a standard desktop, whilst when increasing the number of cores used, (numCores = 12),
#' # the code takes approx 5 minutes.
#'
#' # Example 1: Flanker data from Ulrich et al. (2015)
#' fit <- dmcFitDE(flankerData, nTrl = 1000);
#' plot(fit, flankerData)
#' summary(fit)
#'
#' # Example 2: Simon data from Ulrich et al. (2015)
#' fit <- dmcFitDE(simonData, nTrl = 5000, deControl = list(itermax=30))
#' plot(fit, simonData)
#' summary(fit)
#' }
#' @export
dmcFitDE <- function(resOb,
                     nTrl = 100000,
                     minVals = list(),
                     maxVals = list(),
                     fixedFit = list(),
                     freeCombined = list(),
                     nCAF = 5,
                     nDelta = 19,
                     pDelta = vector(),
                     tDelta = 1,
                     deltaErrors = FALSE,
                     costFunction = "RMSE",
                     spDist = 1,
                     drOnset = 0,
                     drDist = 0,
                     drShape = 3,
                     drLim = c(0.1, 0.7),
                     bndsRate = 0,
                     bndsSaturation = 0,
                     rtMax = 5000,
                     deControl = list(),
                     numCores = 2) {

  if (is(resOb, "dmcob")) {
    resOb <- list(resOb)
  }

  # default parameter space
  defaultMinVals      <- list(amp = 0,  tau = 5,   drc = 0.1, bnds = 20,  bndsRate=0, bndsSaturation=0, resMean = 200, resSD = 5,   aaShape = 1, spShape = 2, spBias = 0, sigm = 4)
  defaultMaxVals      <- list(amp = 40, tau = 300, drc = 1.0, bnds = 150, bndsRate=0, bndsSaturation=0, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 0, sigm = 4)
  defaultFixedFit     <- list(amp = F,  tau = F,   drc = F,   bnds = F,   bndsRate=T, bndsSaturation=T, resMean = F,   resSD = F,   aaShape = F, spShape = F, spBias = T, sigm = T)
  defaultFreeCombined <- list(amp = F,  tau = F,   drc = F,   bnds = F,   bndsRate=F, bndsSaturation=F, resMean = F,   resSD = F,   aaShape = F, spShape = F, spBias = F, sigm = F)

  minVals      <- modifyList(defaultMinVals, minVals)
  maxVals      <- modifyList(defaultMaxVals, maxVals)
  fixedFit     <- modifyList(defaultFixedFit, fixedFit)
  freeCombined <- modifyList(defaultFreeCombined, freeCombined)

  # NB will need to explicitly set min/max values for bndsRate/bndsSaturation/spBias if fitting!
  prms <- replicate(length(resOb), (unlist(minVals) + unlist(maxVals)) / 2, simplify = FALSE) # start in middle of min/max vals

  # change nDelta to pDelta if pDelta not empty
  if (length(pDelta) != 0) {
    nDelta <- pDelta
    tDelta <- 1
  }

  # check observed data contains correct number of delta/CAF bins
  for (i in 1:length(resOb)) {
    if (length(nDelta) == 1) {
      if (nrow(resOb[[i]]$delta) != nDelta) {
        stop(paste0("Number of delta bins in observed data set ", i, "and nDelta bins are not equal!"))
      }
    } else if (length(nDelta) > 1) {
      if (nrow(resOb[[i]]$delta) != length(nDelta)) {
        stop(paste0("Number of delta bins in observed data set ", i, "and nDelta bins are not equal!"))
      }
    }
    if (nrow(resOb[[i]]$caf) != nCAF) {
      stop(paste0("Number of CAF bins in observed data set ", i, "and nCAF bins are not equal!"))
    }
  }

  # which cost function?
  calculateCostValue <- costValueFunction(costFunction)

  if (is.character(costFunction)) {
    if (tolower(costFunction) %in% c("cs", "gs")) {
      # add additional probabilities to observed data required for cs/gs cost functions
      for (i in 1:length(resOb)) {
        resOb[[i]] <- calculateBinProbabilities(resOb[[i]])
      }
    }
  }

  # CRAN maintainer personal communication: THIS IS NOT THE WAY TO DO IT!
  # CRAN submissions can only use maximum of two cores unless user requests more!
  # R check limits number of cores to 2 (https://stackoverflow.com/questions/50571325/r-cran-check-fail-when-using-parallel-functions)
  # chk <- tolower(Sys.getenv("_R_CHECK_LIMIT_CORES_", ""))
  # if (nzchar(chk) && (chk == "true")) {
  #   num_cores <- 2L
  # } else {
  #   num_cores <- parallel::detectCores() / 2
  # }

  cl <- parallel::makeCluster(numCores)
  invisible(parallel::clusterExport(cl = cl, varlist = c("dmcSim", "calculateCostValue"), envir = environment()))

  defaultControl <- list(VTR = 0, strategy = 1, NP = 100, itermax = 200, trace = 1, cluster = cl)
  deControl <- modifyList(defaultControl, deControl)

  # optimize
  fit <- DEoptim::DEoptim(
    fn = minimizeCostValue,
    lower = c(unlist(minVals[!as.logical(fixedFit)]), unlist(minVals[as.logical(freeCombined)])),
    upper = c(unlist(maxVals[!as.logical(fixedFit)]), unlist(maxVals[as.logical(freeCombined)])),
    costFunction = calculateCostValue,
    prms = prms,
    fixedFit = fixedFit,
    freeCombined = freeCombined,
    minVals = minVals,
    maxVals = maxVals,
    resOb = resOb,
    nTrl = nTrl,
    nDelta = nDelta,
    nCAF = nCAF,
    pDelta = pDelta,
    tDelta = tDelta,
    deltaErrors = deltaErrors,
    spDist = spDist,
    drOnset = drOnset,
    drDist = drDist,
    drShape = drShape,
    drLim = drLim,
    rtMax = rtMax,
    printInputArgs = FALSE,
    printResults = FALSE,
    control = deControl
  )

  parallel::stopCluster(cl)

  # number of fitted parameters
  nFreeParametersUnique <- sum(as.logical(fixedFit) == FALSE)
  nFreeParametersTotal <- sum(as.logical(fixedFit) == FALSE) + sum(as.logical(freeCombined == TRUE))

  for (i in 1:length(resOb)) {
    prms[[i]][!as.logical(fixedFit)] <- as.list(fit$optim$bestmem)[c(1:nFreeParametersUnique)]
  }
  if (length(resOb) >= 2) {
    for (i in 2:length(resOb)) {
      prms[[i]][as.logical(freeCombined)] <- as.list(fit$optim$bestmem)[-c(1:nFreeParametersUnique)]
    }
  }

  # bounds check
  for (i in 1:length(prms)) {
    prms[[i]] <- pmax(unlist(prms[[i]]), unlist(minVals))
    prms[[i]] <- pmin(unlist(prms[[i]]), unlist(maxVals))
  }
  for (i in 1:length(resOb)) {
    tmp_minVals <- unlist(minVals[!as.logical(fixedFit)])
    tmp_maxVals <- unlist(maxVals[!as.logical(fixedFit)])
    tmp_prms <- unlist(prms[[i]][!as.logical(fixedFit)])
    if (any(tmp_prms == tmp_minVals) || any(tmp_prms == tmp_maxVals)) {
      warning(paste0("Parameter estimates at minVals/maxVals bounds for data set ", i, "!"))
    }
  }
  for (i in 1:length(resOb)) {
    prms[[i]] <- as.list(prms[[i]])
  }
  cat("\n")

  dmcfit <- list()
  for (i in 1:length(resOb)) {
    dmcfit[[i]] <- dmcSim(
      amp = prms[[i]]$amp, tau = prms[[i]]$tau, drc = prms[[i]]$drc, bnds = prms[[i]]$bnds,
      resMean = prms[[i]]$resMean, resSD = prms[[i]]$resSD, aaShape = prms[[i]]$aaShape,
      spDist = spDist, spBias = prms[[i]]$spBias, spShape = prms[[i]]$spShape, spLim = c(-prms[[i]]$bnds, prms[[i]]$bnds),
      sigm = prms[[i]]$sigm, drOnset = drOnset, bndsRate = prms[[i]]$bndsRate, bndsSaturation = prms[[i]]$bndsSaturation,
      drDist = drDist, drShape = drShape, drLim = drLim, rtMax = rtMax, nTrl = nTrl, nDelta = nDelta,
      pDelta = pDelta, tDelta = tDelta, deltaErrors = deltaErrors, nCAF = nCAF,
      printResults = TRUE
    )
  }

  # fitted parameters
  for (i in 1:length(resOb)) {
    dmcfit[[i]]$nDataSets <- length(resOb)
    dmcfit[[i]]$par <- prms[[i]]
    dmcfit[[i]]$par["cost"] <- calculateCostValue(dmcfit[[i]], resOb[[i]])
  }

  if (!is.function(costFunction)) {
    if (tolower(costFunction) %in% c("cs", "gs")) {
      for (i in 1:length(dmcfit)) {
        dmcfit[[i]]$par["df"] <- (2 * (12 - 1)) - nFreeParametersTotal # 2 conditions with 6 bins - N fitted parameters
      }
    }
    if (tolower(costFunction) == "gs") {
      for (i in 1:length(dmcfit)) {
        dmcfit[[i]]$par["BIC"] <- dmcfit[[i]]$par$cost + (nFreeParametersTotal * log(sum(resOb[[i]]$data$outlier == 0)))
      }
    }
  }

  dmcfit <- lapply(dmcfit, `class<-`, value = "dmcfit")
  if (length(dmcfit) == 1) {
    return(dmcfit[[1]])
  } else {
    class(dmcfit) <- "dmcfits"
    return(dmcfit)
  }

  return(dmcfit)
}


#' @title dmcFitSubject
#'
#' @description Fit theoretical data generated from dmcSim to observed data by
#' minimizing the root-mean-square error ("RMSE") between a weighted combination
#' of the CAF and CDF functions using optim (Nelder-Mead). Alternative cost functions
#' include squared percentage error ("SPE"), and g-squared statistic ("GS").
#'
#' @param resOb Observed data (see flankerData and simonTask for data format) and the function dmcObservedData to create
#' the required input from either an R data frame or external *.txt/*.csv files
#' @param nTrl Number of trials to use within dmcSim.
#' @param startVals Starting values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., startVals = list(amp = 20, tau = 200,
#' drc = 0.5, bnds = 75, resMean = 300, resSD = 30, aaShape = 2, spShape = 3, spBias = 0, sigm = 4, bndsRate=0, bndsSaturation=0)).
#' @param minVals Minimum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., minVals = list(amp = 0, tau = 5, drc = 0.1,
#' bnds = 20, bndsRate=0, bndsSaturation=0, resMean = 200, resSD = 5, aaShape = 1, spShape = 2, spBias = -20, sigm = 1)).
#' @param maxVals Maximum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., maxVals = list(amp = 40, tau = 300, drc = 1.0,
#' bnds = 150, bndsRate=1, bndsSaturation=500, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 20, sigm = 10))
#' @param fixedFit Fix parameter to starting value. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., fixedFit = list(amp = F, tau = F, drc = F,
#' bnds = F, bndsRate=T, bndsSaturation=T, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = T, sigm = T))
#' NB. Value if fixed at startVals.
#' @param fitInitialGrid TRUE/FALSE
#' @param fitInitialGridN 10 linear steps between parameters min/max values (reduce if searching more than ~2/3 initial parameters)
#' @param fixedGrid Fix parameter for initial grid search. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., fixedGrid = list(amp = T, tau = F, drc = T,
#' bnds = T, bndsRate=T, bndsSaturation=T, resMean = T, resSD = T, aaShape = T, spShape = T, spBias = T, sigm = T)). As a default,
#' the initial gridsearch only searches the tau space.
#' @param freeCombined If fitting 2+ datasets at once, which parameters are allowed to vary between both
#' fits (default = all parameters fixed between the two fits e.g. parameter = F).
#' This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., freeCombined = list(amp = F,
#' tau = F, drc = F, bnds = F, bndsRate=F, bndsSaturation=F, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = F, sigm = F))
#' @param nCAF Number of CAF bins.
#' @param nDelta Number of delta bins.
#' @param pDelta An alternative option to nDelta (tDelta = 1 only) by directly specifying required percentile values (vector of values 0-100)
#' @param tDelta The type of delta calculation (1=direct percentiles points, 2=percentile bounds (tile) averaging)
#' @param deltaErrors TRUE/FALSE Calculate delta bins for error trials
#' @param costFunction The cost function to minimise: root mean square error ("RMSE": default),
#' squared percentage error ("SPE"), or likelihood-ratio chi-square statistic ("GS")
#' @param spDist The starting point (sp) distribution (0 = constant, 1 = beta, 2 = uniform)
#' @param drOnset The starting point of controlled drift rate (i.e., "target" information) relative to automatic ("distractor" incormation) (> 0 ms)
#' @param drDist The drift rate (dr) distribution type (0 = constant, 1 = beta, 2 = uniform)
#' @param drShape The drift rate (dr) shape parameter
#' @param drLim The drift rate (dr) range
#' @param bndsRate Collapsing bounds rate. 0 (default) = fixed bounds
#' @param bndsSaturation Collapsing bounds saturation
#' @param rtMax The limit on simulated RT (decision + non-decisional components)
#' @param subjects NULL (aggregated data across all subjects) or integer for subject number
#' @param printInputArgs TRUE (default) /FALSE
#' @param printResults TRUE/FALSE (default)
#' @param optimControl Additional control parameters passed to optim (see optim details section)
#' @param numCores Number of cores to use
#'
#' @return dmcFitSubject returns a list of objects of class "dmcfit"
#'
#' @examples
#' \donttest{
#' # Code below can exceed CRAN check time limit, hence donttest
#' # Example 1: Flanker data from Ulrich et al. (2015)
#' fit <- dmcFitSubject(flankerData, nTrl = 1000, subjects = c(1, 2));
#' plot(fit, flankerData, subject = 1)
#' plot(fit, flankerData, subject = 2)
#' summary(fit)
#'
#' }
#'
#' @export
dmcFitSubject <- function(resOb,
                          nTrl = 100000,
                          startVals = list(),
                          minVals = list(),
                          maxVals = list(),
                          fixedFit = list(),
                          freeCombined = list(),
                          fitInitialGrid = TRUE,
                          fitInitialGridN = 10, # reduce if grid search 3/4+ parameters
                          fixedGrid = list(), # default only initial tau search
                          nCAF = 5,
                          nDelta = 19,
                          pDelta = vector(),
                          tDelta = 1,
                          deltaErrors = FALSE,
                          costFunction = "RMSE",
                          spDist = 1,
                          drOnset = 0,
                          drDist = 0,
                          drShape = 3,
                          drLim = c(0.1, 0.7),
                          bndsRate = 0,
                          bndsSaturation = 0,
                          rtMax = 5000,
                          subjects = c(),
                          printInputArgs = TRUE,
                          printResults = FALSE,
                          optimControl = list(),
                          numCores = 2) {

  if (is(resOb, "dmcob")) {
    resOb <- list(resOb)
  }

  if (length(subjects) == 0) {
    subjects <- unique(resOb[[1]]$summarySubject$Subject) # fit all individual subjects in data
  }

  dmcfit <- vector("list", max(subjects))
  for (subject in subjects) {

    resObSubject <- list()
    for (i in 1:length(resOb)) {
      if (is.null(resOb[[i]]$data)) {
        resObSubject[[i]] <- list(
          deltaAgg = resOb[[i]]$deltaSubject[resOb[[i]]$deltaSubject$Subject == subject, ],
          cafAgg = resOb[[i]]$cafSubject[resOb[[i]]$cafSubject$Subject == subject, ]
        )
      } else {
        resObSubject[[i]] <- list(
          data = resOb[[i]]$data[resOb[[i]]$data$Subject == subject, ],
          deltaAgg = resOb[[i]]$deltaSubject[resOb[[i]]$deltaSubject$Subject == subject, ],
          cafAgg = resOb[[i]]$cafSubject[resOb[[i]]$cafSubject$Subject == subject, ]
        )
      }
    }

    dmcfit[[subject]] <- dmcFit(resObSubject,
                                nTrl            = nTrl,
                                startVals       = startVals,
                                minVals         = minVals,
                                maxVals         = maxVals,
                                fixedFit        = fixedFit,
                                freeCombined    = freeCombined,
                                fitInitialGrid  = fitInitialGrid,
                                fitInitialGridN = fitInitialGridN, # reduce if grid search 3/4+ parameters
                                fixedGrid       = fixedGrid, # only fit tau
                                nCAF            = nCAF,
                                nDelta          = nDelta,
                                pDelta          = pDelta,
                                tDelta          = tDelta,
                                deltaErrors     = deltaErrors,
                                costFunction    = costFunction,
                                spDist          = spDist,
                                drOnset         = drOnset,
                                drDist          = drDist,
                                drShape         = drShape,
                                drLim           = drLim,
                                rtMax           = rtMax,
                                printInputArgs  = printInputArgs,
                                printResults    = printResults,
                                optimControl    = optimControl,
                                numCores        = numCores
    )
  }

  if (length(resOb) == 1) {
    class(dmcfit) <- "dmcfit_subject"
  } else {
    class(dmcfit) <- "dmcfits_subject"
  }

  return(dmcfit)
}


#' @title dmcFitSubjectDE
#'
#' @description Fit theoretical data generated from dmcSim to observed data by
#' minimizing the root-mean-square error (RMSE) between a weighted combination
#' of the CAF and CDF functions using the R-package DEoptim. Alternative cost functions
#' include squared percentage error ("SPE"), and g-squared statistic ("GS").
#'
#' @param resOb Observed data (see flankerData and simonTask for data format)
#' @param nTrl The number of trials to use within dmcSim.
#' @param minVals Minimum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, sigm (e.g., minVals = list(amp = 10, tau = 5, drc = 0.1,
#' bnds = 20, resMean = 200, resSD = 5, aaShape = 1, spShape = 2, spBias = -20, sigm = 1, bndsRate=0, bndsSaturation=0)).
#' @param maxVals Maximum values for the to-be estimated parameters. This is a list with values specified individually
#' for amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, sigm (e.g., maxVals = list(amp = 40, tau = 300, drc = 1.0,
#' bnds = 150, bndsRate=1, bndsSaturation=500, resMean = 800, resSD = 100, aaShape = 3, spShape = 4, spBias = 20, sigm = 10))
#' @param fixedFit Fix parameter to starting value. This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, sigm (e.g., fixedFit = list(amp = F,  tau = F, drc = F,
#' bnds = F, bndsRate=T, bndsSaturation=T, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = T, sigm = T, bndsRate=T, bndsSaturation=T))
#' NB. Value if fixed at midpoint between minVals and maxVals.
#' @param freeCombined If fitting 2+ datasets at once, which parameters are allowed to vary between both
#' fits (default = all parameters fixed between the two fits e.g. parameter = F).
#' This is a list with bool values specified individually for
#' amp, tau, drc, bnds, resMean, resSD, aaShape, spShape, spBias, sigm (e.g., freeCombined = list(amp = F,
#' tau = F, drc = F, bnds = F, bndsRate=F, bndsSaturation=F, resMean = F, resSD = F, aaShape = F, spShape = F, spBias = F, sigm = F))
#' @param nCAF The number of CAF bins.
#' @param nDelta The number of delta bins.
#' @param pDelta An alternative option to nDelta (tDelta = 1 only) by directly specifying required percentile values (vector of values 0-100)
#' @param tDelta The type of delta calculation (1=direct percentiles points, 2=percentile bounds (tile) averaging)
#' @param deltaErrors TRUE/FALSE Calculate delta bins for error trials
#' @param costFunction The cost function to minimise: root mean square error ("RMSE": default),
#' squared percentage error ("SPE"), or likelihood-ratio chi-square statistic ("GS")
#' @param bndsRate Collapsing bounds rate. 0 (default) = fixed bounds
#' @param bndsSaturation Collapsing bounds saturation
#' @param rtMax The limit on simulated RT (decision + non-decisional components)
#' @param bndsRate 0 (default) = fixed bnds
#' @param spDist The starting point distribution (0 = constant, 1 = beta, 2 = uniform)
#' @param drOnset The starting point of controlled drift rate (i.e., "target" information) relative to automatic ("distractor" incormation) (> 0 ms)
#' @param drDist The drift rate (dr) distribution type (0 = constant, 1 = beta, 2 = uniform)
#' @param drShape The drift rate (dr) shape parameter
#' @param drLim The drift rate (dr) range
#' @param subjects NULL (aggregated data across all subjects) or integer for subject number
#' @param deControl Additional control parameters passed to DEoptim (see DEoptim.control)
#' @param numCores Number of cores to use
#'
#' @return dmcFitSubjectDE returns a list of objects of class "dmcfit"
#'
#' @examples
#' \donttest{
#' # Code below can exceed CRAN check time limit, hence donttest
#' # Example 1: Flanker data from Ulrich et al. (2015)
#' fit <- dmcFitSubjectDE(flankerData, nTrl = 1000, subjects = c(1, 2), deControl = list(itermax=30))
#' plot(fit, flankerData, subject = 1)
#' plot(fit, flankerData, subject = 2)
#' summary(fit)
#'
#' }
#'
#' @export
dmcFitSubjectDE <- function(resOb,
                            nTrl = 100000,
                            minVals = list(),
                            maxVals = list(),
                            fixedFit = list(),
                            freeCombined = list(),
                            nCAF = 5,
                            nDelta = 19,
                            pDelta = vector(),
                            tDelta = 1,
                            deltaErrors = FALSE,
                            costFunction = "RMSE",
                            spDist = 1,
                            drOnset = 0,
                            drDist = 0,
                            drShape = 3,
                            drLim = c(0.1, 0.7),
                            bndsRate = 0,
                            bndsSaturation = 0,
                            rtMax = 5000,
                            subjects = c(),
                            deControl = list(),
                            numCores = 2) {

  if (is(resOb, "dmcob")) {
    resOb <- list(resOb)
  }

  if (length(subjects) == 0) {
    subjects <- unique(resOb[[1]]$summarySubject$Subject) # fit all individual subjects in data
  }

  dmcfit <- vector("list", max(subjects))
  for (subject in subjects) {

    resObSubject <- list()
    for (i in 1:length(resOb)) {
      if (is.null(resOb[[i]]$data)) {
        resObSubject[[i]] <- list(
          deltaAgg = resOb[[i]]$deltaSubject[resOb[[i]]$deltaSubject$Subject == subject, ],
          cafAgg = resOb[[i]]$cafSubject[resOb[[i]]$cafSubject$Subject == subject, ]
        )
      } else {
        resObSubject[[i]] <- list(
          data = resOb[[i]]$data[resOb[[i]]$data$Subject == subject, ],
          deltaAgg = resOb[[i]]$deltaSubject[resOb[[i]]$deltaSubject$Subject == subject, ],
          cafAgg = resOb[[i]]$cafSubject[resOb[[i]]$cafSubject$Subject == subject, ]
        )
      }
    }

    dmcfit[[subject]] <- dmcFitDE(resObSubject,
      nTrl         = nTrl,
      minVals      = minVals,
      maxVals      = maxVals,
      fixedFit     = fixedFit,
      freeCombined = freeCombined,
      nCAF         = nCAF,
      nDelta       = nDelta,
      pDelta       = pDelta,
      tDelta       = tDelta,
      deltaErrors  = deltaErrors,
      costFunction = costFunction,
      spDist       = spDist,
      drOnset      = drOnset,
      drDist       = drDist,
      drShape      = drShape,
      drLim        = drLim,
      rtMax        = rtMax,
      deControl    = deControl,
      numCores     = numCores
    )
  }

  if (length(resOb) == 1) {
    class(dmcfit) <- "dmcfit_subject"
  } else {
    class(dmcfit) <- "dmcfits_subject"
  }

  return(dmcfit)
}


#' @title mean.dmcfit
#'
#' @description Aggregate simulation results from dmcFitSubject/dmcFitSubjectDE.
#'
#' @param x Output from dmcFitSubject/dmcFitSubjectDE
#' @param ... pars
#'
#' @return mean.dmcfit return an object of class "dmcfit" with the following components:
#' \item{summary}{DataFrame within aggregated subject data (rtCor, sdRtCor, seRtCor, perErr, sdPerErr, sePerErr, rtErr, sdRtErr, seRtErr) for compatibility condition}
#' \item{delta}{DataFrame within aggregated subject distributional delta analysis data correct trials (Bin, meanComp, meanIncomp, meanBin, meanEffect, sdEffect, seEffect)}
#' \item{caf}{DataFrame within aggregated subject conditional accuracy function (CAF) data (Bin, accPerComp, accPerIncomp, meanEffect, sdEffect, seEffect)}
#' \item{par}{The fitted model parameters + final cost value of the fit}
#'
#' @examples
#' \donttest{
#' # Code below can exceed CRAN check time limit, hence donttest
#' # Example 1: Fit individual data then aggregate
#' fitSubjects <- dmcFitSubject(flankerData, nTrl = 1000, subjects = c(1, 2))
#' fitAgg <- mean(fitSubjects)
#' plot(fitAgg, flankerData)
#' }
#'
#' @export
mean.dmcfit_subject <- function(x, ...) {

  mergeLists <- function(lists, name) {
    return(lapply(c(name), function(x) do.call(rbind, lapply(lists, `[[`, x)))[[1]])
  }

  # mean fit of summary, delta, caf, and parameters
  meanfit <- list()

  meanfit$summary <- mergeLists(x, "summary") %>%
    dplyr::group_by(Comp) %>%
    dplyr::summarise_all(mean)

  meanfit$delta <- mergeLists(x, "delta") %>%
    dplyr::group_by(Bin) %>%
    dplyr::summarize_all(mean)

  meanfit$caf <- mergeLists(x, "caf") %>%
    dplyr::group_by(Bin) %>%
    dplyr::summarize_all(mean)

  meanfit$par <- as.data.frame(mergeLists(x, "par")) %>%
    dplyr::summarize_all(~ mean(unlist(.x)))

  meanfit$sim <- dmcSim(amp = meanfit$par$amp, tau = meanfit$par$tau, drc = meanfit$par$drc,
    bnds = meanfit$par$bnds, resMean = meanfit$par$resMean, resSD = meanfit$par$resSD,
    aaShape = meanfit$par$aaShape, spShape = meanfit$par$spShape,
    spBias = meanfit$par$spBias, sigm = meanfit$par$sigm,
    printInputArgs = FALSE, printResults = FALSE)$sim

  class(meanfit) <- c("dmcfit")

  return(meanfit)
}

# function to minimise
minimizeCostValue <- function(x,
                              costFunction,
                              prms,
                              fixedFit,
                              freeCombined,
                              resOb,
                              nTrl,
                              nDelta,
                              pDelta,
                              tDelta,
                              deltaErrors,
                              nCAF,
                              spDist,
                              drOnset,
                              drDist,
                              drShape,
                              drLim,
                              rtMax,
                              minVals,
                              maxVals,
                              printInputArgs,
                              printResults) {

  nParameters <- sum(as.logical(fixedFit) == FALSE)
  for (i in 1:length(prms)) {
    prms[[i]][!as.logical(fixedFit)] <- x[c(1:nParameters)]
  }
  if (length(prms) >= 2) {
    for (i in 2:length(prms)) {
      prms[[i]][as.logical(freeCombined)] <- x[-c(1:nParameters)]
    }
  }

  # implement bounds
  for (i in 1:length(prms)) {
    prms[[i]] <- as.list(pmax(unlist(prms[[i]]), unlist(minVals)))
    prms[[i]] <- as.list(pmin(unlist(prms[[i]]), unlist(maxVals)))
  }

  resTh <- list()
  cost <- c()
  for (i in 1:length(resOb)) {
    cat(paste0("Data set ", i, ":"))
    resTh[[i]] <- dmcSim(
      amp = prms[[i]]$amp, tau = prms[[i]]$tau, drc = prms[[i]]$drc, bnds = prms[[i]]$bnds,
      resMean = prms[[i]]$resMean, resSD = prms[[i]]$resSD, aaShape = prms[[i]]$aaShape,
      spDist = spDist, spShape = prms[[i]]$spShape, spBias = prms[[i]]$spBias, sigm = prms[[i]]$sigm, spLim = c(-prms[[i]]$bnds, prms[[i]]$bnds),
      bndsRate = prms[[i]]$bndsRate,  bndsSaturation = prms[[i]]$bndsSaturation,
      drOnset = drOnset, drDist = drDist, drShape = drShape, drLim = drLim,
      rtMax = rtMax, nTrl = nTrl, nDelta = nDelta, pDelta = pDelta, tDelta = tDelta, deltaErrors = deltaErrors, nCAF = nCAF,
      printInputArgs = printInputArgs, printResults = printResults
    )
    cost[i] <- costFunction(resTh[[i]], resOb[[i]])
    cat(" | cost:", formatC(cost[i], 3, format = "f"), "\n")
  }

  return(sum(cost))

}

#' @title calculateCostValueRMSE
#'
#' @description Calculate cost value (fit) using root-mean-square error (RMSE) from a combination of RT and error rate.
#'
#' @param resTh list containing caf values for comp/incomp conditions (nbins * 4 columns) and
#' delta values for comp/incomp conditions (nbins * 5 columns). See output from dmcSim (.$caf).
#' @param resOb list containing caf values for comp/incomp conditions (n * 4 columns) and
#' delta values for comp/incomp conditions (nbins * 5 columns). See output from dmcSim (.$delta).
#'
#' @return cost value (RMSE)
#'
#' @examples
#' # Example 1:
#' resTh <- dmcSim()
#' resOb <- dmcSim()
#' cost <- calculateCostValueRMSE(resTh, resOb)
#'
#' # Example 2:
#' resTh <- dmcSim()
#' resOb <- dmcSim(tau = 150)
#' cost <- calculateCostValueRMSE(resTh, resOb)
#' @export
calculateCostValueRMSE <- function(resTh, resOb) {
  n_rt <- nrow(resTh$delta) * 2
  n_err <- nrow(resTh$caf) * 2

  costCAF <- sqrt((1 / n_err) * sum((resTh$caf[c("accPerComp", "accPerIncomp")] - resOb$caf[c("accPerComp", "accPerIncomp")])**2))
  costRT <- sqrt((1 / n_rt) * sum((resTh$delta[c("meanComp", "meanIncomp")] - resOb$delta[c("meanComp", "meanIncomp")])**2))
  weightRT <- n_rt / (n_rt + n_err)
  weightCAF <- (1 - weightRT) * 1500

  costValue <- (weightCAF * costCAF) + (weightRT * costRT)

  return(costValue)
}

#' @title calculateCostValueSPE
#'
#' @description Calculate cost value (fit) using squared percentage errror (SPE) from combination of RT and error rate.
#'
#' @param resTh list containing caf values for comp/incomp conditions (nbins * 4 columns) and
#' delta values for comp/incomp conditions (nbins * 5 columns). See output from dmcSim (.$caf).
#' @param resOb list containing caf values for comp/incomp conditions (n * 4 columns) and
#' delta values for comp/incomp conditions (nbins * 5 columns). See output from dmcSim (.$delta).
#'
#' @return cost value (SPE)
#'
#' @examples
#' # Example 1:
#' resTh <- dmcSim()
#' resOb <- dmcSim()
#' cost <- calculateCostValueSPE(resTh, resOb)
#'
#' # Example 2:
#' resTh <- dmcSim()
#' resOb <- dmcSim(tau = 150)
#' cost <- calculateCostValueSPE(resTh, resOb)
#' @export
calculateCostValueSPE <- function(resTh, resOb) {
  costCAF <- sum(((resOb$caf[, c("accPerComp", "accPerIncomp")] -
    resTh$caf[, c("accPerComp", "accPerIncomp")]) / resOb$caf[, c("accPerComp", "accPerIncomp")])**2)

  costRT <- sum(((resOb$delta[, c("meanComp", "meanIncomp", "meanBin")] -
    resTh$delta[, c("meanComp", "meanIncomp", "meanBin")]) / resOb$delta[, c("meanComp", "meanIncomp", "meanBin")])**2)

  costValue <- costRT + costCAF

  return(costValue)
}


#' @title calculateCostValueCS
#'
#' @description Calculate cost value (fit) using chi-square (CS) from correct and incorrect RT data.
#'
#' @param resTh list containing simulation $sim values (output from dmcSim) for rts_comp, rts_incomp,
#' errs_comp, errs_incomp
#' @param resOb list containing raw observed data (see dmcObservedData with keepRaw = TRUE)
#'
#' @return cost value (CS)
#'
#' @examples
#' # Example 1:
#' resTh <- dmcSim()
#' resOb <- flankerData
#' resOb <- calculateBinProbabilities(resOb)
#' cost <- calculateCostValueCS(resTh, resOb)
#' @export
calculateCostValueCS <- function(resTh, resOb) {
  cs_comp_correct <- cs(resTh$sim$rts_comp, resOb$prob[resOb$prob$Comp == "comp" & resOb$prob$Error == 0, ])
  cs_comp_error <- cs(resTh$sim$errs_comp, resOb$prob[resOb$prob$Comp == "comp" & resOb$prob$Error == 1, ])
  cs_incomp_correct <- cs(resTh$sim$rts_incomp, resOb$prob[resOb$prob$Comp == "incomp" & resOb$prob$Error == 0, ])
  cs_incomp_error <- cs(resTh$sim$errs_incomp, resOb$prob[resOb$prob$Comp == "incomp" & resOb$prob$Error == 1, ])

  return(sum(c(cs_comp_correct, cs_comp_error, cs_incomp_correct, cs_incomp_error), na.rm = TRUE))

}

cs <- function(th, ob) {
  if ((length(th) == 0 | nrow(ob) == 0)) {
    return(NA) # no observations (can happen, esp. for error trials in comp conditions)
  }
  probE <- c(0.1, 0.2, 0.2, 0.2, 0.2, 0.1) # TO DO hard coded?
  nBin <- table(factor(.bincode(th, breaks = c(0, ob$boundary, Inf)), levels = 1:6))
  pBin <- nBin / sum(nBin) # sum(pBin) == 1
  cs <- sum(ob$nTrials[1] * (pBin - probE)**2 / probE)
  return(cs)
}



#' @title calculateCostValueGS
#'
#' @description Calculate cost value (fit) using likelihood-ratio chi-square statistic (GS) from correct and incorrect RT data.
#'
#' @param resTh list containing simulation $sim values (output from dmcSim) for rts_comp, rts_incomp,
#' errs_comp, errs_incomp
#' @param resOb list containing raw observed data (see dmcObservedData with keepRaw = TRUE)
#'
#' @return cost value (GS)
#'
#' @examples
#' # Example 1:
#' resTh <- dmcSim()
#' resOb <- flankerData
#' resOb <- calculateBinProbabilities(resOb)
#' cost  <- calculateCostValueGS(resTh, resOb)
#' @export
calculateCostValueGS <- function(resTh, resOb) {
  nComp <- nrow(resOb$data[resOb$data$Comp == "comp" & resOb$data$outlier == 0, ])
  gsCompCorrect <- gs(resTh$sim$rts_comp, resOb$prob[resOb$prob$Comp == "comp" & resOb$prob$Error == 0, ])
  gsCompError <- gs(resTh$sim$errs_comp, resOb$prob[resOb$prob$Comp == "comp" & resOb$prob$Error == 1, ])
  gsComp <- nComp * (sum(gsCompCorrect, na.rm = TRUE) + sum(gsCompError, na.rm = TRUE))

  nIncomp <- nrow(resOb$data[resOb$data$Comp == "incomp" & resOb$data$outlier == 0, ])
  gsIncompCorrect <- gs(resTh$sim$rts_incomp, resOb$prob[resOb$prob$Comp == "incomp" & resOb$prob$Error == 0, ])
  gsIncompError <- gs(resTh$sim$errs_incomp, resOb$prob[resOb$prob$Comp == "incomp" & resOb$prob$Error == 1, ])
  gsIncomp <- nIncomp * (sum(gsIncompCorrect, na.rm = TRUE) + sum(gsIncompError, na.rm = TRUE))

  costValue <- 2 * (gsComp + gsIncomp)

  return(costValue)
}


gs <- function(th, ob) {
  if ((length(th) == 0 | nrow(ob) == 0)) {
    return(NA) # no observations (can happen, esp. for error trials in comp conditions)
  }
  probE <- c(0.1, 0.2, 0.2, 0.2, 0.2, 0.1) # TO DO hard coded?
  nBin <- table(factor(.bincode(th, c(0, ob$boundary, Inf)), levels = 1:6))
  pBin <- nBin / sum(nBin) # sum(pBin) == 1
  gs <- pBin * log(pBin / probE)
  return(gs)
}


costValueFunction <- function(costFunction) {
  if (is.character(costFunction)) {
    if (tolower(costFunction) == "rmse") {
      return(calculateCostValueRMSE)
    } else if (tolower(costFunction) == "spe") {
      return(calculateCostValueSPE)
    } else if (tolower(costFunction) == "gs") {
      return(calculateCostValueGS)
    } else if (tolower(costFunction) == "cs") {
      return(calculateCostValueCS)
    }
  } else if (is.function(costFunction)) {
    return(costFunction)
  }
  stop("costFunction must be one of 'rmse', 'spe', 'gs', 'cs', or custom function")
}

#' @title calculateBinProbabilities
#'
#' @description Calculate bin probabilities in observed data
#'
#' @param resOb Observed data (see dmcObservedData)
#' @param quantileType Argument (1-9) from R function quantile specifying the algorithm (?quantile)
#'
#' @return resOb Observed data with additional $probSubject/$prob table
#'
#' @examples
#' # Example 1:
#' resOb <- flankerData
#' resOb <- calculateBinProbabilities(resOb)
#' resOb$prob
#' @export
calculateBinProbabilities <- function(resOb, quantileType = 5) {

  # Some subjects are likely not to have 5+ error trials, esp. in compatible conditions!
  probs <- c(0.1, 0.3, 0.5, 0.7, 0.9)

  # dplyr 1.1.0 reframe replaces functionality from summarize
  # https://www.tidyverse.org/blog/2023/02/dplyr-1-1-0-pick-reframe-arrange/
  if (packageVersion("dplyr") >= "1.1.0") {
    resOb$probSubject <- resOb$data %>%
      dplyr::filter(outlier == 0) %>%
      dplyr::group_by(Subject, Comp, Error) %>%
      dplyr::reframe(
        nTrials  = n(),
        prob     = probs,
        boundary = quantile(RT, probs, quantileType = quantileType),
      ) %>%
      dplyr::filter(nTrials >= 5)
  } else {
    resOb$probSubject <- resOb$data %>%
      dplyr::filter(outlier == 0) %>%
      dplyr::group_by(Subject, Comp, Error) %>%
      dplyr::summarize(
        nTrials  = n(),
        prob     = probs,
        boundary = quantile(RT, probs, quantileType = quantileType),
        .groups  = "drop"
      ) %>%
      dplyr::filter(nTrials >= 5)
  }

  resOb$prob <- resOb$probSubject %>%
    dplyr::group_by(Comp, Error, prob) %>%
    dplyr::summarize(
      nSubjects = n(),
      nTrials   = sum(nTrials),
      boundary  = mean(boundary),
      .groups   = "drop"
    )

  return(resOb)
}
igmmgi/DMCfun documentation built on April 14, 2024, 7:23 a.m.