R/commonAnovaBayesian.R

Defines functions .BANOVAestimateModels .BANOVAmissingInteractionCells .BANOVAerrorhandling .BANOVArunAnalysis

#
# Copyright (C) 2013-2022 University of Amsterdam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# Abbreviations:
# HF = helper function

.BANOVArunAnalysis <- function(jaspResults, dataset, options, analysisType = c("ANOVA", "ANCOVA", "RM-ANOVA")) {

  # the main workhorse of the Bayesian ANOVA, ANCOVA, and Repeated Measures.

  analysisType <- match.arg(analysisType)
  dataset  <- .BANOVAreadData(dataset, options, analysisType)
  if (isTryError(dataset))
    .quitAnalysis(gettext("Error while loading data. Please verify your repeated measures observations."))

  dataInfo <- .BANOVAerrorhandling(dataset, options, analysisType)

  model <- .BANOVAestimateModels(jaspResults, dataset, options, dataInfo, analysisType)
  model[["posteriors"]] <- .BANOVAestimatePosteriors(jaspResults, dataset, options, model)

  .BANOVAeffectsTable  (jaspResults, options, model)
  .BANOVAestimatesTable(jaspResults, options, model)
  .BANOVArsquaredTable (jaspResults, options, model)

  # model averaged plots
  .BANOVAposteriorPlot(jaspResults, dataset, options, model)
  .BANOVAqqplot       (jaspResults, options, model)
  .BANOVArsqplot      (jaspResults, options, model)

  .BANOVAnullControlPostHocTable(jaspResults, dataset, options, model)

  # single model plots and tables
  .BANOVAsmi(jaspResults, dataset, options, model)

  # descriptives
  .BANOVAdescriptives(jaspResults, dataset, options, dataInfo, analysisType)

  return()
}

.BANOVAerrorhandling <- function(dataset, options, analysisType) {

  customChecks <- list()

  if (analysisType != "RM-ANOVA") {
    hasDV <- options$dependent != ""
    hasIV <- any(lengths(options[c("fixedFactors", "covariates")]) != 0)
    fixed <- options$fixedFactors
    noVariables <- !(hasDV && hasIV)
    target <- c(options$covariates, options$dependent)

    # check if the last model has an interaction effect
    mostComplexModel <- NULL
    if (length(options[["modelTerms"]]) > 0L) {
      mostComplexModel <- options[["modelTerms"]][[length(options[["modelTerms"]])]][["components"]]
      mostComplexModel <- intersect(mostComplexModel, fixed)
    }

    if (length(mostComplexModel) > 1L)
      customChecks[["missingInteractionCells"]] <- .BANOVAmissingInteractionCells

    if (!noVariables) {
      .hasErrors(
        dataset = dataset,
        type    = c("infinity", "observations", "variance", "factorLevels", "duplicateColumns"),
        infinity.target     = target,
        variance.target     = target,
        observations.target = target,
        observations.amount = paste("<", length(options$modelTerms) + 1),
        factorLevels.target = fixed,
        factorLevels.amount = " < 2",
        # custom checks
        custom              = customChecks,
        missingInteractionCells.target = mostComplexModel,
        exitAnalysisIfErrors = TRUE
      )
    }

  } else {
    hasDV  <- !any(options$repeatedMeasuresCells == "")
    hasIV  <- any(lengths(options[c("betweenSubjectFactors", "covariates")]) != 0)
    fixed  <- unlist(options$betweenSubjectFactors)
    covariates <- unlist(options$covariates)
    noVariables <- !hasDV
    target <- c(covariates, fixed)

    # check if the last model has an interaction effect
    mostComplexModel <- NULL
    if (length(options[["modelTerms"]]) > 0L) {
      mostComplexModel <- options[["modelTerms"]][[length(options[["modelTerms"]])]][["components"]]
      mostComplexModel <- intersect(mostComplexModel, fixed)
    }

    if (length(mostComplexModel) > 1L)
      customChecks[["missingInteractionCells"]] <- .BANOVAmissingInteractionCells

    if (length(target) > 0L) {
      .hasErrors(
        dataset = dataset,
        type    = c("infinity", "observations", "variance", "factorLevels", "duplicateColumns"),
        infinity.target     = target,
        variance.target     = covariates,
        observations.target = target,
        observations.amount = paste("<", length(options$modelTerms) + 1),
        factorLevels.target = fixed,
        factorLevels.amount = " < 2",
        duplicateColumns.target = target,
        # custom checks
        custom              = customChecks,
        missingInteractionCells.target = mostComplexModel,
        exitAnalysisIfErrors = TRUE
      )
    }

    if (!noVariables) {

      # messages from commonmessages.R
      customChecks <- list(
        infinity = function() {
          x <- dataset[, .BANOVAdependentName]
          if (any(is.infinite(x)))
            return(gettext("Infinity found in repeated measures cells."))
          return(NULL)
        },
        observations = function() {
          x <- dataset[, .BANOVAdependentName]
          nObs <- length(options$modelTerms) + 1
          if (length(na.omit(x)) <= nObs)
            return(gettextf("Number of observations is < %s in repeated measures cells", nObs))
          return(NULL)
        },
        variance = function() {
          x <- dataset[, .BANOVAdependentName]
          validValues <- x[is.finite(x)]
          variance <- 0
          if (length(validValues) > 1)
            variance <- stats::var(validValues)
          if (variance == 0)
            return(gettext("The variance in the repeated measures cells is 0."))
          return(NULL)
        },
        duplicateColumns = function() {
          cols <- c(.BANOVAsubjectName, target)
          if (length(cols) == 1L)
            return()

          datasetList <- as.list(dataset[, cols])
          duplicatedCols <- duplicated(datasetList) | duplicated(datasetList, fromLast = TRUE)
          if (any(duplicatedCols)) {
            if (duplicatedCols[1L]) {
              msg <- gettextf("Duplicate variables encountered in repeated measures cells, %s",
                              paste(target[duplicatedCols[-1L]], collapse = ", "))
            } else {
              msg <- gettextf("Duplicate variables encountered in %s",
                              paste(target[duplicatedCols[-1L]], collapse = ", "))
            }
            return(msg)
          }
          return(NULL)
        },
        redundantParticipantColumn = function() {

          if (nlevels(dataset[[.BANOVAsubjectName]]) == nlevels(dataset[[target]])) {
            return(gettextf("Duplicate participant column added to model terms, %s", target))
          } else {
            return()
          }

        }
      )

      .hasErrors(
        dataset = dataset,
        target = target,
        custom = customChecks,
        exitAnalysisIfErrors = TRUE
      )

    }

  }

  return(list(noVariables = noVariables, hasIV = hasIV, hasDV = hasDV))
}

.BANOVAmissingInteractionCells <- function(dataset, target) {

  # sorts a data frame by all its columns
  sortDataFrame <- function(df) {
    df[do.call(order, df), ]
  }

  uniqueObservedCombinations <- sortDataFrame(unique(dataset[, target, drop = FALSE]))
  uniqueExpectedCombinations <- sortDataFrame(do.call(expand.grid, c(lapply(uniqueObservedCombinations, unique), stringsAsFactors = FALSE)))

  # if the rows match then it must be
  if (nrow(uniqueObservedCombinations) == nrow(uniqueExpectedCombinations))
    return(NULL)

  # find missing combinations
  observedStrings <- unname(apply(uniqueObservedCombinations, 1L, paste, collapse = jaspBase::interactionSymbol))
  expectedStrings <- unname(apply(uniqueExpectedCombinations, 1L, paste, collapse = jaspBase::interactionSymbol))
  missingStrings  <- setdiff(expectedStrings, observedStrings)
  missingStringsCollapsed <- paste(missingStrings[1:min(length(missingStrings), 10)], collapse = ", ")

  interactionVariable <- paste(colnames(uniqueObservedCombinations), collapse = jaspBase::interactionSymbol)

  if (length(missingStrings) <= 10) {
    return(gettextf(
      "The interaction effect of %1$s has no observations for the combination(s): %2$s. Please adjust the model and remove any interactions with missing cells",
      interactionVariable, missingStringsCollapsed
    ))
  } else {
    return(gettextf(
      "The interaction effect of %1$s has no observations for the combination(s): %2$s. Only the first 10 missing combinations are shown. Please adjust the model and remove any interactions with missing cells",
      interactionVariable, missingStringsCollapsed
    ))
  }

}

# model comparison ----
.BANOVAestimateModels <- function(jaspResults, dataset, options, errors,
                                  analysisType = c("ANOVA", "ANCOVA", "RM-ANOVA")) {

  # also makes the model comparison table
  stateObj <- jaspResults[["tableModelComparisonState"]]$object
  if (!is.null(jaspResults[["tableModelComparison"]]) && !is.null(stateObj)) {
    stateObj$completelyReused <- TRUE # means that posteriors won't need to be resampled
    return(stateObj)
  } else if (errors$noVariables) {
    modelTable <- .BANOVAinitModelComparisonTable(options)
    jaspResults[["tableModelComparison"]] <- modelTable
    return(list(analysisType = analysisType))
  } else if (!is.null(stateObj) &&
             .BANOVAmodelTermsUnchanged(stateObj, options) &&
             .BANOVAmarginalityOptionsUnchanged(stateObj, options) &&
             (.BANOVAmodelBFTypeOrOrderChanged(stateObj, options) || identical(stateObj[["shownTableSize"]], options[c("modelsShown", "numModelsShown")]))) {

    # if the statement above is TRUE then no new variables were added (or seed changed)
    # and the only change is in the Bayes factor type, the ordering, or the number of models shown
    modelTable <- .BANOVAinitModelComparisonTable(options)
    modelTable[["Models"]] <- .BANOVAgetModelTitlesWithAllTerms(stateObj[["models"]], stateObj[["model.list"]], analysisType, options[["hideNuisanceParameters"]])

    internalTable <- stateObj$internalTableObj$internalTable
    nmodels <- nrow(internalTable)
    .BANOVAestimateModels
    shownTableSize <- .BANOVAgetShownTableSizeAndAddFootnote(modelTable, nmodels, options[["modelsShown"]], options[["numModelsShown"]])

    if (.BANOVAmodelPriorOptionsChanged(stateObj, options)) {

      priorProbs <- .BANOVAcomputePriorModelProbs(stateObj$model.list, stateObj$nuisance, options)
      internalTable[["P(M)"]] <- priorProbs
      stateObj$completelyReused <- FALSE # model-averaged posteriors need to be resampled

    } else {

      stateObj$completelyReused <- TRUE # model-averaged posteriors do not need to be resampled

    }

    internalTableObj <- .BANOVAfinalizeInternalTable(options, internalTable)
    stateObj$postProbs        <- internalTableObj$internalTable[["P(M|data)"]]
    stateObj$priorProbs       <- internalTableObj$internalTable[["P(M)"]]
    stateObj$internalTableObj <- internalTableObj
    modelTable$setData(internalTableObj$table[seq_len(shownTableSize), ])
    jaspResults[["tableModelComparison"]] <- modelTable

    return(stateObj)

  }

  modelTerms    <- options$modelTerms
  dependent     <- options$dependent
  if (analysisType == "RM-ANOVA") {
    modelTerms[[length(modelTerms) + 1L]] <- list(components = .BANOVAsubjectName, isNuisance = TRUE)
    dependent <- .BANOVAdependentName
  }
  fixedFactors  <- options$fixedFactors
  randomFactors <- .BANOVAgetRandomFactors(options, analysisType)

  tempRScale    <- .BANOVAgetRScale(options, analysisType)
  rscaleFixed   <- tempRScale[["rscaleFixed"]]
  rscaleRandom  <- tempRScale[["rscaleRandom"]]
  rscaleCont    <- tempRScale[["rscaleCont"]]
  rscaleEffects <- tempRScale[["rscaleEffects"]]

  tmp <- .BANOVAcreateModelFormula(dependent, modelTerms)
  model.formula <- tmp$model.formula
  nuisance      <- tmp$nuisance
  effects       <- tmp$effects

  if (analysisType == "RM-ANOVA") {
    legacy    <- options[["legacyResults"]]
    rmFactors <- vapply(options[["repeatedMeasuresFactors"]], `[[`, character(1L), "name")
  } else {
    legacy    <- FALSE
    rmFactors <- NULL
  }

  #Make a list of models to compare
  temp <- .BANOVAgenerateAllModelFormulas(
    formula                                   = model.formula,
    nuisance                                  = nuisance,
    analysisType                              = analysisType,
    enforcePrincipleOfMarginalityFixedEffects = options[["enforcePrincipleOfMarginalityFixedEffects"]],
    enforcePrincipleOfMarginalityRandomSlopes = options[["enforcePrincipleOfMarginalityRandomSlopes"]],
    rmFactors                                 = rmFactors,
    legacy                                    = legacy
  )
  model.list           <- temp$modelList
  nuisance             <- temp$nuisance
  nuisanceRandomSlopes <- temp$nuisanceRandomSlopes

  if (length(model.list) == 1L) {
    modelTable <- .BANOVAinitModelComparisonTable(options)
    modelTable$setError(gettext("Bayes factor is undefined -- all effects are specified as nuisance."))
    jaspResults[["tableModelComparison"]] <- modelTable
    return(list(analysisType = analysisType))
  }

  # TODO: discuss if we want a hard limit if length(model.list) > ...
  # that would avoid peoples computers from slowing down tremendously.
  # JCG: A hard limit would be a bit cruel right? How about a warning instead? (in QML already?)

  #Run all other models and store the results in the model.object
  neffects <- length(effects)
  nmodels <- length(model.list)
  modelObject <- vector("list", nmodels)
  reorderedEffects  <- .BANOVAreorderTerms(effects)
  reorderedNuisance <- if (is.null(nuisance)) NULL else .BANOVAreorderTerms(nuisance)
  if (nmodels > 0L && neffects > 0L) {

    # effects.matrix contains for each row (model), which predictors (columns) are in the model
    # interactions.matrix contains for each predictor (column) what predictors (rows) is it composed of

    interactions.matrix <- .BANOVAcomputeInteractionsMatrix(effects)

    effects.matrix <- matrix(data = FALSE, nrow = nmodels, ncol = neffects)
    colnames(effects.matrix) <- effects

    for (m in seq_len(nmodels)) {
      modelObject[[m]] <- list()
      if (m == 1L) {
        if (is.null(nuisance)) { # intercept only
          modelObject[[m]]$title <- gettext("Null model")
          next # all effects are FALSE anyway
        } else {
          if (analysisType == "RM-ANOVA" && !legacy) {
            tempNuisance <- setdiff(nuisance, nuisanceRandomSlopes)
            modelObject[[m]]$title <- sprintf(ngettext(
              length(tempNuisance),
              "Null model (incl. %s and random slopes)",
              "Null model (incl. %s, and random slopes)"
            ), paste(.BANOVAdecodeNuisance(tempNuisance), collapse = ", "))
          } else {
            modelObject[[m]]$title <- gettextf("Null model (incl. %s)", paste(.BANOVAdecodeNuisance(nuisance), collapse = ", "))
          }
        }
      }
      model.effects <- .BANOVAgetFormulaComponents(model.list[[m]])

      reorderedModelEffects <- .BANOVAreorderTerms(model.effects)
      idx <- match(reorderedModelEffects, reorderedEffects, nomatch = 0L)
      idx <- idx[!is.na(idx)]
      effects.matrix[m, idx] <- TRUE

      if (m > 1L) {
        model.title <- model.effects[match(reorderedModelEffects, reorderedNuisance, 0L) == 0L]
        modelObject[[m]]$title <- jaspBase::gsubInteractionSymbol(paste(model.title, collapse = " + "))
      }
    }

    rownames(effects.matrix) <- sapply(modelObject, `[[`, "title")
  }

  modelTable <- .BANOVAinitModelComparisonTable(options)
  modelNames <- .BANOVAgetModelTitlesWithAllTerms(modelObject, model.list, analysisType, options[["hideNuisanceParameters"]])

  shownTableSize <- .BANOVAgetShownTableSizeAndAddFootnote(modelTable, nmodels, options[["modelsShown"]], options[["numModelsShown"]])

  modelTable[["Models"]] <- modelNames[seq_len(shownTableSize)]
  jaspResults[["tableModelComparison"]] <- modelTable
  # internalTable is an internal representation of the model comparison table
  # internalTable <- matrix(NA, nmodels, 6L, dimnames = list(modelNames, c("Models", "P(M)", "P(M|data)", "BFM", "BF10", "error %")))
  internalTable <- data.frame(
    Models      = character(nmodels),
    `P(M)`      = numeric(nmodels),
    `P(M|data)` = numeric(nmodels),
    BFM         = numeric(nmodels),
    BF10        = numeric(nmodels),
    `error %`   = numeric(nmodels),
    check.names = FALSE
  )
  # set model names, BF null model and p(M)
  internalTable[["Models"]] <- modelNames
  internalTable[1L, "BF10"] <- 0
  internalTable[["P(M)"]] <- .BANOVAcomputePriorModelProbs(model.list, nuisance, options)

  #Now compute Bayes Factors for each model in the list, and populate the tables accordingly
  startProgressbar(nmodels, gettext("Computing Bayes factors"))

  # check if any models can be resued from the state
  if (!is.null(stateObj[["modelTerms"]])) {

    oldFormulas <- sapply(stateObj$model.list, .BANOVAreorderFormulas)
    newFormulas <- sapply(model.list, .BANOVAreorderFormulas)

    reuseable   <- match(newFormulas, oldFormulas)

  } else {
    reuseable <- rep(NA, nmodels)
  }

  if (analysisType == "RM-ANOVA") {
    # the default null-model contains subject
    anyNuisance <- TRUE
  } else {
    # the default null-model is intercept only
    anyNuisance <- length(nuisance) > 0L
  }

  # without these there is no error
  bfIterations <- if (options[["samplingMethodNumericAccuracy"]] == "auto") 1e4L else options[["samplesNumericAccuracy"]]

  if (options[["integrationMethod"]] == "automatic") {
    bfIntegrationMethod <- "auto"
  } else {
    bfIntegrationMethod <- "laplace"
    modelTable$addFootnote(gettext("The Laplace approximation is faster but also less accurate than the automatic method. This is fine when exploring the data and obtaining a rough estimate. However, we recommend using the automatic integration method to obtain the final results."))
    modelTable$addFootnote(gettext("The Laplace approximation does not always provide an error estimate."), colNames = "error %")
  }

  for (m in seq_len(nmodels)) {
    # loop over all models, where the first is the null-model and the last the most complex model
    if (is.na(reuseable[m])) {
      if (!is.null(model.list[[m]])) {
        .setSeedJASP(options)
        bf <- try(BayesFactor::lmBF(
          formula       = model.list[[m]],
          data          = dataset,
          whichRandom   = randomFactors,
          progress      = FALSE,
          posterior     = FALSE,
          rscaleFixed   = rscaleFixed,
          rscaleRandom  = rscaleRandom,
          rscaleCont    = rscaleCont,
          rscaleEffects = rscaleEffects,
          iterations    = bfIterations,
          method        = bfIntegrationMethod
        ))
        if (isTryError(bf)) {
          modelName <- strsplit(.BANOVAas.character.formula(model.list[[m]]), "~ ")[[1L]][2L]
          .quitAnalysis(gettextf("Bayes factor could not be computed for model: %1$s.\nThe error message was: %2$s.",
                                .BANOVAdecodeSubject(modelName), .extractErrorMessage(bf)))
        } else {
          # delete the data object -- otherwise it gets saved in the state
          bf@data <- data.frame()
        }
      } else {
        bf <- NULL
      }
    } else {
      bf <- stateObj$models[[reuseable[m]]]$bf
    }
    modelObject[[m]]$bf <- bf

    if (!is.null(bf)) {
      # if (isTryError(bf)) {
      #   message <- .extractErrorMessage(bf)
      #   modelObject[[m]]$error.message <- paste("Bayes factor computation failed with error:", message)
      # } else {
        # bfObj can have a modified denominator, but the saved objects are always compared against intercept only
      bfObj <- modelObject[[m]]$bf
      if (anyNuisance)
        bfObj <- bfObj / modelObject[[1L]]$bf

      internalTable[m, "BF10"]    <- bfObj@bayesFactor[, "bf"] # always LogBF10!
      internalTable[m, "error %"] <- bfObj@bayesFactor[, "error"] # always NA if bfIntegrationMethod == "laplace"
      # }

      # disable filling of Bayes factor column (see https://github.com/jasp-stats/jasp-test-release/issues/1018 for discussion)
      # modelTable[["BF10"]]    <- .recodeBFtype(internalTable[["BF10"]],
      #                                          newBFtype = options$bayesFactorType,
      #                                          oldBFtype = "LogBF10")
      modelTable[["error %"]] <- internalTable[seq_len(shownTableSize), "error %"]
    }
    progressbarTick()
  }

  internalTableObj <- .BANOVAfinalizeInternalTable(options, internalTable)
  modelTable$setData(internalTableObj$table[seq_len(shownTableSize), ])
  if (length(internalTableObj[["footnotes"]]) > 0L) {
    idxRow <- internalTableObj[["footnotes"]][["rows"]]
    idxCol <- internalTableObj[["footnotes"]][["cols"]]
    tb <- internalTableObj$table
    modelTable$addFootnote(message  = internalTableObj[["footnotes"]][["message"]],
                           symbol   = " <sup>&#10013;</sup>", # latin cross
                           rowNames = rownames(tb)[idxRow],
                           colNames = colnames(tb)[idxCol])
  }

  if (anyNuisance) {
    if (analysisType == "RM-ANOVA" && !options[["legacyResults"]]) {
      message <- gettextf("All models include %s, and random slopes for all repeated measures factors.",
               paste0(.BANOVAdecodeNuisance(setdiff(nuisance, nuisanceRandomSlopes)), collapse = ", "))

    } else {

      if (analysisType == "RM-ANOVA")
        modelTable$addFootnote(message = gettext("Random slopes for repeated measures factors are excluded."), symbol = .BANOVAGetWarningSymbol())

      message <- gettextf("All models include %s.", paste0(.BANOVAdecodeNuisance(nuisance), collapse = ", "))
    }
    modelTable$addFootnote(message = message)

  }

  model <- list(
    models               = modelObject,
    postProbs            = internalTableObj$internalTable[["P(M|data)"]],
    priorProbs           = internalTableObj$internalTable[["P(M)"]],
    internalTableObj     = internalTableObj,
    effects              = effects.matrix,
    interactions.matrix  = interactions.matrix,
    nuisance             = nuisance,
    nuisanceRandomSlopes = nuisanceRandomSlopes,
    analysisType         = analysisType,
    # these are necessary for partial reusage of the state (e.g., when a fixedFactor is added/ removed)
    model.list           = model.list,
    fixedFactors         = fixedFactors,
    randomFactors        = if (analysisType == "RM-ANOVA") randomFactors else options[["randomFactors"]], # stored because they are modified in RM-ANOVA
    modelTerms           = modelTerms,
    covariates           = if (analysisType == "ANOVA") NULL else options[["covariates"]],
    seed                 = options[["seed"]],
    setSeed              = options[["setSeed"]],
    reuseable            = reuseable,
    RMFactors            = options[["repeatedMeasuresFactors"]],
    modelPriorOptions    = options[.BANOVAmodelSpaceDependencies(options[["modelPrior"]])],
    hideNuisanceEffects  = options[["hideNuisanceEffects"]],
    shownTableSize       = options[c("modelsShown", "numModelsShown")]
  )

  # save state
  stateObj <- createJaspState(object = model, dependencies = c(
    # does NOT depend on any factors or covariates, to facilitate reusing previous models
    "dependent", "repeatedMeasuresCells", "samplingMethodNumericAccuracy", "samplesNumericAccuracy", "seed", "setSeed",
    .BANOVArscaleDependencies(options[["priorSpecificationMode"]])

  ))

  jaspResults[["tableModelComparisonState"]] <- stateObj

  return(model)
}

.BANOVAeffectsTable <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["tableEffects"]]) || !options[["effects"]])
    return()

  # isTRUE should handle a state issue, see https://github.com/jasp-stats/jasp-test-release/issues/839
  if (isTRUE(model[["analysisType"]] != "RM-ANOVA" && options[["dependent"]] != "")) {
    title <- gettextf("Analysis of Effects - %s", options[["dependent"]])
  } else {
    title <- gettext("Analysis of Effects")
  }

  effectsTable <- createJaspTable(title = title)
  effectsTable$position <- 2
  effectsTable$dependOn(c(
    .BANOVAdataDependencies(),
    "effects", "effectsType",
    "samplingMethodNumericAccuracy", "samplesNumericAccuracy", "bayesFactorType",
    "integrationMethod",
    .BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
    .BANOVArscaleDependencies(options[["priorSpecificationMode"]])
  ))

  effectsTable$addCitation(.BANOVAcitations[1:2])

  inclusion.title <- switch(
    options$bayesFactorType,
    "LogBF10" = gettext("Log(BF<sub>incl</sub>)"),
    "BF01"    = gettext("BF<sub>excl</sub>"),
    "BF10"    = gettext("BF<sub>incl</sub>")
  )

  effectsTable$addColumnInfo(name = "Effects",      title = gettext("Effects"),      type = "string")
  effectsTable$addColumnInfo(name = "P(incl)",      title = gettext("P(incl)"),      type = "number")
  effectsTable$addColumnInfo(name = "P(excl)",      title = gettext("P(excl)"),      type = "number")
  effectsTable$addColumnInfo(name = "P(incl|data)", title = gettext("P(incl|data)"), type = "number")
  effectsTable$addColumnInfo(name = "P(excl|data)", title = gettext("P(excl|data)"), type = "number")
  effectsTable$addColumnInfo(name = "BFInclusion",  title = inclusion.title,         type = "number")

  if (options$effectsType == "matchedModels") {
    effectsTable$addFootnote(gettextf("Compares models that contain the effect to equivalent models stripped of the effect. Higher-order interactions are excluded. Analysis suggested by Sebastiaan Math%st.", "\u00F4"))
  }

  if (is.null(model$models)) {
    jaspResults[["tableEffects"]] <- effectsTable
    return()
  }

  effects.matrix <- model$effects
  no.effects <- ncol(effects.matrix)
  effectNames <- colnames(model$effects)

  idxNotNan <- c(TRUE, !is.nan(model$postProbs[-1L]))
  if (options$effectsType == "allModels") {

    if (any(!idxNotNan)) {
      # renormalize the prior and posterior probabilities
      priorInclProb <- crossprod(effects.matrix[idxNotNan, , drop = FALSE], model[["priorProbs"]][idxNotNan] / sum(model[["priorProbs"]][idxNotNan]))
      postInclProb  <- crossprod(effects.matrix[idxNotNan, , drop = FALSE], model[["postProbs"]][idxNotNan]  / sum(model[["postProbs"]][idxNotNan]))
    } else {
      # do not renormalize
      priorInclProb <- crossprod(effects.matrix, model[["priorProbs"]])
      postInclProb  <- crossprod(effects.matrix, model[["postProbs"]])
    }

    # deal with numerical error
    postInclProb[postInclProb > 1] <- 1
    postInclProb[postInclProb < 0] <- 0
    bfIncl <- (postInclProb / (1 - postInclProb)) / (priorInclProb / (1 - priorInclProb))

    priorExclProb <- 1 - priorInclProb
    postExclProb  <- 1 - postInclProb

  } else {

    tmp <- BANOVAcomputMatchedInclusion(
      effectNames, effects.matrix, model$interactions.matrix,
      model$priorProbs, model$postProbs
    )
    priorInclProb <- tmp[["priorInclProb"]]
    postInclProb  <- tmp[["postInclProb"]]
    bfIncl        <- tmp[["bfIncl"]]
    priorExclProb <- tmp[["priorExclProb"]]
    postExclProb  <- tmp[["postExclProb"]]

    # show BFinclusion for nuisance predictors as 1, rather than NaN
    priorInclIs1 <- is.nan(bfIncl) & abs(1 - priorInclProb) <= sqrt(.Machine$double.eps)
    bfIncl[priorInclIs1] <- 1

  }

  if (sum(!idxNotNan) > 1L) { # null model is always omitted, so 2 or more omitted indicates some models failed
    effectsTable$addFootnote(message = gettext("Some Bayes factors could not be calculated. Inclusion probabilities and Bayes factors are calculated while excluding these models. The results may be uninterpretable!"),
      symbol = .BANOVAGetWarningSymbol()
    )
  }

  effectsTable[["Effects"]]      <- jaspBase::gsubInteractionSymbol(effectNames)
  effectsTable[["P(incl)"]]      <- priorInclProb
  effectsTable[["P(incl|data)"]] <- postInclProb
  effectsTable[["BFInclusion"]] <- switch(
    options$bayesFactorType,
    "LogBF10" = log(bfIncl),
    "BF01"    = 1 / bfIncl,
    "BF10"    = bfIncl
  )

  effectsTable[["P(excl)"]]      <- priorExclProb
  effectsTable[["P(excl|data)"]] <- postExclProb

  jaspResults[["tableEffects"]] <- effectsTable
  return()
}

BANOVAcomputMatchedInclusion <- function(effectNames, effects.matrix, interactions.matrix,
                                          priorProbs, postProbs) {
  # this method is inspired by this post: https://www.cogsci.nl/blog/interpreting-bayesian-repeated-measures-in-jasp

  priorInclProb <- postInclProb <- bfIncl <- priorExclProb <- postExclProb <-
    numeric(length(effectNames))

  for (i in seq_along(effectNames)) {
    effect <- effectNames[i]

    # get all higher order interactions of which this effect is a component
    # e.g., V1 is a component of V1:V2
    idx1 <- interactions.matrix[effect, ]

    # get all models that exclude the predictor, but that always include the lower order main effects
    # e.g., V1:V2 is compared against models that always include V1 and V2
    idx2 <- interactions.matrix[, effect] # if effect is V1:V2, idx2 contains c(V1, V2)

    # idx3 is FALSE if a model contains higher order interactions of effect, TRUE otherwise
    idx3 <- !matrixStats::rowAnys(effects.matrix[, idx1, drop = FALSE])

    # all models that include the effect, without higher order interactions
    idx4 <- idx3 & effects.matrix[, i]
    priorInclProb[i] <- sum(idx4 * priorProbs)
    postInclProb[i]  <- sum(idx4 * postProbs)

    # the models to consider for the prior/ posterior exclusion probability.
    # idx5 includes models that have: all subcomponents & no higher order interaction & not the effect
    idx5 <- matrixStats::rowAlls(effects.matrix[, idx2, drop = FALSE]) & idx3 & !effects.matrix[, i]

    priorExclProb[i] <- sum(idx5 * priorProbs)
    postExclProb[i]  <- sum(idx5 * postProbs)

    # compute inclusion BF
    bfIncl[i]     <- (postInclProb[i] / postExclProb[i]) / (priorInclProb[i] / priorExclProb[i])
  }
  return(list(
    priorInclProb = priorInclProb,
    postInclProb  = postInclProb,
    priorExclProb = priorExclProb,
    postExclProb  = postExclProb,
    bfIncl        = bfIncl
  ))
}

.BANOVAinitModelComparisonTable <- function(options) {

  # function that creates an empty JASP table to be filled later
  modelTable <- createJaspTable(title = gettext("Model Comparison"))
  modelTable$position <- 1L
  modelTable$addCitation(.BANOVAcitations[1:2])
  modelTable$dependOn(c(
    .BANOVAdataDependencies(),
    "samplingMethodNumericAccuracy", "samplesNumericAccuracy", "integrationMethod",
    "bayesFactorType", "bayesFactorOrder",
    "hideNuisanceParameters", "legacyResults",
    "modelsShown",
    .BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
    .BANOVArscaleDependencies(options[["priorSpecificationMode"]])
  ))
  if (options[["modelsShown"]] == "limited")
    modelTable$dependOn("numModelsShown")

  switch(options$bayesFactorType,
    BF10 = {
      bfm.title <- gettext("BF<sub>M</sub>")
      bf.title  <- gettext("BF<sub>10</sub>")
    },
    BF01 = {
      bfm.title <- gettext("BF<sub>M</sub>")
      bf.title  <- gettext("BF<sub>01</sub>")
    },
    LogBF10 = {
      bfm.title <- gettext("Log(BF<sub>M</sub>)")
      bf.title  <- gettext("Log(BF<sub>10</sub>)")
    }
  )

  modelTable$addColumnInfo(name = "Models",    title = gettext("Models"),    type = "string")
  modelTable$addColumnInfo(name = "P(M)",      title = gettext("P(M)"),      type = "number")
  modelTable$addColumnInfo(name = "P(M|data)", title = gettext("P(M|data)"), type = "number")
  modelTable$addColumnInfo(name = "BFM",       title = bfm.title,            type = "number")
  modelTable$addColumnInfo(name = "BF10",      title = bf.title,             type = "number")
  modelTable$addColumnInfo(name = "error %",   title = gettextf("error %%"), type = "number")

  return(modelTable)
}

.BANOVAfinalizeInternalTable <- function(options, internalTable) {

  # function that actually fills in the table created by .BANOVAinitModelComparisonTable
  footnotes <- NULL

  logSumExp <- matrixStats::logSumExp
  logbfs <- internalTable[["BF10"]]
  logprior <- log(internalTable[["P(M)"]])
  if (!anyNA(internalTable[["BF10"]])) {
    # no errors, proceed normally and complete the table

    logsumbfs <- logSumExp(logbfs + logprior)
    internalTable[["P(M|data)"]] <-  exp(logbfs + logprior - logsumbfs)

    nmodels <- nrow(internalTable)
    for (i in seq_len(nmodels)) {
      internalTable[i, "BFM"] <- logbfs[i] - logSumExp(logbfs[-i]) + log(nmodels - 1L)
    }

  } else {
    # create table excluding failed models

    idxGood <- !is.na(logbfs)
    widxGood <- which(idxGood)
    logsumbfs <- logSumExp(logbfs[idxGood])
    internalTable[["P(M|data)"]] <-  exp(logbfs - logsumbfs)

    nmodels <- sum(idxGood)
    widxBad <- which(!idxGood)
    for (i in widxGood) {
      internalTable[["BFM"]][i] <- logbfs[i] - logSumExp(logbfs[-c(i, widxBad)]) + log(nmodels - 1L)
    }

    internalTable[widxBad, "P(M|data)"] <- NaN
    internalTable[widxBad, "BFM"]       <- NaN
    internalTable[widxBad, "BF10"]      <- NaN
    footnotes <- list(
      message = gettext("<b><em>Warning.</em></b> Some Bayes factors could not be calculated. Multi model inference is carried out while excluding these models. The results may be uninterpretable!")
    )
  }

  # create the output table
  table <- as.data.frame(internalTable)
  if (options[["bayesFactorType"]] == "LogBF10") {
    table[["BFM"]]  <- internalTable[["BFM"]]
  } else {
    table[["BFM"]]  <- exp(internalTable[["BFM"]])
  }

  o <- order(table[["BF10"]], decreasing = TRUE)
  table <- table[o, ]
  idxNull <- which(o == 1L)
  if (options[["bayesFactorOrder"]] == "nullModelTop") {
    table[idxNull, "error %"] <- NA
    table <- table[c(idxNull, seq_len(nrow(table))[-idxNull]), ]
  } else {

    table[["BF10"]] <- table[["BF10"]] - table[1L, "BF10"]

    # recompute error (see BayesFactor:::`.__T__/:base`$`BFBayesFactor#BFBayesFactor`)
    table[idxNull, "error %"] <- 0
    table[["error %"]] <- sqrt(table[["error %"]]^2 + table[["error %"]][1L]^2)
    table[1L, "error %"] <- NA
  }

  table[["BF10"]] <- .recodeBFtype(table[["BF10"]], newBFtype = options[["bayesFactorType"]], oldBFtype = "LogBF10")
  table[["error %"]] <- 100 * table[["error %"]]

  if (!is.null(footnotes)) {
    idxNan <- which(do.call(cbind, lapply(table[-ncol(table)], is.nan)), arr.ind = TRUE)
    footnotes[["rows"]] <- idxNan[, "row"]
    footnotes[["cols"]] <- idxNan[, "col"]
  }

  return(list(table = table, internalTable = internalTable, footnotes = footnotes))

}

# posterior inference ----
.BANOVAestimatePosteriors <- function(jaspResults, dataset, options, model) {

  userNeedsPosteriorSamples <- options$posteriorEstimates || options$modelAveragedPosteriorPlot || options$qqPlot ||
    options$rsqPlot || options$criTable
  if (is.null(model$models) || !userNeedsPosteriorSamples)
    return()

  # model[["completelyReused"]] is needed because it can happen that some posterior samples can be reused (e.g.,
  # when the modelTerms change)
  posteriors <- jaspResults[["statePosteriors"]]$object
  if (!is.null(posteriors) && isTRUE(model[["completelyReused"]])) { # can all posterior be reused?

    posteriorsCRI <- jaspResults[["statePosteriorsCRI"]]$object
    if (is.null(posteriorsCRI)) {# do we need to recompute credible intervals?
      posteriorsCRI <- .BANOVAcomputePosteriorCRI(dataset, options, model, posteriors)
      statePosteriorsCRI <- createJaspState(object = posteriorsCRI)
      statePosteriorsCRI$dependOn(options = "credibleInterval",
                                  optionsFromObject = jaspResults[["tableModelComparisonState"]])
      jaspResults[["statePosteriorCRI"]] <- statePosteriorsCRI
    }

  } else { # compute posteriors and credible intervals
    posteriors    <- .BANOVAsamplePosteriors(jaspResults, dataset, options, model, posteriors)
    posteriorsCRI <- .BANOVAcomputePosteriorCRI(dataset, options, model, posteriors)

    statePosteriors <- createJaspState(object = posteriors)
    statePosteriors$dependOn(
      c(.BANOVAmodelSpaceDependencies(options[["modelPrior"]]), "samplingMethodMCMC", "samplesMCMC"),
      optionsFromObject = jaspResults[["tableModelComparisonState"]]
    )
    jaspResults[["statePosteriors"]] <- statePosteriors

    statePosteriorsCRI <- createJaspState(object = posteriorsCRI)
    statePosteriorsCRI$dependOn(options = "credibleInterval",
                                optionsFromObject = jaspResults[["statePosteriors"]])
    jaspResults[["statePosteriorCRI"]] <- statePosteriorsCRI

  }

  .BANOVASamplingIssuesTable(jaspResults, posteriors[["samplingIssues"]])

  return(c(posteriors, posteriorsCRI))
}

.BANOVAestimatesTable <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["tablePosteriorEstimates"]]) || !options[["posteriorEstimates"]])
    return()

  estsTable <- createJaspTable(title = gettext("Model Averaged Posterior Summary"))
  estsTable$position <- 3
  estsTable$dependOn(c(
    .BANOVAdataDependencies(),
    "posteriorEstimates",
    "samplingMethodMCMC", "samplesMCMC", "credibleInterval",
    .BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
    .BANOVArscaleDependencies(options[["priorSpecificationMode"]])
  ))

  overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
  estsTable$addColumnInfo(name = "Variable", type = "string")
  estsTable$addColumnInfo(name = "Level",    type = "string")
  estsTable$addColumnInfo(name = "Mean",     type = "number")
  estsTable$addColumnInfo(name = "SD",       type = "number")
  estsTable$addColumnInfo(name = "Lower",    type = "number", overtitle = overTitle)
  estsTable$addColumnInfo(name = "Upper",    type = "number", overtitle = overTitle)

  if (!is.null(model[["posteriors"]])) {
    .BANOVAfillEstimatesTable(
      jaspTable   = estsTable,
      mus         = model$posteriors$weightedMeans,
      sds         = model$posteriors$weightedSds,
      cri         = model$posteriors$weightedCRIs,
      hasNoLevels = model$posteriors$allContinuous,
      isRandom    = model$posteriors$isRandom
    )
  }
  jaspResults[["tablePosteriorEstimates"]] <- estsTable
  return()
}

.BANOVAfillEstimatesTable <- function(jaspTable, mus, sds, cri, hasNoLevels, isRandom = NULL) {

  if (!is.null(isRandom) && any(isRandom)) {
    # remove random effects
    mus <- mus[!isRandom]
    sds <- sds[!isRandom]
    cri <- cri[!isRandom, ]
  }

  table <- .BANOVAgetLevelsFromParamNames(names(mus))
  colnames(table) <- c("Variable", "Level")

  # set repeated parameter names to ""
  idxDup <- duplicated(table[, "Variable"])
  table[idxDup, "Variable"]  <- ""
  # decode base64 variables
  table[!idxDup, "Variable"] <- jaspBase::gsubInteractionSymbol(table[!idxDup, "Variable"])
  # rename mu to Intercept
  table[1L, "Variable"] <- "Intercept"
  # attach posterior means and sds
  table <- cbind(as.data.frame(table),
                 "Mean"  = mus,
                 "SD"    = sds,
                 "Lower" = cri[, 1L],
                 "Upper" = cri[, 2L])
  if (hasNoLevels)
    table <- table[, -2L]
  jaspTable$setData(table)
  return()
}

.BANOVArsquaredTable <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["tableBMACRI"]]) || !options[["criTable"]])
    return()

  criTable <- createJaspTable(title = gettextf("Model Averaged R%s", "\u00B2"))
  criTable$position <- 3.5
  criTable$dependOn(c(
    .BANOVAdataDependencies(),
    "criTable",
    "samplingMethodMCMC", "samplesMCMC", "bayesFactorType", "credibleInterval",
    .BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
    .BANOVArscaleDependencies(options[["priorSpecificationMode"]])
  ))

  overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
  criTable$addColumnInfo(name = "rsq",   type = "string", title = "")
  criTable$addColumnInfo(name = "Mean",  type = "number")
  criTable$addColumnInfo(name = "Lower", type = "number", overtitle = overTitle)
  criTable$addColumnInfo(name = "Upper", type = "number", overtitle = overTitle)

  if (!is.null(model[["posteriors"]])) {
    cri <- model[["posteriors"]][["weightedRsqCri"]]
    df <- data.frame(
      rsq   = "R\u00B2",
      Mean  = model[["posteriors"]][["weightedRsqMean"]],
      Lower = cri[1L],
      Upper = cri[2L]
    )
    criTable$setData(df)
  } else {
    criTable[["rsq"]] <- "R\u00B2"
  }
  jaspResults[["tableBMACRI"]] <- criTable
  return()

}

.BANOVASamplingIssuesTable <- function(jaspResults, samplingIssues) {

  if (is.null(samplingIssues) || length(samplingIssues) == 0L || !is.null(jaspResults[["tableSamplingIssues"]]))
    return()

  issuesTable <- createJaspTable(title = gettext("<b><em>Warning: sampling issues encountered!</em></b>"),
                           # below model and effects table (which may be unaffected) but above estimates table
                           position = 2.5)

  issuesTable$addColumnInfo(name = "Models",           title = gettext("Affected model"),      type = "string")
  issuesTable$addColumnInfo(name = "percentageSucces", title = gettextf("%% useful samples"),  type = "number")
  issuesTable$addColumnInfo(name = "noSuccess",        title = gettext("Useful samples"),      type = "integer")
  issuesTable$addColumnInfo(name = "total",            title = gettext("Total samples drawn"), type = "integer")

  df <- data.frame(
    Models    = vapply(samplingIssues, `[[`, FUN.VALUE = character(1L), "model"),
    noSuccess = vapply(samplingIssues, `[[`, FUN.VALUE = integer(1L),   "remainingRows"),
    total     = vapply(samplingIssues, `[[`, FUN.VALUE = integer(1L),   "originalRows")
  )
  df[["percentageSucces"]] <- df[["noSuccess"]] / df[["total"]]

  if (any(df[["percentageSucces"]] < 0.25) || any(df[["remainingRows"]] < 1000L))
    issuesTable$addFootnote(
      gettextf("For some affected models, more than 75%% of the posterior samples failed, or fewer than 1000 samples remained for subsequent results. All model-averaged output may be biased and uninterpretable. Check the model specification and data for any odd patterns."),
      symbol = .BANOVAGetWarningSymbol()
    )

  issuesTable$setData(df)
  issuesTable$dependOn(
    # these options correspond to userNeedsPosteriorSamples inside .BANOVAestimatePosteriors
    options           = c("posteriorEstimates", "modelAveragedPosteriorPlot", "qqPlot", "rsqPlot", "criTable", "modelTerms"),
    optionsFromObject = jaspResults[["statePosteriors"]]
  )
  jaspResults[["tableSamplingIssues"]] <- issuesTable

}

.BANOVAGetWarningSymbol <- function() {
  gettext("<b><em>Warning.</em></b>")
}

# Plots wrappers ----
.BANOVAposteriorPlot <- function(jaspResults, dataset, options, model) {

  # meta wrapper for model averaged posterior plots
  if (!is.null(jaspResults[["posteriorPlot"]]) || !options$modelAveragedPosteriorPlot)
    return()

  posteriorPlotContainer <- createJaspContainer(title = gettext("Model Averaged Posterior Distributions"))
  jaspResults[["posteriorPlot"]] <- posteriorPlotContainer
  posteriorPlotContainer$dependOn(c("modelAveragedPosteriorPlot", "modelTerms", "credibleInterval", "groupPosterior",
                                    "repeatedMeasuresCells", "dependent"))
  posteriorPlotContainer$position <- 4

  if (is.null(model$models)) {
    posteriorPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Posterior distribution"), width = 400, height = 400,
                                                            plot = NULL)
  } else {
    posteriorPlotContainer$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
    .BANOVAfillPosteriorPlotContainer(
      container       = posteriorPlotContainer,
      densities       = model$posterior$weightedDensities[, -1L, ], # omit intercept
      cris            = model$posterior$weightedCRIs[-1L, ],        # omit intercept
      isRandom        = model$posteriors$isRandom[-1L],             # omit intercept
      groupParameters = identical(options[["groupPosterior"]], "grouped")
    )
  }
  return()
}

.BANOVAfillPosteriorPlotContainer <- function(container, densities, cris, isRandom = NULL, groupParameters = FALSE) {

  allParamNames <- colnames(densities)

  tmp <- .BANOVAgetLevelsFromParamNames(allParamNames)
  plotTitles <- jaspBase::gsubInteractionSymbol(tmp[, "parameter"])
  xNames <- tmp[, "level"]

  if (is.null(isRandom)) {
    indices <- seq_along(allParamNames)
  } else {
    indices <- which(!isRandom)
  }

  if (groupParameters) {

    indices <- split(indices, plotTitles[indices])[order(unique(plotTitles[indices]))]
    nms <- names(indices)
    for (i in seq_along(indices)) {

      ind <- indices[[i]]

      if (all(c(densities[, ind, "y"]) == 0)) { # only TRUE is never sampled and set to 0
        plot <- createJaspPlot(title = nms[i], width = 400, height = 400)
        plot$setError(gettext("Could not plot posterior distribution. Check if an error occurred for models including this parameter."))
      } else {
        # make prior posterior plot
        dfLines <- data.frame(x = c(densities[, ind, "x"]),
                              y = c(densities[, ind, "y"]),
                              g = rep(xNames[ind], each = nrow(densities)))

        xBreaks <- jaspGraphs::getPrettyAxisBreaks(dfLines$x)
        yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(0, dfLines$y))
        breaksYmax <- yBreaks[length(yBreaks)]
        obsYmax <- max(dfLines$y)
        newymax <- max(1.25 * obsYmax, breaksYmax)

        lInd <- length(ind)
        if (lInd == 1L) {
          plusmin <- 0
          showLegend <- FALSE
        } else {
          # some jitter in CRI height
          showLegend <- TRUE
          plusmin <- rep(-1, lInd)
          plusmin[seq(2, lInd, 2)] <- 0
          if (lInd > 2)
            plusmin[seq(3, lInd, 3)] <- 1
        }

        dfCri <- data.frame(
          xmin = cris[ind, 1L],
          xmax = cris[ind, 2L],
          y = (newymax - obsYmax)/2 + obsYmax + plusmin * (newymax - obsYmax) / 10,
          g = xNames[ind]
        )

        if (showLegend) {
          # if two distributions are remarkably similar, i.e., have nearly identical credible intervals,
          # we add a linetype aestethic
          tol <- 1e-4
          distMin <- which(stats::dist(dfCri$xmin) < tol)
          distMax <- which(stats::dist(dfCri$xmin) < tol)
          distBoth <- intersect(distMin, distMax)
          if (length(distBoth) > 0) {
            aesLine <- ggplot2::aes(x = x, y = y, g = g, color = g, linetype = g)
            aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g, color = g, linetype = g)
          } else {
            aesLine <- ggplot2::aes(x = x, y = y, g = g, color = g)
            aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g, color = g)
          }
        } else {
          # don't use color for single density plots (covariates)
          aesLine <- ggplot2::aes(x = x, y = y, g = g)
          aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g)
        }


        maxheight <- min(newymax - dfCri$y[1:min(lInd, 3)])
        xlab <- nms[i]
        # ggplot doesn't like our fancy unicode * so we need to escape it
        xlab <- stringi::stri_escape_unicode(xlab)
        # change escaped version into x
        xlab <- gsub(pattern = "\\u2009\\u273b\\u2009", replacement = " x ", x = xlab, fixed = TRUE)

        ncolLegend <- ceiling(lInd / 14)
        guideLegend <- ggplot2::guide_legend(title = gettext("Level"), keywidth = 0.25, keyheight = 0.1, default.unit = "inch",
                                             ncol = ncolLegend)
        p <- ggplot2::ggplot(data = dfLines, mapping = aesLine) +
          ggplot2::geom_line(size = 1.1) +
          ggplot2::geom_errorbarh(data = dfCri, mapping = aesErrorbar, height = maxheight, size = 1.1,
                                  inherit.aes = FALSE) +
          ggplot2::scale_x_continuous(name = xlab,      breaks = xBreaks, limits = range(xBreaks)) +
          ggplot2::scale_y_continuous(name = gettext("Density"), breaks = yBreaks, limits = c(0, newymax)) +
          colorspace::scale_color_discrete_qualitative() +
          ggplot2::scale_linetype() +
          ggplot2::guides(color = guideLegend, linetype = guideLegend)
        p <- jaspGraphs::themeJasp(p, legend.position = if (showLegend) "right" else "none")

        plot <- createJaspPlot(title = nms[i], width = 400, height = 400, plot = p)
      }
      container[[allParamNames[i]]] <- plot
    }
  } else {
    for (i in indices) {

      # make prior posterior plot
      df <- data.frame(x = densities[, i, "x"],
                       y = densities[, i, "y"])

      p <- jaspGraphs::PlotPriorAndPosterior(
        dfLines    = df,
        xName      = xNames[i],
        CRI        = cris[i, ],
        drawCRItxt = FALSE
      )

      plot <- createJaspPlot(title = plotTitles[i], width = 400, height = 400, plot = p)
      container[[allParamNames[i]]] <- plot
    }
  }
  return()

}

.BANOVAqqplot <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["QQplot"]]) || !options[["qqPlot"]])
    return()

  plot <- createJaspPlot(
    title       = gettext("Model Averaged Q-Q Plot"),
    width       = 400,
    height      = 400,
    aspectRatio = 1
  )

  if (!is.null(model[["models"]])) {
    plot$plotObject <- jaspGraphs::plotQQnorm(
      residuals = model[["posteriors"]][["weightedResidSumStats"]][,"mean"],
      lower     = model[["posteriors"]][["weightedResidSumStats"]][,"cri.2.5%"],
      upper     = model[["posteriors"]][["weightedResidSumStats"]][,"cri.97.5%"],
      ablineColor = "darkred"
    )
    plot$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
  }

  plot$dependOn(c("qqPlot", "modelTerms", "credibleInterval", "repeatedMeasuresCells", "dependent"))
  plot$position <- 5
  jaspResults[["QQplot"]] <- plot
  return()
}

.BANOVArsqplot <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["rsqplot"]]) || !options[["rsqPlot"]])
    return()

  plot <- createJaspPlot(
    title       = gettextf("Model Averaged Posterior R%s","\u00B2"),
    width       = 400,
    height      = 400,
    aspectRatio = 1
  )

  if (!is.null(model[["models"]])) {
    dd     <- model[["posteriors"]][["weightedRsqDens"]]
    rsqCri <- model[["posteriors"]][["weightedRsqCri"]]

    df <- data.frame(x = dd$x, y = dd$y)
    xName <- expression(R^2)
    plot$plotObject <- jaspGraphs::PlotPriorAndPosterior(dfLines = df, xName = xName, CRI = rsqCri, drawCRItxt = FALSE)
    plot$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
  }

  plot$dependOn(c("rsqPlot", "modelTerms", "credibleInterval", "repeatedMeasuresCells", "dependent"))
  plot$position <- 6
  jaspResults[["rsqplot"]] <- plot
  return()
}

# Post hoc comparison ----
.BANOVAnullControlPostHocTable <- function(jaspResults, dataset, options, model) {

  if (length(options$postHocTerms) == 0L)
    return()

  postHocCollection <- jaspResults[["collectionPosthoc"]]
  if (is.null(postHocCollection)) {
    postHocCollection <- createJaspContainer(title = gettext("Post Hoc Tests"))
    postHocCollection$position <- 8
    postHocCollection$addCitation(.BANOVAcitations[3:4])
    postHocCollection$dependOn(c("dependent", "repeatedMeasuresCells", "postHocNullControl", "bayesFactorType"))
    jaspResults[["collectionPosthoc"]] <- postHocCollection
  }

  # the same footnote for all the tables
  footnote <- gsub("[\r\n\t]", "",
    gettext("The posterior odds have been corrected for multiple testing by fixing to 0.5 the prior probability that the null hypothesis holds across all comparisons (Westfall, Johnson, & Utts, 1997). Individual comparisons are based on the default t-test with a Cauchy (0, r = 1/sqrt(2)) prior. The \"U\" in the Bayes factor denotes that it is uncorrected."))

  bfTxt <- if (options[["postHocNullControl"]]) ", U" else ""

  switch(options[["bayesFactorType"]],
    BF10    = { bf.title <- paste0("BF<sub>10",     bfTxt, "</sub>")  },
    BF01    = { bf.title <- paste0("BF<sub>01",     bfTxt, "</sub>")  },
    LogBF10 = { bf.title <- paste0("Log(BF<sub>10", bfTxt, "</sub>)") }
   )

  priorWidth <- 1 / sqrt(2)
  posthoc.variables <- unlist(options[["postHocTerms"]])
  if (model[["analysisType"]] == "RM-ANOVA") {
    dependent <- .BANOVAdependentName
  } else {
    dependent <- options[["dependent"]]
  }

  for (posthoc.var in posthoc.variables) {

    # does the table already exist?
    if (!is.null(postHocCollection[[paste0("postHoc_", posthoc.var)]]))
      next

    postHocTable <- createJaspTable(title = paste0("Post Hoc Comparisons - ", posthoc.var))

    postHocTable$addColumnInfo(name = "(I)",            type = "string", title = "", combine=TRUE)
    postHocTable$addColumnInfo(name = "(J)",            type = "string", title = "")
    postHocTable$addColumnInfo(name = "Prior Odds",     type = "number", title = gettext("Prior Odds"))
    postHocTable$addColumnInfo(name = "Posterior Odds", type = "number", title = gettext("Posterior Odds"))
    postHocTable$addColumnInfo(name = "BF",             type = "number", title = bf.title)
    postHocTable$addColumnInfo(name = "error %",        type = "number", title = gettextf("error %%"))

    postHocTable$dependOn(optionContainsValue = list("postHocTerms" = posthoc.var))

    if (options[["postHocNullControl"]])
      postHocTable$addFootnote(footnote)

    if (is.null(model$models)) { # only show empty table
      postHocCollection[[paste0("postHoc_", posthoc.var)]] <- postHocTable
      next
    }

    fixed <- unlist(c(options$fixedFactors, sapply(options$repeatedMeasuresFactors, `[[`, "name")))
    if (model$analysisType == "RM-ANOVA" && posthoc.var %in% fixed && !posthoc.var %in% options$betweenSubjectFactors) {
      variable.levels <- options$repeatedMeasuresFactors[[which(lapply(options$repeatedMeasuresFactors, function(x) x$name) == posthoc.var)]]$levels
      paired <- TRUE
    } else if (posthoc.var %in% c(options$fixedFactors, options$betweenSubjectFactors, options$randomFactors)) {
      variable.levels <- levels(dataset[[posthoc.var]])
      paired <- FALSE
    } else {
      next
    }

    if (length(variable.levels) < 2L)
      next

    pairs <- utils::combn(variable.levels, 2)

    splitname <- if (model[["analysisType"]] == "RM-ANOVA") .BANOVAdependentName else dependent
    allSplits <- split(dataset[[splitname]], dataset[[posthoc.var]])

    errMessages <- NULL
    for (i in 1:ncol(pairs)) {

      if (!is.null(model$models)) {

        x <- na.omit(allSplits[[pairs[1L, i]]])
        y <- na.omit(allSplits[[pairs[2L, i]]])

        ttest <- try(BayesFactor::ttestBF(x = x, y = y, rscale = priorWidth, paired = paired), silent=TRUE)

        if (isTryError(ttest)) {

          priorOdds <- postOdds <- logBF <- error <- "NaN"
          errorMsg <- .extractErrorMessage(ttest)
          if (errorMsg %in% names(errMessages)) {
            errMessages[[errorMsg]][["row_names"]] <- c(errMessages[[errorMsg]][["row_names"]], i)
          } else {
            errMessages[[errorMsg]] <- list(
              message = if (endsWith(errorMsg, ".")) errorMsg else paste0(errorMsg, "."),
              row_names = i
            )
          }

        } else {

          if (options[["postHocNullControl"]]) {
            pH0 <- 0.5^(2 / length(variable.levels))
          } else {
            pH0 <- 0.5
          }

          logBF <- ttest@bayesFactor$bf # <- log(BF10)
          if (options[["bayesFactorType"]] == "BF01") {
            priorOdds <- pH0 / (1 - pH0)
            logBF <- -logBF
          } else {
            priorOdds <- (1 - pH0) / pH0
          }

          postOdds <- log(priorOdds) + logBF
          postOdds <- exp(postOdds)
          if (options[["bayesFactorType"]] != "LogBF10")
            logBF <- exp(logBF)

          error <- ttest@bayesFactor$error * 100
        }
      }

      row <- list(
        "(I)"            = pairs[1L, i],
        "(J)"            = pairs[2L, i],
        "Prior Odds"     = priorOdds,
        "Posterior Odds" = postOdds,
        "BF"             = logBF,
        "error %"        = error
      )
      postHocTable$addRows(row, rowNames = paste0("row", i))
    }

    if (!is.null(errMessages)) {
      for (i in seq_along(errMessages))
        postHocTable$addFootnote(message  = errMessages[[i]][["message"]],
                                 rowNames = paste0("row", errMessages[[i]][["row_names"]]))

    }
    postHocCollection[[paste0("postHoc_", posthoc.var)]] <- postHocTable
  }
  return()
}

# Data reading ----
.BANOVAreadData <- function(dataset, options, analysisType) {

  if (is.null(dataset)) {
    if (analysisType == "RM-ANOVA")
      return(.BANOVAreadRManovaData(dataset, options))

    numeric.vars <- c(unlist(options$covariates), unlist(options$dependent))
    numeric.vars <- numeric.vars[numeric.vars != ""]

    factor.vars <- c(unlist(options$fixedFactors), unlist(options$randomFactors))
    factor.vars <- factor.vars[factor.vars != ""]

    dataset <- .readDataSetToEnd(
      columns.as.numeric  = numeric.vars,
      columns.as.factor   = factor.vars,
      exclude.na.listwise = c(numeric.vars, factor.vars)
    )
  }

  return(dataset)
}

.BANOVAdependentName <- "JaspColumn_.dependent._Encoded"
.BANOVAsubjectName <- "JaspColumn_.subject._Encoded"

.BANOVAdecodeSubject <- function(string) {
  return(gsub(.BANOVAsubjectName, "subject", string))
}

.BANOVAdecodeNuisance <- function(nuisance) {
  # .BANOVAsubjectName needs to be handled separately
  idx <- nuisance == .BANOVAsubjectName
  nuisance[idx]  <- "subject"
  nuisance[!idx] <- jaspBase::gsubInteractionSymbol(nuisance[!idx])
  return(nuisance)
}

.BANOVAreadRManovaData <- function(dataset, options) {

  if (!("" %in% options$repeatedMeasuresCells)) {
    rm.vars       <- options$repeatedMeasuresCells

    bs.factors    <- options$betweenSubjectFactors
    bs.covariates <- options$covariates
    rm.factors    <- options$repeatedMeasuresFactors
    all.variables <- c (bs.factors, bs.covariates, rm.vars)

    dataset <- .readDataSetToEnd(
      columns.as.numeric  = c(rm.vars, bs.covariates),
      columns.as.factor   = bs.factors,
      exclude.na.listwise = all.variables
    )
    dataset <- try(
      .shortToLong(dataset, rm.factors, rm.vars, c(bs.factors, bs.covariates), dependentName = .BANOVAdependentName, subjectName = .BANOVAsubjectName),
      silent = TRUE
    )

  }
  return(dataset)
}

# Descriptives ----
.BANOVAdescriptives <- function(jaspResults, dataset, options, errors, analysisType, ready = TRUE, position = 9001) {
  if (!ready)
    return()

  # the main use of this function is that descriptives can now be reused for the frequentist ANOVAs
  # without the container, the position could mess things up
  descriptivesContainer <- jaspResults[["descriptivesContainer"]]
  if (is.null(descriptivesContainer)) {
    descriptivesContainer <- createJaspContainer(title = gettext("Descriptives"))
    descriptivesContainer$dependOn(c("dependent", "repeatedMeasuresCells"))
    descriptivesContainer$position <- position

    jaspResults[["descriptivesContainer"]] <- descriptivesContainer
  }

  .BANOVAdescriptivesTable   (descriptivesContainer, dataset, options, errors, analysisType)
  .BANOVAdescriptivesPlots   (descriptivesContainer, dataset, options, errors, analysisType)
  .BANOVAbarPlots            (descriptivesContainer, dataset, options, errors, analysisType)
  .BANOVArainCloudPlots      (descriptivesContainer, dataset, options, errors, analysisType)
  return()

}

.BANOVAdescriptivesTable <- function(jaspContainer, dataset, options, errors, analysisType) {

  if (!options[["descriptives"]] || !is.null(jaspContainer[["tableDescriptives"]]))
    return()

  if (analysisType == "RM-ANOVA") {
    dependent <- .BANOVAdependentName
    fixed <- unlist(c(lapply(options[["repeatedMeasuresFactors"]], `[[`, "name"), options[["betweenSubjectFactors"]]))
    title <- gettext("Descriptives")
  } else {
    dependent <- options[["dependent"]]
    fixed <- options[["fixedFactors"]]
    if (!is.null(dependent) && dependent != "") {
      title <- gettextf("Descriptives - %s", options$dependent)
    } else {
      title <- gettext("Descriptives")
    }
  }

  descriptivesTable <- createJaspTable(title = title)
  descriptivesTable$position <- 1
  descriptivesTable$dependOn(c("dependent", "fixedFactors", "betweenSubjectFactors", "descriptives",
                               "credibleInterval"))

  # internal names use base64 so they don't have " " which R changes into "." because R does that to dataframe names.
  # Also adds a . in case the base64 is magically "Mean", "SD" or "N"
  fixedDot <- paste0(fixed, ".")
  for (i in seq_along(fixed))
    descriptivesTable$addColumnInfo(name = fixedDot[i], type = "string", title = fixed[i], combine = TRUE)

  descriptivesTable$addColumnInfo(name = "N",               title = gettext("N"),                        type = "integer")
  descriptivesTable$addColumnInfo(name = "Mean",            title = gettext("Mean"),                     type = "number")
  descriptivesTable$addColumnInfo(name = "SD",              title = gettext("SD"),                       type = "number")
  descriptivesTable$addColumnInfo(name = "SE",              title = gettext("SE"),                       type = "number")
  descriptivesTable$addColumnInfo(name = "coefOfVariation", title = gettext("Coefficient of variation"), type = "number")

  if (!is.null(options[["credibleInterval"]])) {
    overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
    descriptivesTable$addColumnInfo(name = gettext("Lower"), type = "number", overtitle = overTitle)
    descriptivesTable$addColumnInfo(name = gettext("Upper"), type = "number", overtitle = overTitle)
  }

  descriptivesTable$showSpecifiedColumnsOnly <- TRUE
  jaspContainer[["tableDescriptives"]] <- descriptivesTable

  if (errors$noVariables)
    return()

  # order the data to show
  dataset2 <- dataset[do.call(order, dataset[, fixed, drop = FALSE]), c(dependent, fixed)]

  # by pasting the fixedFactors together we obtain the unique indices to group on. This excludes
  # non-existent combinations. A "." is added to deal with the level "".
  ind <- apply(dataset2[, fixed, drop = FALSE], 1L, paste0, ".", collapse = "")

  # temporary function to calculate all descriptives
  tmpFun <- function(data, fixed, dependent, cri) {

    row <- list()
    for (j in fixed)
      row[[paste0(j, ".")]] <- as.character(data[1L, j])

    N <- nrow(data)
    row[["N"]] <- N

    if (N == 0L) {

      row[["Mean"]] <- row[["SD"]] <- row[["SE"]] <- row[["coefOfVariation"]] <- NA

    } else if (N == 1L) {

      row[["Mean"]] <- data[[dependent]]
      row[["SD"]] <- row[["SE"]] <- row[["coefOfVariation"]] <- row[["Lower"]] <- row[["Upper"]] <- NA

    } else {

      row[["Mean"]] <- mean(data[[dependent]])
      row[["SD"]]   <- stats::sd(data[[dependent]])
      row[["SE"]]   <- stats::sd(data[[dependent]]) / sqrt(N)
      row[["coefOfVariation"]] <- stats::sd(data[[dependent]]) / mean(data[[dependent]])


      tmp <- jaspTTests::.posteriorSummaryGroupMean(data[[dependent]], cri)
      row[["Lower"]] <- tmp[["ciLower"]]
      row[["Upper"]] <- tmp[["ciUpper"]]

    }
    return(row)
  }

  # apply tempFun on each subset defined by ind
  rows <- by(dataset2, ind, tmpFun, fixed = fixed,
             dependent = dependent, cri = options[["credibleInterval"]])

  # do.call(rbind, rows) turns rows into a data.frame (from a list) for jaspResults
  data <- do.call(rbind.data.frame, rows)

  # the results of `by` are in alphabetical order, so we reorder the data.frame to match the factor level order. Fixes https://github.com/jasp-stats/jasp-issues/issues/1741
  newOrder <- match(unique(ind), rownames(data))
  data <- data[newOrder, ]

  # add footnote if there are unobserved combinations
  nObserved <- nrow(data)
  nPossible <- prod(sapply(dataset2[, fixed, drop = FALSE], nlevels))
  if (nObserved != nPossible) {
    descriptivesTable$addFootnote(
      message = gettextf(
        "Some combinations of factors are not observed and hence omitted (%1$g out of %2$g combinations are unobserved).",
        nPossible - nObserved, nPossible
      )
    )
  }

  descriptivesTable$setData(data)

  return()
}

.BANOVAdescriptivesPlots <- function(jaspContainer, dataset, options, errors, analysisType) {

  if (length(options[["descriptivePlotHorizontalAxis"]]) == 0L
      || options[["descriptivePlotHorizontalAxis"]] == ""
      || !is.null(jaspContainer[["containerDescriptivesPlots"]]))
    return()

  descriptivesPlotContainer <- createJaspContainer(title = gettext("Descriptives plots"))
  descriptivesPlotContainer$position <- 2
  jaspContainer[["containerDescriptivesPlots"]] <- descriptivesPlotContainer

  # either Bayesian or Frequentist anova
  if (is.null(options$descriptivePlotErrorBarType)) { # TRUE implies Bayesian
    plotErrorBars <- options$descriptivePlotCi
    errorBarType  <- "ci"
    conf.interval <- options$descriptivePlotCiLevel
    moreyCorrection <- TRUE
    descriptivesPlotContainer$dependOn(c("dependent", "descriptivePlotCi", "descriptivePlotCiLevel"))

  } else {
    plotErrorBars <- options$descriptivePlotErrorBar
    errorBarType  <- options$descriptivePlotErrorBarType
    conf.interval <- options$descriptivePlotCiLevel
    moreyCorrection <-  if (is.null(options[["applyMoreyCorrectionErrorBars"]])) FALSE else options[["applyMoreyCorrectionErrorBars"]]
    descriptivesPlotContainer$dependOn(c("dependent", "descriptivePlotErrorBar", "descriptivePlotErrorBarType", "descriptivePlotCiLevel",
                                         "descriptivePlotErrorBarPooled", "applyMoreyCorrectionErrorBars"))

  }
  usePooledSE <- if (is.null(options[["descriptivePlotErrorBarPooled"]])) FALSE else options[["descriptivePlotErrorBarPooled"]]

  descriptivesPlotContainer$dependOn(c("descriptivePlotHorizontalAxis", "descriptivePlotSeparateLines", "descriptivePlotSeparatePlot", "descriptivePlotYAxisLabel"))

  if (errors$noVariables) {
    descriptivesPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Descriptives Plot"))
    return()
  }

  groupVars <- c(options$descriptivePlotHorizontalAxis, options$descriptivePlotSeparateLines, options$descriptivePlotSeparatePlot)
  groupVars <- groupVars[groupVars != ""]
  if (analysisType == "RM-ANOVA") {
    dependent <- .BANOVAdependentName
    yLabel <- options[["descriptivePlotYAxisLabel"]]
  } else {
    dependent<- options$dependent
    yLabel <- options[["dependent"]]
  }

  betweenSubjectFactors <- groupVars[groupVars %in% options$betweenSubjectFactors]
  repeatedMeasuresFactors <- groupVars[groupVars %in% sapply(options$repeatedMeasuresFactors, `[[`, "name")]

  if (length(repeatedMeasuresFactors) == 0) {
    summaryStat <- jaspTTests::.summarySE(as.data.frame(dataset), measurevar = dependent, groupvars = groupVars,
                                         conf.interval = conf.interval, na.rm = TRUE, .drop = FALSE,
                                         errorBarType = errorBarType, dependentName = .BANOVAdependentName,
                                         subjectName = .BANOVAsubjectName)
  } else {
    summaryStat <- jaspTTests::.summarySEwithin(as.data.frame(dataset), measurevar= .BANOVAdependentName,
                                               betweenvars = betweenSubjectFactors,
                                               withinvars = repeatedMeasuresFactors,
                                               idvar = .BANOVAsubjectName,
                                               conf.interval = conf.interval,
                                               na.rm=TRUE, .drop = FALSE, errorBarType = errorBarType,
                                               usePooledSE = usePooledSE,
                                               useMoreyCorrection = moreyCorrection,
                                               dependentName = .BANOVAdependentName,
                                               subjectName = .BANOVAsubjectName)
  }

  if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
    splitScatterOptions                                       <- options
    splitScatterOptions[["colorPalette"]]                     <- "colorblind3"
    splitScatterOptions[["scatterPlotLegend"]]                <- TRUE
    splitScatterOptions[["scatterPlotRegressionLine"]]        <- TRUE
    splitScatterOptions[["scatterPlotRegressionLineCi"]]      <- plotErrorBars
    splitScatterOptions[["scatterPlotRegressionLineType"]]    <- "linear"
    splitScatterOptions[["scatterPlotGraphTypeAbove"]]        <- "none"
    splitScatterOptions[["scatterPlotGraphTypeRight"]]        <- "none"
    splitScatterOptions[["scatterPlotRegressionLineCiLevel"]] <- options[["descriptivePlotCiLevel"]]

    if (options$descriptivePlotSeparatePlot != "") {

      for (thisLevel in levels(dataset[[options[["descriptivePlotSeparatePlot"]]]])) {

        subData <- dataset[dataset[[options[["descriptivePlotSeparatePlot"]]]] == thisLevel, ]
        thisPlotName <- paste0(options[["descriptivePlotHorizontalAxis"]], " - ", options[["dependent"]], ": ",
                               options[["descriptivePlotSeparatePlot"]], " = ", thisLevel)
        jaspDescriptives::.descriptivesScatterPlots(descriptivesPlotContainer, subData, c(options[["descriptivePlotHorizontalAxis"]], options[["dependent"]]),
                                  split = options[["descriptivePlotSeparateLines"]], options = splitScatterOptions, name = thisPlotName,
                                  dependOnVariables = FALSE)
      }

    } else {
      jaspDescriptives::.descriptivesScatterPlots(descriptivesPlotContainer, dataset, c(options[["descriptivePlotHorizontalAxis"]], options[["dependent"]]),
                                split = options[["descriptivePlotSeparateLines"]], options = splitScatterOptions, dependOnVariables = FALSE)
    }

    return()

  }

  colnames(summaryStat)[colnames(summaryStat) == dependent] <- "dependent"

  if (options$descriptivePlotHorizontalAxis != "") {
    colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotHorizontalAxis] <- "descriptivePlotHorizontalAxis"
  }

  if (options$descriptivePlotSeparateLines != "") {
    colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotSeparateLines] <- "descriptivePlotSeparateLines"
  }

  if (options$descriptivePlotSeparatePlot != "") {
    colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotSeparatePlot] <- "descriptivePlotSeparatePlot"
  }

  base_breaks_x <- function(x){
    b <- unique(as.numeric(x))
    d <- data.frame(y=-Inf, yend=-Inf, x=min(b), xend=max(b))
    list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1))
  }

  base_breaks_y <- function(x, plotErrorBars){
    if (plotErrorBars) {
      ci.pos <- c(x[,"dependent"], x[,"dependent"]-x[,"ci"],x[,"dependent"]+x[,"ci"])
      b <- pretty(ci.pos)
      d <- data.frame(x=-Inf, xend=-Inf, y=min(b), yend=max(b))
      list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1),
           ggplot2::scale_y_continuous(breaks=c(min(b),max(b))))
    } else {
      b <- pretty(x[,"dependent"])
      d <- data.frame(x=-Inf, xend=-Inf, y=min(b), yend=max(b))
      list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1),
           ggplot2::scale_y_continuous(breaks=c(min(b),max(b))))
    }
  }

  if (options$descriptivePlotSeparatePlot != "") {
    subsetPlots <- levels(summaryStat[,"descriptivePlotSeparatePlot"])
    nPlots <- length(subsetPlots)
  } else {
    nPlots <- 1
  }

  for (i in seq_len(nPlots)) {

    if (nPlots > 1L) {
      title <- paste(options$descriptivePlotSeparatePlot,": ",subsetPlots[i], sep = "")
    } else {
      title <- ""
    }
    descriptivesPlot <- createJaspPlot(title = title)
    descriptivesPlotContainer[[title]] <- descriptivesPlot

    descriptivesPlot$height <- 300
    if (options$descriptivePlotSeparateLines != "") {
      descriptivesPlot$width <- 430
    } else {
      descriptivesPlot$width <- 300
    }

    if (options$descriptivePlotSeparatePlot != "") {
      summaryStatSubset <- subset(summaryStat,summaryStat[,"descriptivePlotSeparatePlot"] == subsetPlots[i])
    } else {
      summaryStatSubset <- summaryStat
    }

    if (options$descriptivePlotSeparateLines == "") {

      p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x=descriptivePlotHorizontalAxis,
                                                           y=dependent,
                                                           group=1))

    } else {

      p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x=descriptivePlotHorizontalAxis,
                                                           y=dependent,
                                                           group=descriptivePlotSeparateLines,
                                                           shape=descriptivePlotSeparateLines,
                                                           fill=descriptivePlotSeparateLines))

    }

    if (plotErrorBars && !(options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]])) {

      pd <- ggplot2::position_dodge(.2)
      p = p + ggplot2::geom_errorbar(ggplot2::aes(ymin=ciLower,
                                                  ymax=ciUpper),
                                     colour="black", width=.2, position=pd)

    } else {

      pd <- ggplot2::position_dodge(0)

    }

    guideLegend <- ggplot2::guide_legend(nrow = min(10, nlevels(summaryStatSubset$descriptivePlotSeparateLines)),
                                         title = options$descriptivePlotSeparateLines, keywidth = 0.1, keyheight = 0.3,
                                         default.unit = "inch")

    if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
      line <- ggplot2::geom_smooth(method = "lm", size = .7, color = "black", se = FALSE)
      addHorizontalVar <- summaryStatSubset[,"descriptivePlotHorizontalAxis"]
    } else {
      line <- ggplot2::geom_line(position=pd, size = .7)
    }

    if (plotErrorBars) {
      ci.pos <- c(summaryStatSubset[,"dependent"],
                  summaryStatSubset[,"dependent"]-summaryStatSubset[,"ci"],
                  summaryStatSubset[,"dependent"]+summaryStatSubset[,"ci"],
                  min(summaryStatSubset[,"dependent"])*1.1,
                  max(summaryStatSubset[,"dependent"])*1.1)
      yBreaks <- jaspGraphs::getPrettyAxisBreaks(ci.pos)
    } else {
      yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(summaryStatSubset[,"dependent"],
                                                   min(summaryStatSubset[,"dependent"])*1.1,
                                                   max(summaryStatSubset[,"dependent"])*1.1))
    }

    if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
      ggXaxis <- ggplot2::scale_x_continuous(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[,"descriptivePlotHorizontalAxis"]))
    } else {
      ggXaxis <- ggplot2::scale_x_discrete(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[,"descriptivePlotHorizontalAxis"]))
    }

    p <- p + line +
      ggplot2::geom_point(position=pd, size=4) +
      ggplot2::scale_fill_manual(values = c(rep(c("white","black"),5),rep("grey",100)), guide=guideLegend) +
      ggplot2::scale_shape_manual(values = c(rep(c(21:25),each=2),21:25,7:14,33:112), guide=guideLegend) +
      ggplot2::scale_color_manual(values = rep("black",200),guide=guideLegend) +
      ggplot2::labs(y = yLabel, x = options[["descriptivePlotHorizontalAxis"]]) +
      ggplot2::scale_y_continuous(breaks = yBreaks, limits = range(yBreaks)) +
      ggXaxis +
      jaspGraphs::geom_rangeframe() +
      jaspGraphs::themeJaspRaw(legend.position = "right")

    descriptivesPlot$plotObject <- p
  }
  return()
}

.BANOVAbarPlots <- function(jaspContainer, dataset, options, errors, analysisType) {

  if (length(options[["barPlotHorizontalAxis"]]) == 0L
      || options[["barPlotHorizontalAxis"]] == ""
      || !is.null(jaspContainer[["containerBarPlots"]]))
    return()

  barPlotContainer <- createJaspContainer(title = gettext("Bar plots"))
  barPlotContainer$position <- 3
  jaspContainer[["containerBarPlots"]] <- barPlotContainer

  plotErrorBars <- options[["barPlotErrorBars"]]
  errorBarType  <- options[["barPlotErrorBarType"]]
  confInterval <- options[["barPlotCiInterval"]]
  barPlotContainer$dependOn(c("dependent", "barPlotErrorBars", "barPlotErrorBarType", "applyMoreyCorrectionErrorBarsBarplot",
                              "barPlotHorizontalZeroFix", "barPlotCiInterval", "usePooledStandErrorCITwo"))

  usePooledSE <- if (is.null(options[["usePooledStandErrorCITwo"]])) FALSE else options[["usePooledStandErrorCITwo"]]
  useMoreyCorrection <- if (is.null(options[["applyMoreyCorrectionErrorBarsBarplot"]])) TRUE else options[["applyMoreyCorrectionErrorBarsBarplot"]]

  barPlotHorizontalZeroFix <- options[["barPlotHorizontalZeroFix"]]
  barPlotContainer$dependOn(c("barPlotHorizontalAxis", "barPlotSeparatePlots", "labelYAxisTwo"))

  if (errors$noVariables) {
    barPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Bar Plot"))
    return()
  }

  groupVars <- c(options[["barPlotHorizontalAxis"]], options[["barPlotSeparatePlots"]])
  groupVars <- groupVars[groupVars != ""]
  if (analysisType == "RM-ANOVA") {
    dependent <- .BANOVAdependentName
    yLabel <- options[["labelYAxisTwo"]]
  } else {
    dependent<- options[["dependent"]]
    yLabel <- options[["dependent"]]
  }

  betweenSubjectFactors <- groupVars[groupVars %in% options[["betweenSubjectFactors"]]]
  repeatedMeasuresFactors <- groupVars[groupVars %in% sapply(options[["repeatedMeasuresFactors"]], `[[`, "name")]

  if (length(repeatedMeasuresFactors) == 0) {
    summaryStat <- jaspTTests::.summarySE(as.data.frame(dataset), measurevar = dependent, groupvars = groupVars,
                                         conf.interval = confInterval, na.rm = TRUE, .drop = FALSE,
                                         errorBarType = errorBarType, dependentName = .BANOVAdependentName,
                                         subjectName = .BANOVAsubjectName)
  } else {
    summaryStat <- jaspTTests::.summarySEwithin(as.data.frame(dataset), measurevar = .BANOVAdependentName,
                                               betweenvars = betweenSubjectFactors,
                                               withinvars = repeatedMeasuresFactors,
                                               idvar = .BANOVAsubjectName,
                                               conf.interval = confInterval,
                                               na.rm=TRUE, .drop = FALSE, errorBarType = errorBarType,
                                               usePooledSE = usePooledSE,
                                               useMoreyCorrection =  useMoreyCorrection,
                                               dependentName = .BANOVAdependentName,
                                               subjectName = .BANOVAsubjectName)
  }

  colnames(summaryStat)[colnames(summaryStat) == dependent] <- "dependent"

  if (options[["barPlotHorizontalAxis"]] != "") {
    colnames(summaryStat)[colnames(summaryStat) == options[["barPlotHorizontalAxis"]]] <- "barPlotHorizontalAxis"
  }

  if (options[["barPlotSeparatePlots"]] != "") {
    colnames(summaryStat)[colnames(summaryStat) == options[["barPlotSeparatePlots"]]] <- "barPlotSeparatePlots"
    subsetPlots <- levels(summaryStat[, "barPlotSeparatePlots"])
    nPlots <- length(subsetPlots)
  } else {
    nPlots <- 1L
  }

  for (i in seq_len(nPlots)) {

    title <- if (nPlots > 1L) {
      paste0(options[["barPlotSeparatePlots"]], ": ", subsetPlots[i])
    } else {
      ""
    }
    barPlot <- createJaspPlot(title = title)
    barPlotContainer[[title]] <- barPlot

    barPlot$height <- 500
    barPlot$width <- 500

    if (options[["barPlotSeparatePlots"]] != "") {
      summaryStatSubset <- subset(summaryStat, summaryStat[, "barPlotSeparatePlots"] == subsetPlots[i])
    } else {
      summaryStatSubset <- summaryStat
    }

    error <- NULL
    if (plotErrorBars) {
      pd <- ggplot2::position_dodge(.2)
      error <- ggplot2::geom_errorbar(ggplot2::aes(ymin = ciLower, ymax = ciUpper),
                                      colour = "black", width = .2, position = pd)
    }

    values <- 1.1 * range(summaryStat[["dependent"]])
    if (barPlotHorizontalZeroFix)
      values <- c(0, values)

    if (plotErrorBars) {
      ci.pos <- c(summaryStat[,"dependent"],
                  summaryStat[,"dependent"]-summaryStat[,"ci"],
                  summaryStat[,"dependent"]+summaryStat[,"ci"],
                  min(summaryStat[,"dependent"])*1.1,
                  max(summaryStat[,"dependent"])*1.1)
      yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(values, ci.pos))
    } else {
      yBreaks <- jaspGraphs::getPrettyAxisBreaks(values)
    }
    pd2 <- ggplot2::position_dodge2(preserve = "single")

    p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x = barPlotHorizontalAxis, y = dependent, group = 1)) +
      ggplot2::geom_hline(yintercept = 0, color = "#858585", size = 0.3) +
      ggplot2::geom_bar(stat = "identity", fill = "grey", col = "black", width = .6, position = pd2) +
      error +
      ggplot2::labs(y = yLabel, x = options[["barPlotHorizontalAxis"]]) +
      ggplot2::scale_y_continuous(breaks = yBreaks, limits = range(yBreaks), oob = scales::rescale_none) +
      ggplot2::scale_x_discrete(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[, "barPlotHorizontalAxis"])) +
      jaspGraphs::geom_rangeframe(sides = "l") +
      jaspGraphs::themeJaspRaw()

    barPlot$plotObject <- p
  }
  return()
}

.BANOVArainCloudPlots <- function(jaspContainer, dataset, options, errors, analysisType) {

  if (length(options[["rainCloudHorizontalAxis"]]) == 0L
      || options[["rainCloudHorizontalAxis"]] == ""
      || !is.null(jaspContainer[["containerRainCloudPlots"]]))
    return()

  rainCloudPlotsContainer <- createJaspContainer(title = gettext("Raincloud plots"))
  rainCloudPlotsContainer$position <- 4
  jaspContainer[["containerRainCloudPlots"]] <- rainCloudPlotsContainer
  rainCloudPlotsContainer$dependOn(c("dependent", "rainCloudHorizontalAxis", "rainCloudSeparatePlots",
                                     "rainCloudYAxisLabel", "rainCloudHorizontalDisplay"))

  if (errors$noVariables) {
    rainCloudPlotsContainer[["dummyplot"]] <- createJaspPlot(title = "")
    return()
  }

  groupVar <- options[["rainCloudHorizontalAxis"]]
  if (analysisType == "RM-ANOVA") {
    addLines   <- !(groupVar %in% unlist(options[["betweenSubjectFactors"]]))
    dependentV <- .BANOVAdependentName

    yLabel     <- options[["rainCloudYAxisLabel"]]
    if (trimws(yLabel) == "") {
      title <- gettext("Dependent")
      yLabel <- NULL
    } else {
      title <- yLabel
    }
  } else {
    addLines   <- FALSE
    dependentV <- options[["dependent"]]
    yLabel     <- options[["dependent"]]
    title      <- options[["dependent"]]
  }

  if (!is.null(options$rainCloudHorizontalDisplay) && options$rainCloudHorizontalDisplay)
    horiz <- TRUE
  else
    horiz <- FALSE


  if (options$rainCloudSeparatePlots != "") {
    for (thisLevel in levels(dataset[[options[["rainCloudSeparatePlots"]]]])) {
      subData      <- dataset[dataset[[options[["rainCloudSeparatePlots"]]]] == thisLevel, ]
      thisPlotName <- paste0(title, ": ", options[["rainCloudSeparatePlots"]], ": ", thisLevel)
      subPlot      <- createJaspPlot(title = thisPlotName, width = 480, height = 320)
      rainCloudPlotsContainer[[thisLevel]] <- subPlot
      p <- try(jaspTTests::.descriptivesPlotsRainCloudFill(subData, dependentV, groupVar, yLabel, groupVar, addLines, horiz, NULL))
      if(isTryError(p))
        subPlot$setError(.extractErrorMessage(p))
      else
        subPlot$plotObject <- p
    }
  } else {
    singlePlot <- createJaspPlot(title = title, width = 480, height = 320)
    rainCloudPlotsContainer[["rainCloudPlotSingle"]] <- singlePlot
    p <- try(jaspTTests::.descriptivesPlotsRainCloudFill(dataset, dependentV, groupVar, yLabel, groupVar, addLines, horiz, NULL))
    if(isTryError(p))
      singlePlot$setError(.extractErrorMessage(p))
    else
      singlePlot$plotObject <- p
  }
  return()
}

# Sample posteriors ----
.BANOVAsamplePosteriors <- function(jaspResults, dataset, options, model, state) {

  # TODO: the density approximation can become more efficient with a fast parametric density approximation
  # TODO: Bayes factor samples unobserved interaction levels, what to do?

  # if the most complex model is retrieved from the state?
  nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e4L else options[["samplesMCMC"]]
  nmodels    <- length(model[["models"]])
  postProbs  <- model[["postProbs"]]
  statistics <- vector("list", nmodels)

  randomFactors <- .BANOVAgetRandomFactors(options, model[["analysisType"]])
  dataTypes     <- .BANOVAgetDataTypes(dataset, model[["model.list"]][[nmodels]], randomFactors)
  levelInfo     <- .BANOVAgetLevelInfo(dataset, model[["model.list"]][[nmodels]], dataTypes)

  renameFrom <- renameTo <- NULL
  if (!is.null(state)) { # can we reuse some posteriors?
    reuseable <- model[["reuseable"]]

    # it's possible that a user just renames a level of a repeated measures factor
    # in that case everything can be reused, but we have to rename some parameters.
    # the same may happen when a variable becomes a nuisance parameter, because then
    # BayesFactor changes its order in the names.

    oldLevelInfo <- state[["levelInfo"]]$levelNames
    newLevelInfo <- levelInfo$levelNames

    sortTerms <- function(x) {
      # split b:a to c("b", "a"), sort it, and then paste it back
      # otherwise posteriors samples between different runs are not correctly retrieved from the state.
      sapply(strsplit(x, ":", fixed = TRUE), function(x) paste(sort(x), collapse = ":"))
    }

    tmp_old <- sortTerms(names(oldLevelInfo))
    tmp_new <- sortTerms(names(newLevelInfo))
    matches <- match(tmp_new, tmp_old)
    for (i in seq_along(matches)) {
      if (is.na(matches[i]))
        next

      j <- matches[i]
      if (!identical(oldLevelInfo[[j]], newLevelInfo[[i]])) {
        if (length(oldLevelInfo[[j]]) == length(newLevelInfo[[i]])) {
          renameFrom <- c(renameFrom, oldLevelInfo[[j]])
          renameTo   <- c(renameTo,   newLevelInfo[[i]])
        }
      }
    }

  } else {
    reuseable <- rep(NA, nmodels)
  }

  # NOTE: some code checks require saving all samples. To do so, change all samples to samples[[i]] and uncomment:
  # samples <- vector("list", nmodels)

  allParamNames <- c("mu", unlist(levelInfo$levelNames))
  nparam <- length(allParamNames)
  weightedMeans <- weights <- numeric(nparam)
  names(weights) <- names(weightedMeans) <- allParamNames
  allContinuous <- TRUE

  h <- (1 - options[["credibleInterval"]]) / 2
  probs <- c(h, 1-h)

  originalFun <- BayesFactor:::makeChainNeater
  on.exit(jaspBase:::assignFunctionInPackage(originalFun, "makeChainNeater", "BayesFactor"))
  jaspBase:::assignFunctionInPackage(.BANOVAmakeChainNeater, "makeChainNeater", "BayesFactor")

  samplingIssues <- list()
  startProgressbar(nmodels, gettext("Sampling posteriors"))
  for (i in seq_len(nmodels)) {
    # check if the state is reuseable and if it's not NULL, which means it didn't crash
    if (is.na(reuseable[i]) || is.null(state$statistics[[reuseable[i]]])) {

      if (i == 1L && is.null(model[["models"]][[i]][["bf"]])) {

        # NULL model only contains an intercept, use custom sampler
        # NOTE: RM-ANOVA never enters here (and would crash if it did)
        .setSeedJASP(options)
        samples <- .BANOVAsampleNullModel(dataset[[options[["dependent"]]]], nsamples = nIter)
        types <- NULL

      } else {

        # NOTE: we have to sample the random effects, otherwise we cant make predictions (needed for residuals and R^2)
        # put the dataset back in
        .setSeedJASP(options)
        bfObj <- model[["models"]][[i]][["bf"]]
        bfObj@data <- dataset
        samples <- try(BayesFactor::posterior(bfObj, iterations = nIter))
        if (isTryError(samples)) {
          # this may error whenever the Bayes factor couldn't be calculated
          next
        }
        types <- samples@model@dataTypes

        # BayesFactor stores an internal copy of the dataset, so it can still have old names
        if (length(renameFrom) > 0) {
          cnms <- colnames(samples)
          cnms <- plyr::mapvalues(cnms, renameFrom, renameTo, warn_missing = FALSE) # NOTE: dependency could be removed
          colnames(samples) <- cnms
        }

        # keep only relevant columns, drop sig2, g_xxx, ...
        idx <- match(allParamNames, colnames(samples), nomatch = 0L)
        samples <- samples[, idx, drop = FALSE]

        # for some odd reason, Bayesfactor uses as column name contcor1-contcor1
        # if there is a covariate AND fixed factors, but only contcor1 if all variables are continuous...
        if (all(types == "continuous")) {
          cnms <- colnames(samples)[-1L] # omit the intercept (mu) which is not changed by Bayesfactor
          colnames(samples)[-1L] <- paste0(cnms, "-", cnms)
        } else {
          allContinuous <- FALSE
        }

        # so some models may yield a bunch of NAs, for example,
        # BayesFactor::lmBF(contNormal ~ facGender + contGamma + facGender * contGamma, data = "debug.csv", whichRandom = "facGender")
        # we add a footnote and try to not to crash.
        if (anyNA(samples)) {

          originalRows <- nrow(samples)
          samples  <- samples[complete.cases(samples), , drop = FALSE]
          remainingRows     <- nrow(samples)

          samplingIssues[[length(samplingIssues) + 1L]] <- list(
            model         = model[["models"]][[i]][["title"]],
            originalRows  = originalRows,
            remainingRows = remainingRows
          )
          next
        }

      }

      # although matrixStats::colMeans2 is faster than .colMeans the cost of matrixStats:: is not worth it.
      nms <- colnames(samples)
      statistics[[i]]$names  <- nms # <- these are the names for all objects within one sublist
      statistics[[i]]$mean   <- .colMeans                (samples, m = nIter, n = NCOL(samples))
      statistics[[i]]$var    <- matrixStats::colVars     (samples)
      statistics[[i]]$cri    <- matrixStats::colQuantiles(samples, probs = probs)

      statistics[[i]]$approx <- .BANOVAfitDensity(samples = samples)
      statistics[[i]]$types  <- types

    } else { # reuse state
      statistics[[i]] <- state$statistics[[reuseable[i]]]
      nms             <- statistics[[i]]$names
      types           <- statistics[[i]]$types
      if (length(renameFrom) > 0) {
        nms <- plyr::mapvalues(nms, renameFrom, renameTo, warn_missing = FALSE) # NOTE: dependency could be removed
        statistics[[i]]$names <- nms
      }
      if (allContinuous)
        allContinuous <- all(types == "continuous")
    }

    # compute model averaged posterior means
    weightedMeans[nms] <- weightedMeans[nms] + postProbs[i] * statistics[[i]]$mean
    weights      [nms] <- weights      [nms] + postProbs[i]
    progressbarTick()
  }

  # find out which parameters are random -- this uses types from the last iteration above
  isRandom <- logical(nparam)
  idx <- which(types == "random")
  for (i in idx)
    isRandom <- isRandom | startsWith(names(weights), names(types)[i])

  # the weights used above don't sum to 1 because we consider a subset of the models.
  # Now we renormalize to ensure the weights used in each weighted mean do sum to 1.
  weightedMeans <- weightedMeans / weights

  # given the model averaged posterior means calculate the weighted posterior standard deviations
  weightedSds <- 0 * weightedMeans # keeps the names
  r <- nIter / (nIter - 1)
  # statistics has length 0 iff the posterior sampling errored
  widxGoodStatistics <- which(lengths(statistics) > 0L)
  for (i in widxGoodStatistics) {
    nms <- statistics[[i]]$names

    var <- statistics[[i]]$var # ~ sum((x - mean(x))^2
    mu  <- statistics[[i]]$mean

    # cc = sum((x - y)^2) - sum((x - mean(x))^2), where y is the weighted mean
    # hence, we can get the weighted sum of squares from the individual posterior variances
    cc <- weightedMeans[nms]^2 + mu^2 - 2 * mu * weightedMeans[nms]
    weightedSds[nms] <- weightedSds[nms] + postProbs[i] * (var + cc * r)

  }
  weightedSds <- sqrt(weightedSds / weights)

  # the loops above are optimized versions of the code below that also calculates the
  # weighted mean and weighted sd, but needs to store all samples at once.
  # xx <- lapply(samples, function(x, y) {
  #   if (y %in% colnames(x)) {dd <- model$posteriors$weightedRsqDens
  #     x[, y]
  #   } else {
  #     NULL
  #   }
  # }, y = "XY29udEJpbm9t-1") # XY29udEJpbm9t = contBinom
  # idx <- lengths(xx) > 0
  # xx <- unlist(xx[idx])
  # w <- postProbs[idx]
  # sum(w) # equals weights[2]
  # weighted.mean(xx,  rep(w / sum(w), each = nIter)) # equals weightedMeans[2]
  # sqrt(Hmisc::wtd.var(xx, rep(w / sum(w), each = nIter), method = "unbiased")) # equals weightedSds[2]

  # compute model averaged densities
  steps <- 2^9 # grid size for densities is 2^steps
  weightedDensities <- array(0, dim = c(steps, nparam, 2), dimnames = list(NULL, names(weightedMeans), c("x", "y")))
  ranges <- matrix(0, nparam, 2L, dimnames = list(allParamNames, NULL))

  # get the outermost x-values for each densities this could be vectorized with matrixStats::rowRanges if the
  # data are stored as a matrix but that is memory inefficient
  for (i in widxGoodStatistics) {
    nms <- statistics[[i]]$names
    indices <- which(nms %in% allParamNames)
    for (j in indices) {
      ranges[j, ] <- range(ranges[j, ], statistics[[i]]$approx$xRanges[j, ])
    }
  }

  # construct one common grid for each observed density
  for (i in seq_len(nparam)) {
    weightedDensities[, i, 1L] <- seq(ranges[i, 1], ranges[i, 2], length.out = steps)
  }

  for (i in widxGoodStatistics) {
    nms <- statistics[[i]]$names
    ind <- match(nms, allParamNames)
    for (j in seq_along(ind)) {
      # approximate all distributions on a common grid
      ap <- approx(x    = statistics[[i]]$approx$fit[, j,      1L],
                   y    = statistics[[i]]$approx$fit[, j,      2L],
                   xout = weightedDensities         [, ind[j], 1L],
                   # not observed is approximated to 0
                   yleft = 0, yright = 0)

      weightedDensities[, ind[j], 2L] <- weightedDensities[, ind[j], 2L] + ap$y * postProbs[i]

    }
  }
  # postProbs don't sum to one so we renormalize the densities
  weightedDensities[, , 2L] <- sweep(weightedDensities[, , 2L], 2, weights, FUN = `/`)

  # if all model with one parameter failed, the line above introduces NaNs since 0 / 0 is NaN
  weightedDensities[is.nan(weightedDensities)] <- 0

  # # compute weighted CRIs
  # weightedCRIs <- matrix(NA, nparam, 2L, dimnames = list(names(weights), NULL))
  # cri <- options[["credibleInterval"]]
  # for (i in seq_len(nparam)) {
  #   weightedCRIs[i, ] <- .BANOVAapproxCRI(weightedDensities[, i, 1L], weightedDensities[, i, 2L], cri)
  # }
  #
  # # compute residuals and r-squared
  # # sample from the joint posterior over models and parameters
  # tmp  <- .BANOVAgetSMIResidRsq(weightedDensities, dataset, model$model.list[[nmodels]], nIter, weights)
  # means  <- rowMeans(tmp$resid)
  # h <- (1 - cri) / 2
  # quants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1 - h))
  #
  # # the code above is equivalent to the code below, but the code below needs to keep all posterior samples of
  # # all models in memory.
  # # weights <- rep(postProbs, each = nIter)
  # # independentVariable <- all.vars(.BANOVAgetModelFormulaFromBFobj(model$models[[2L]]))[1L]
  # # resids <- matrix(NA, nrow(dataset), 0)
  # # rsq <- vector("list", nmodels)
  # # # get residuals of all models individually
  # # for (i in seq_len(nmodels)) {
  # #   if (is.null(model$models[[i]]$bf)) {
  # #     tmp2    <- .BANOVAresidualsNullModel(nIter, dataset[[independentVariable]])
  # #   } else {
  # #     tmp2 <- .BANOVAgetSMIResidRsq(
  # #       posterior = samples[[i]],
  # #       dataset   = dataset,
  # #       formula   = .BANOVAgetModelFormulaFromBFobj(model$models[[i]])
  # #     )
  # #   }
  # #   resids <- cbind(resids, tmp2$resid)
  # #   rsq[[i]] <- tmp2$rsq
  # # }
  # # # compute weighted mean for each row
  # # means2 <- tcrossprod(weights / nIter, resids)
  # # plot(means, means2); abline(0, 1)
  # # quants2 <- apply(resids, 1L, Hmisc::wtd.quantile, weights = weights, probs = c(0.025, 0.975))
  # # plot(quants[, 1], quants2[1, ]); abline(0, 1)
  # # plot(quants[, 2], quants2[2, ]); abline(0, 1)
  #
  # # all information for q-q plot of residuals
  # weightedResidSumStats <- matrix(c(means, quants), nrow = length(means), ncol = 3L,
  #                                 dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))
  #
  # # all information for r-squared density plot
  # weightedRsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
  # weightedRsqCri <- quantile(tmp$rsq, probs   = c(h, 1 - h))

  return(list(
    statistics = statistics, weights = weights, weightedMeans = weightedMeans, weightedSds = weightedSds,
    weightedDensities = weightedDensities,
    #weightedCRIs = weightedCRIs, weightedResidSumStats = weightedResidSumStats,
    #weightedRsqDens = weightedRsqDens, weightedRsqCri = weightedRsqCri,
    allContinuous = allContinuous, isRandom = isRandom, levelInfo = levelInfo,
    samplingIssues = samplingIssues
  ))
}

.BANOVAcomputePosteriorCRI <- function(dataset, options, model, posterior) {

  nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e4L else options[["samplesMCMC"]]
  weightedDensities <- posterior[["weightedDensities"]]
  weights <- posterior[["weights"]]
  nmodels    <- length(model[["models"]])
  nparam <- length(weights)

  # compute weighted CRIs
  weightedCRIs <- matrix(0, nparam, 2L, dimnames = list(names(weights), NULL))
  cri <- options[["credibleInterval"]]
  for (i in seq_len(nparam)) {
    if (!all(weightedDensities[, i, 2L] == 0)) { # only TRUE if we explicitly set it to 0
      weightedCRIs[i, ] <- .BANOVAapproxCRI(weightedDensities[, i, 1L], weightedDensities[, i, 2L], cri)
    }
  }

  # compute residuals and r-squared
  # sample from the joint posterior over models and parameters
  tmp  <- .BANOVAgetSMIResidRsq(weightedDensities, dataset, model$model.list[[nmodels]], nIter, model, options)
  means  <- rowMeans(tmp$resid)
  h <- (1 - cri) / 2
  quants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1 - h))

  # the code above is equivalent to the code below, but the code below needs to keep all posterior samples of
  # all models in memory. plug this body of this function into the call inside .BANOVAsamplePosteriors() to check
  # weights <- rep(postProbs, each = nIter)
  # independentVariable <- all.vars(.BANOVAgetModelFormulaFromBFobj(model$models[[2L]]))[1L]
  # resids <- matrix(NA, nrow(dataset), 0)
  # rsq <- vector("list", nmodels)
  # # get residuals of all models individually
  # for (i in seq_len(nmodels)) {
  #   if (is.null(model$models[[i]]$bf)) {
  #     tmp2    <- .BANOVAresidualsNullModel(nIter, dataset[[independentVariable]])
  #   } else {
  #     tmp2 <- .BANOVAgetSMIResidRsq(
  #       posterior = samples[[i]],
  #       dataset   = dataset,
  #       formula   = .BANOVAgetModelFormulaFromBFobj(model$models[[i]])
  #     )
  #   }
  #   resids <- cbind(resids, tmp2$resid)
  #   rsq[[i]] <- tmp2$rsq
  # }
  # # compute weighted mean for each row
  # means2 <- tcrossprod(weights / nIter, resids)
  # plot(means, means2); abline(0, 1)
  # quants2 <- apply(resids, 1L, Hmisc::wtd.quantile, weights = weights, probs = c(0.025, 0.975))
  # plot(quants[, 1], quants2[1, ]); abline(0, 1)
  # plot(quants[, 2], quants2[2, ]); abline(0, 1)

  # all information for q-q plot of residuals
  weightedResidSumStats <- matrix(c(means, quants), nrow = length(means), ncol = 3L,
                                  dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))

  # all information for r-squared density plot
  weightedRsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
  weightedRsqCri <- quantile(tmp$rsq, probs   = c(h, 1 - h))
  weightedRsqMean <- mean(tmp$rsq)

  return(list(
    weightedCRIs = weightedCRIs, weightedResidSumStats = weightedResidSumStats,
    weightedRsqDens = weightedRsqDens, weightedRsqCri = weightedRsqCri, weightedRsqMean = weightedRsqMean
  ))
}

.BANOVAsampleNullModel <- function(nsamples, dependent) {

  # sample from posterior under NULL model, needed to compute model averaged residuals.

  rt.scaled <- function (n, df, mean = 0, sd = 1, ncp) {
    mean + sd * stats::rt(n, df, ncp = ncp)
  }

  # sample from the marginal posterior distribution of the mean t-distribution, based on Murphy (2007)
  n      <- length(dependent)
  muObs  <- mean(dependent)
  varObs <- var(dependent)

  # uninformative priors
  k0  <- 1e-6
  a0  <- 1e-5
  b0  <- 1e-5

  an  <- a0 + n / 2
  bn  <- b0 + 0.5 * (n - 1L) * varObs + k0 * n * muObs / (2 * (k0 + n))
  kn  <- k0 + n
  mun <- n * muObs / kn

  samples <- matrix(rt.scaled(n = nsamples, df = 2 * an, mean = mun, sd = bn / (an * kn), ncp = 0), nsamples,
                    1L, dimnames = list(NULL, "mu"))
  return(samples)
}

.BANOVAresidualsNullModel <- function(nsamples, dependent) {

  # sample from posterior under NULL model, needed to compute model averaged residuals.

  rt.scaled <- function (n, df, mean = 0, sd = 1, ncp) {
    mean + sd * stats::rt(n, df, ncp = ncp)
  }

  # sample from the marginal posterior distribution of the mean t-distribution, based on Murphy (2007)
  n      <- length(dependent)
  muObs  <- mean(dependent)
  varObs <- var(dependent)

  # uninformative priors
  k0  <- 1e-6
  a0  <- 1e-5
  b0  <- 1e-5

  an  <- a0 + n / 2
  bn  <- b0 + 0.5 * (n - 1L) * varObs + k0 * n * muObs / (2 * (k0 + n))
  kn  <- k0 + n
  mun <- n * muObs / kn

  samples <- rt.scaled(n = nsamples, df = 2 * an, mean = mun, sd = bn / (an * kn), ncp = 0)

  # compute all pairwise differences (residuals)
  preds <- tcrossprod(rep(1, n), samples)

  resids <- dependent - preds

  rsq <- .BANOVAcomputeRsq(dependent, preds)

  return(list(resids = resids, rsq = rsq))

}

.BANOVAfitDensity <- function(samples, gridsize = 100L, getXRange = TRUE) {

  # ideally we don't do kernel density estimation but instead use some parametric approximation that is suited
  # for unimodal distributions. It should also be fast, e.g., fit using method of moments (or another analytic method)
  nc <- ncol(samples)
  fits  <- array(
    data     = NA,
    dim      = c(gridsize, nc, 2),
    dimnames = list(NULL, colnames(samples), c("x", "y"))
  )

  n <- nrow(samples)
  del0 <- (1/(4 * pi))^(1/10) # normal kernel
  for (i in seq_len(nc)) {
    # bandwidth definition from KernSmooth::bkde, but adjusted when sqrt(var(samples[, i])) is infinite
    vx <- sqrt(var(samples[, i]))
    if (is.infinite(vx)) # contWide can cause the variance to be infinite
      vx <- sqrt(.Machine$double.xmax) # some big number as a fallback
    bandwidth <- del0 * (243/(35 * n))^(1/5) * vx
    fits[, i, ] <- do.call(cbind, KernSmooth::bkde(samples[, i], gridsize = gridsize, bandwidth = bandwidth))
  }
  if (getXRange) {
    if (nc == 1L) {
      xRanges <- matrix(range(fits[, , "x"]), 1L, 2L)
    } else {
      xRanges <- matrixStats::colRanges(fits[, , "x"])
    }
    rownames(xRanges) <- colnames(samples)
  } else {
    xRanges <- NULL
  }
  return(list(fit = fits, xRanges = xRanges))
}

.BANOVAapproxCRI <- function(x, y, cri = 0.95) {

  # approximate cdf
  y2 <- cumsum(y)
  y2 <- y2 / y2[length(y2)]

  h <- (1 - cri) / 2
  # find closest observed match
  idx <- c(
    which.min(abs(y2 - h)),
    which.min(abs(y2 - 1 + h))
  )
  return(x[idx])
}

# plot posteriors ----
.BANOVAgetBMAdensity <- function(samples, weights, fromTo = NULL, n = 2^10) {

  # @param samples, list of samples
  # @param weigths, vector of numeric weights
  # @param fromTo vector of length 2 specifying lower and upper bound (optional).
  # @return a list with $x the x-coordinates and $y the y-coordinates.

  if (length(samples) != length(weights))
    stop(gettext("length of samples must be equal to length of weights!"))

  # remove NULL indices
  idxNonNull <- lengths(samples) > 0
  weights <- weights[idxNonNull]
  samples <- samples[idxNonNull]

  # renormalize the weights so output is a proper density function (TODO: do we want this, or BAS style?)
  weights <- weights / sum(weights)

  # create x-grid for density
  if (is.null(fromTo))
    fromTo <- range(sapply(samples, range))
  xs <- seq(fromTo[1L], fromTo[2L], length.out = 2^10)

  # compute weighted density
  ys <- numeric(n)
  for (i in seq_along(samples)) {
    ys <- ys + weights[i] * density(samples[[i]], from = fromTo[1L], to = fromTo[2L], n = n)$y
  }

  return(list(x = xs, y = ys))
}

.BANOVAreSample <- function(n, x, y, prop0 = NULL) {

  cdf <- cumsum(y)
  max <- cdf[length(cdf)]
  if (max == 0) # can't salvage this
    return(numeric(n))
  cdf <- cdf / max
  if (!is.null(prop0) && prop0 <= (1 - sqrt(.Machine$double.eps))) {
    i1 <- runif(n) <= prop0
    samples <- numeric(n)
    samples[i1] <- approx(cdf, x, runif(sum(i1)), rule = 2)$y
  } else {
    samples <- approx(cdf, x, runif(n), rule = 2)$y
  }
  return(samples)
}

.BANOVAgetSMIResidRsq <- function(posterior, dataset, formula, nIter, model, options) {

  # @param posterior  object from Bayesfactor package, or SxPx2 array of weighted densities
  # @param dataset    dataset
  # @param formula    the formula for this specific model. Supply the formula of the most complex model for BMA inference.
  # @param nIter      number of posterior samples
  # @param model      model estimates
  # @param levelInfo  for each variable, how many levels there are.
  #
  # @return           a list with residuals, predictions, and r-squared

  # @details the matrix multiplication in this function allocates an array of nobs * nsamples, which can be enormous.
  # if is more memory efficient to use for loops (but slower in R) to calculate predictions for each observation
  # and posterior sample. This could be done in the future if performance is an issue. However, this likely cannot be
  # done efficiently in R. For modelAveraged inference, model and levelInfo need to be given.

  # here we need the data in a one-hot encoded way
  # first one is dependent variable, rest are independent variables
  dvs <- all.vars(formula)[1L]
  ivs <- all.vars(formula)[-1L]

  # idx contains variables that need to be one-hot encoded
  idx <- sapply(dataset[ivs], is.factor)
  # idx <- isFactor & names(isFactor) %in% dvs

  # do one-hot encoding (thank base R for calling a function `contrast` and giving it the argument `contrast`...)
  datOneHot <- model.matrix(formula, data = dataset[c(dvs, ivs)],
                            contrasts.arg = lapply(dataset[ivs][idx], contrasts, contrasts = FALSE))

  nobs <- nrow(datOneHot)

  # possible switch for memory efficient alternatives:
  # size of 1 numeric element in bytes
  # doubleSize <- as.numeric(utils::object.size(double(2)) - utils::object.size(double(1)))
  # 1073741824 is 1   GB in bytes
  # 536870912  is 512 MB in bytes
  # the size of an empty matrix is ignored here
  # useMatmult <- ((doubleSize * nobs * nIter) / 536870912.0) <= 1

  # if (useMatmult) {
  if (length(dim(posterior)) == 3L) {
    # we're doing model averaged inference

    # recompute the levelInfo, this is relatively cheap and we cannot be sure that the levelInfo in the
    # posterior was not retrieved from state (and contains too many or too few variables)
    nmodels   <- length(model[["model.list"]])
    randomFactors <- .BANOVAgetRandomFactors(options, model[["analysisType"]])
    dataTypes <- .BANOVAgetDataTypes(dataset, model[["model.list"]][[nmodels]], randomFactors)
    levelInfo <- .BANOVAgetLevelInfo(dataset, model[["model.list"]][[nmodels]], dataTypes)

    # which parameters are nuisance?
    parameterNames <- c("mu", names(levelInfo[["levelNames"]]))
    # the variables that are nuisance, e.g., contBinom
    isNuisanceVar   <- c(TRUE, parameterNames[-1L] %in% model[["nuisance"]])
    names(isNuisanceVar) <- parameterNames
    # the parameters that are nuisance, e.g., contBinom-0 and contBinom-1
    isNuisanceParam <- rep(isNuisanceVar, c(1, lengths(levelInfo[["levelNames"]])))
    names(isNuisanceParam) <- c("mu", unlist(levelInfo[["levelNames"]]))

    if (!all(names(isNuisanceParam) %in% colnames(posterior)))
      stop("!all(names(isNuisance) %in% colnames(posterior)) was not true.")

    # we cannot use model[["effects"]] because that exludes nuisance parameters, so we rebuild one that doesn't exclude these
    effects <- matrix(FALSE, nmodels, length(isNuisanceParam), dimnames = list(NULL, names(isNuisanceParam)))
    for (i in seq_along(model[["model.list"]])) {
      if (is.null(model[["model.list"]][[i]])) { # implies an intercept-only null model
        cnms <- "mu"
      } else {
        idx <- names(levelInfo[["levelNames"]]) %in% attr(terms(model[["model.list"]][[i]]), "term.labels")
        cnms <- c("mu", unlist(levelInfo[["levelNames"]][idx]))
      }
      effects[i, cnms] <- TRUE#c(TRUE, names(levelInfo[["levelNames"]]) %in% attr(terms(model[["model.list"]][[i]]), "term.labels"))
    }

    # sample from the BMA posterior
    postProbs <- model[["postProbs"]]
    postProbs[is.na(postProbs)] <- 0
    .setSeedJASP(options)

    modelIdx <- sample(length(postProbs), nIter, TRUE, postProbs) # resample the models according to posterior prob
    samples <- matrix(0, nrow = nIter, ncol = ncol(datOneHot), dimnames = list(NULL, names(isNuisanceParam)))

    # resample nuisance parameters
    for (i in which(isNuisanceParam)) {
      .setSeedJASP(options)
      samples[, i] <- .BANOVAreSample(n = nIter, x = posterior[, i, "x"], y = posterior[, i, "y"])
    }

    # resample non nuisance parameters
    for (i in which(!isNuisanceParam)) {
      idx <- names(isNuisanceParam)[i]
      mult <- effects[modelIdx, idx] # how often models with this variable are sampled
      nTemp <- sum(mult)
      if (nTemp > 0L){
        .setSeedJASP(options)
        samples[mult, i] <- .BANOVAreSample(n = sum(mult), x = posterior[, i, "x"], y = posterior[, i, "y"])
      }
    }

    preds <- tcrossprod(datOneHot, samples)

  } else {
    # TODO: ensure that column order is always correct for both!
    # otherwise we're doing inference for a single model
    preds <- tcrossprod(datOneHot, posterior)
  }

  # calculate residuals (correctly recycles dat[[dvs]])
  resid <- dataset[[dvs]] - preds

  rsq <- .BANOVAcomputeRsq(dataset[[dvs]], preds)
    # eta <- .BANOVAcomputeEtasq(dat[[dvs]], preds)

  # } else {
    # slow but memory efficient stuff


  # }

  return(list(resid = resid, preds = preds, rsq = rsq))
}


# HF computation ----
.BANOVAinitBayesFactor <- function() {

  defaults <- list(
    BFMaxModels         = 50000,
    BFpretestIterations = 100,
    BFapproxOptimizer   = "optim",
    BFapproxLimits      = c(-15, 15),
    BFprogress          = interactive(),
    BFfactorsMax        = 5
  )
  idx <- setdiff(names(defaults), names(options()))
  options(defaults[idx])

}

.BANOVAcomputeRsq <- function(obs, preds) {

  # NOTE: R^2 != cor(obs, predict) because the predictions from the posterior samples are not OLS estimates.
  if (is.null(dim(preds)))
    preds <- matrix(preds, ncol = 1L)

  # definition from http://www.stat.columbia.edu/~gelman/research/unpublished/bayes_R2.pdf
  colVars2 <- function(x) {matrixStats::colVars(sweep(x, 2L, matrixStats::colMeans2(x)))}
  ee <- colVars2(preds)
  ff <- colVars2(obs - preds)
  gg <- ee / (ee + ff)
  if (anyNA(gg)) {
    idx <- is.na(gg)
    if (all(idx)) {
      # this means that either the posterior sampling failed or the input data is bogus.
      .quitAnalysis(gettext("All predictions had zero variance. Check the predictors for anomalies or increase the number of posterior samples."))
    } else {
      # this shouldn't happen, but is necessary because otherwise stuff will crash downstream.
      gg[is.na(gg)] <- mean(gg, na.rm = TRUE)
    }
  }
  return(gg)

}

.BANOVAcomputeEtasq <- function(obs, preds) {

  # partial eta^2
  if (is.null(dim(preds)))
    preds <- matrix(preds, ncol = 1L)

  # return(1 - matrixStats::colVars(preds - obs) / var(obs))
  # return(1 - matrixStats::rowVars(preds - obs) / var(obs))
  return(matrixStats::colVars(preds) / var(obs))

}

.BANOVAmakeChainNeater <- function(chains, Xnames, formula, data, dataTypes, gMap, unreduce, continuous, columnFilter) {

  # so BayesFactor:::makeChainNeater doesn't always make things neater
  # see also https://github.com/jasp-stats/jaspAnova/pull/43#discussion_r656430090

  # example where BayesFactor goes wrong:
  # dd <- structure(list(
  #   rm_factor = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Level 1", "Level 2"), class = "factor"),
  #   dependent = c(6, 6, 5, 10, 6, 9),
  #   age = c(59.2161793866222, 59.2161793866222, 53.4457078686802, 53.4457078686802, 50.3248450015473, 50.3248450015473),
  #   subject = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor")),
  #   row.names = c(NA, 6L), class = "data.frame"
  # )
  #
  # bf_obj_1   <- BayesFactor::lmBF(dependent ~ age + subject,             data = dd, whichRandom = "subject")
  # bf_obj_2   <- BayesFactor::lmBF(dependent ~ age + subject + rm_factor, data = dd, whichRandom = "subject")
  #
  # print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- ugly
  # print(BayesFactor::posterior(bf_obj_2, iterations = 3)) # <- good
  #
  # originalFun <- BayesFactor:::makeChainNeater
  # jaspBase:::assignFunctionInPackage(.BANOVAmakeChainNeater, "makeChainNeater", "BayesFactor")
  # print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- good
  #
  # jaspBase:::assignFunctionInPackage(originalFun, "makeChainNeater", "BayesFactor")
  # print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- ugly

  # this part is identical to BayesFactor:::makeChainNeater
  P = length(gMap)
  nGs = max(gMap) + 1
  factors = BayesFactor:::fmlaFactors(formula, data)[-1]
  dataTypes = dataTypes[names(dataTypes) %in% factors]
  types = BayesFactor:::termTypes(formula, data, dataTypes)
  lastPars = ncol(chains) + (-nGs):0
  if (any(continuous)) {
    gNames = paste("g", c(names(types[types != "continuous"]), "continuous"), sep = "_")
  } else {
    gNames = paste("g", names(types), sep = "_")
  }
  if (is.null(columnFilter)) {
    ignoreCols = ignoreCols = rep(0, P)
  } else {
    ignoreCols = BayesFactor:::filterVectorLogical(columnFilter, names(gMap))
  }
  # here we're also checking here if any dataTypes are "random" instead of only checking for fixed dataTypes
  if (!unreduce | !any(dataTypes %in% c("fixed", "random"))) {
    labels = c("mu", Xnames[!ignoreCols], "sig2", gNames)
    colnames(chains) = labels
    return(chains)
  }
  parLabels = BayesFactor:::makeLabelList(formula, data, dataTypes, unreduce,
                                          columnFilter)
  labels = c("mu", parLabels)
  betaChains = chains[, 1:(ncol(chains) - 2 - nGs) + 1, drop = FALSE]
  betaChains = BayesFactor:::ureduceChains(betaChains, formula, data, dataTypes, gMap, ignoreCols)
  newChains = cbind(chains[, 1], betaChains, chains[, lastPars])
  labels = c(labels, "sig2", gNames)
  colnames(newChains) = labels
  return(newChains)
}

.BANOVAcomputeInteractionsMatrix <- function(effects) {

  neffects <- length(effects)
  interactions.matrix <- matrix(FALSE, nrow = neffects, ncol = neffects)
  rownames(interactions.matrix) <- colnames(interactions.matrix) <- effects
  if (neffects > 1L) {
    effect.components <- sapply(effects, strsplit, split = ":", fixed = TRUE)

    for (e in seq_len(neffects)) {
      interactions.matrix[e, ] <- sapply(1:neffects, function(ee) {
        (sum(effect.components[[e]] %in% effect.components[[ee]]) == length(effect.components[[e]]))
      })
    }
    diag(interactions.matrix) <- FALSE
  }
  return(interactions.matrix)
}

# Model prior ----
.BANOVAcomputePriorModelProbs <- function(models, nuisance, options) {

  if (options[["modelPrior"]] == "uniform") {
    modelprobs <- rep(1 / length(models), length(models))
  } else if (options[["modelPrior"]] == "custom") {

    inclusionProbabilities <- vapply(options[["customPriorSpecification"]], `[[`, FUN.VALUE = numeric(1L), "inclusionProbability")
    modelprobs <- .BANOVAcustomInclusionProbabilitiesToModelProbabilities(models, nuisance, inclusionProbabilities, enforceMarginality = options[["enforcePrincipleOfMarginalityFixedEffects"]])

  } else {

    noPredictorsPerModel <- vapply(models, function(x) if (is.null(x)) 0L else length(attr(terms(x), "term.labels")), FUN.VALUE = integer(1L))
    noNuisancePredictors <- noPredictorsPerModel[1L]
    noPredictorsPerModel <- noPredictorsPerModel - noNuisancePredictors
    totalNoPredictors    <- noPredictorsPerModel[length(noPredictorsPerModel)]

    if (options[["modelPrior"]] %in% c("betaBinomial", "Wilson", "Castillo")) {

      switch (options[["modelPrior"]],
              "betaBinomial"  = {alpha = options[["betaBinomialParameterA"]]; beta = options[["betaBinomialParameterB"]]                    },
              "Wilson"        = {alpha = 1.0;                                 beta = totalNoPredictors * options[["wilsonParameterLambda"]] },
              "Castillo"      = {alpha = 1.0;                                 beta = totalNoPredictors ^ options[["castilloParameterU"]]    }
      )

      modelprobs <- dBetaBinomialModelPrior(noPredictorsPerModel, totalNoPredictors, alpha, beta)

    } else if (options[["modelPrior"]] == "Bernoulli") {
      modelprobs <- dBernoulliModelPrior(noPredictorsPerModel, totalNoPredictors, options[["bernoulliParameter"]])
    }

    # both the betabinomial and bernoulli model priors do not sum to 1 when marginality is respected
    modelprobs <- modelprobs / sum(modelprobs)
  }

  return(modelprobs)
}

dBetaBinomialModelPrior <- function(k, n, alpha = 1.0, beta = 1.0, log = FALSE) {
  # NOTE: this is not the pdf of the betabinomial, but the pdf divided by choose(n, k), so that it's the probabiltiy of a particular model
  logprobability <- lbeta(k + alpha, n - k + beta) - lbeta(alpha, beta)
  if (log)
    return(logprobability)
  return(exp(logprobability))
}

dBernoulliModelPrior <- function(k, n, prob = 0.5, log = FALSE) {
  logprobability <- k * log(prob) + (n - k) * log(1 - prob)
  if (log)
    return(logprobability)
  return(exp(logprobability))
}

.BANOVAcustomInclusionProbabilitiesToModelProbabilities <- function(modelList, nuisance, inclusionProbabilities, enforceMarginality = TRUE) {

  # TODO: discuss with the team:
  # when marginality is enforced, how should the prior on interaction effects be interpreted when a user wants matched models?

  formulaFullModel <- modelList[[length(modelList)]]
  termsFullModel <- terms(formulaFullModel)
  termLabels <- attr(termsFullModel, "term.labels")
  fullNonNuisance <- !(termLabels %in% nuisance)
  termLabels  <- termLabels[fullNonNuisance]

  # the principle of marginality is enforced (or not) through the function getPresentInteractions
  if (enforceMarginality) {
    # in this case, interaction effects do not appear without the main effects, so the user provided probability
    # is interpreted as a conditional probability given that the main effects are included.
    # thus, p(interaction) = 0 if the main effects are missing, and the user provided probability otherwise.
    #
    # for example, consider p(A) = P(B) = .5, P(A:B) = .2
    #
    # model        P(model)
    # A            P(A) * (1 - P(B))                # <- here we do NOT do '* (1 - P(A:B))'
    # A, B         p(A) * P(B)       * (1 - P(A:B))
    # A, B, A:B    p(A) * P(B)       * P(A:B)

    termFactors <- attr(termsFullModel, "factors")[, termLabels, drop = FALSE]
    termFactors <- termFactors[rownames(termFactors) %in% termLabels, , drop = FALSE]
    rnmsTermFactors <- rownames(termFactors)
    termOrders  <- attr(termsFullModel, "order")[fullNonNuisance]

    # create a list where the names refer to the interactions and the elements refer to all children of that interaction
    listOfDescendants <- setNames(lapply(termLabels, function(x) {
      idx <- which(termFactors[, x] == 1L)
      rnmsTermFactors[idx]
    }), termLabels)
    termIsNotInteraction <- termOrders == 1L

    getPresentInteractions <- function(currentTermLabels) {
      vapply(listOfDescendants, function(descendants, currentTermLabels) {
        all(descendants %in% currentTermLabels)
      }, logical(1L), currentTermLabels) | termIsNotInteraction
    }

    # NOTE: the loop below fails for some combinations of user priors, but I can see how this behavior is desirable
    # the users provide the raw p(interaction) however, the code below assumes that this is
    # p(interaction | main effects), so to compensate we divide each interaction by the product of the components.
    # however, for some inclusion probabilities of the main effects you get impossible values, for example:
    # p(A_) = 0.1, p(B) = 0.1, P(A*B) = 0.9 => 0.9 / 0.1^2 = 90, which is bigger than 1 and should be impossible.
    # for (i in which(!termIsNotInteraction)) { # loop over all interaction terms
    #   idxSubterms <- which(termLabels %in% listOfDescendants[[termLabels[i]]])
    #   inclusionProbabilities[i] <- inclusionProbabilities[i] / prod(inclusionProbabilities[idxSubterms])
    # }

  } else {
    # in this case, interaction effects do appear without the main effects, so the user provided probability
    # is interpreted in the same way for interaction effects and fixed effects.
    # it's interpreted as an unconditional inclusion probability
    getPresentInteractions <- function(...) {
      rep(TRUE, length(termLabels))
    }
  }

  exclusionProbabilties <- 1 - inclusionProbabilities

  # so terms(y ~ b + a:b) reorders a:b to b:a but terms(y ~ a + a:b) does not...
  termLabelOrder <- strsplit(termLabels, ":", fixed = TRUE)

  modelProbs <- numeric(length(modelList))
  for (i in seq_along(modelList)) {
    if (is.null(modelList[[i]])) {

      excludedTerms <- rep(TRUE, length(termLabels))
      excludedTerms <- excludedTerms & getPresentInteractions(NULL)
      modelProbs[i] <- prod(exclusionProbabilties)

    } else {

      currentTerms <- terms(modelList[[i]])
      currentTermLabels <- setdiff(labels(currentTerms), nuisance)

      currentTermLabels <- .BANOVAorderTermsByKnownOrder(currentTerms, currentTermLabels, termLabelOrder)

      presentInteractions <- getPresentInteractions(currentTermLabels)

      includedTerms <- termLabels %in% currentTermLabels
      excludedTerms <- !includedTerms

      includedTerms <- includedTerms & presentInteractions
      excludedTerms <- excludedTerms & presentInteractions

      modelProbs[i] <- prod(inclusionProbabilities[includedTerms], exclusionProbabilties[excludedTerms])

    }
  }

  return(modelProbs)

}

# Other ----
.BANOVAgetRScale <- function(options, analysisType) {

  if (options[["priorSpecificationMode"]] == "acrossParameters") {

    rscaleFixed   <- options[["cauchyPriorScaleFixedEffects"]]
    rscaleRandom  <- options[["cauchyPriorScaleRandomEffects"]]
    rscaleCont    <- if (analysisType == "ANOVA") "medium" else options[["cauchyPriorScaleCovariates"]]
    rscaleEffects <- NULL

  } else {

    # NOTE: this still need a value otherwise BayesFactor returns NA for the bf
    # default values of lmBF
    rscaleFixed   <- "medium"
    rscaleRandom  <- "nuisance"
    rscaleCont    <- if (analysisType == "ANOVA") "medium" else options[["cauchyPriorScaleCovariates"]]

    rscaleEffectsNames <- vapply(options[["customPriorSpecification"]], FUN.VALUE = character(1L), function(x) {
      paste(x[["components"]], collapse = ":")
    })
    rscaleEffects <- vapply(options[["customPriorSpecification"]], FUN.VALUE = numeric(1L), `[[`, "scaleFixedEffects")

    rscaleEffectsKeep <- if (analysisType == "ANOVA") {
      rep(TRUE, length(rscaleEffects))
    } else {
      vapply(options[["customPriorSpecification"]], FUN.VALUE = logical(1L), function(x) {
        is.null(options[["covariates"]]) || !any(x[["components"]] %in% options[["covariates"]])
      })
    }

    rscaleEffects <- rscaleEffects[rscaleEffectsKeep]
    names(rscaleEffects) <- rscaleEffectsNames[rscaleEffectsKeep]

  }
  return(list(rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom, rscaleCont = rscaleCont, rscaleEffects = rscaleEffects))
}

.BANOVAgetShownTableSizeAndAddFootnote <- function(modelTable, nmodels, modelsShown, numModelsShown) {
  if (modelsShown == "unlimited") {
    shownTableSize <- nmodels
  } else {
    shownTableSize <- min(nmodels, numModelsShown)
    if (shownTableSize < nmodels)
      modelTable$addFootnote(gettextf("Showing the best %1$d out of %2$d models.", numModelsShown, nmodels))
  }
  return(shownTableSize)
}

# # .BANOVAupdateRscales <- function() {
#
#   # for some reason, BayesFactor supports custom rscales for but throws a error if you try to set them for continuous parameters
#   # this temporarily overwrite the stop call in this BayesFactor:::createRscales
#   # the function will throw an error if BayesFactor:::createRscales is updated
#
#   # originalCreateRscales <- BayesFactor:::createRscales
#   # originalBody <- body(originalCreateRscales)
#   # if (!identical(originalBody[[17]][[3]][[2]][[1]], quote(invisible))) {
#   #   if (!identical(originalBody[[17]][[3]][[2]][[1]], quote(stop)))
#   #     stop("Bayesfactor got an update, and createRscales/ .BANOVAupdateRscales needs to be fixed!", domain = NA)
#   #   originalBody[[17L]][[3L]][[2L]][[1L]] <- substitute(invisible)
#   #   newCreateRscales <- originalCreateRscales
#   #   body(newCreateRscales) <- originalBody
#   #
#   #   jaspBase::assignFunctionInPackage(newCreateRscales, "createRscales", "BayesFactor")
#   # }
#   #
#   # tmp <- methods::getMethod(BayesFactor::compare, list(numerator="BFlinearModel", denominator="missing", data="data.frame"))
#   # fun <- tmp@.Data
#   # newfun <- fun
#   # body <- body(fun)
#   # if (!identical(body[[19L]][[2L]][[2L]][[4L]][[2L]], quote(FALSE))) {
#   #   if (!identical(body[[19L]][[2L]][[2L]][[4L]][[2L]], quote(all(relevantDataTypes == "continuous"))))
#   #     stop("Bayesfactor got an update, and BayesFactor::`.__T__compare:BayesFactor`$`BFlinearModel#missing#data.frame`/ .BANOVAupdateRscales needs to be fixed!", domain = NA)
#   #   body[[19L]][[2L]][[2L]][[4L]][[2L]] <- substitute(FALSE) # do not take the fast path
#   #   body(newfun) <- body
#   #
#   #   # horrible but necessary
#   #   env <- environment()#getNamespace(jaspAnova)
#   #   assign("compare", BayesFactor::compare, envir = env)
#   #   setMethod("compare", signature(numerator = "BFlinearModel", denominator = "missing", data = "data.frame"), newfun, where = env)
#
#     # env <- getNamespace("BayesFactor")
#     # unlockBinding("compare", env)
#
#     # debugonce(setMethod)
#     # setMethod("compare", signature(numerator = "BFlinearModel", denominator = "missing", data = "data.frame"), newfun, where = env)
#
#     # tmp@.Data <- newfun
#     # s4env <- BayesFactor:::`.__T__compare:BayesFactor`
#     # unlockBinding("BFlinearModel#missing#data.frame", s4env)
#     # s4env$`BFlinearModel#missing#data.frame` <- tmp
#     # lockBinding("BFlinearModel#missing#data.frame", s4env)
#     # jaspBase::assignFunctionInPackage(s4env, ".__T__compare:BayesFactor", "BayesFactor")
#   # }
#
#   # return()
#
#   # we could reset the changes with on.exit, but I'm not sure if it's necessary and doing that cleanly requires withr::defer and adds a dependency
#   # do.call(on.exit({
#   #   jaspBase::assignFunctionInPackage(tempRscales[["originalCreateRscales"]], "createRscales", "BayesFactor")
#   #
#   #   s4env <- BayesFactor:::`.__T__compare:BayesFactor`
#   #   unlockBinding("BFlinearModel#missing#data.frame", s4env)
#   #   s4env$`BFlinearModel#missing#data.frame` <- newfun
#   #   lockBinding("BFlinearModel#missing#data.frame", s4env)
#   #   jaspBase::assignFunctionInPackage(s4env, ".__T__compare:BayesFactor", "BayesFactor")
#   # }, add = TRUE, after = FALSE)
# }

# Dependencies ----
.BANOVAdataDependencies <- function() {
  # seed is in here because basically everything depends on the seed
  c("dependent", "randomFactors", "covariates", "fixedFactors", "betweenSubjectFactors", "repeatedMeasuresFactors", "repeatedMeasuresCells", "modelTerms",
    "seed", "setSeed")
}
.BANOVAmodelSpaceDependencies <- function(modelPrior) {
  c("modelPrior", "betaBinomialParameterA", "betaBinomialParameterB", "wilsonParameterLambda", "castilloParamerU", "enforcePrincipleOfMarginalityFixedEffects", "enforcePrincipleOfMarginalityRandomSlopes",
    if (modelPrior == "custom") "customPriorSpecification"
  )
}
.BANOVArscaleDependencies <- function(priorSpecificationMode) {
  c("cauchyPriorScaleFixedEffects", "cauchyPriorScaleRandomEffects", "cauchyPriorScaleCovariates", "priorSpecificationMode",
    if (priorSpecificationMode == "perTerm") "customPriorSpecification"
  )
}

.BANOVAmodelPriorOptionsChanged <- function(state, options) {
  !identical(state[["modelPriorOptions"]][.BANOVAmodelSpaceDependencies(options[["modelPrior"]])], options[.BANOVAmodelSpaceDependencies(options[["modelPrior"]])])
}

.BANOVAmarginalityOptionsUnchanged <- function(state, options) {
  stateOpts <- state[["modelPriorOptions"]]
  stateOpts[["enforcePrincipleOfMarginalityFixedEffects"]] == options[["enforcePrincipleOfMarginalityFixedEffects"]] &&
    stateOpts[["enforcePrincipleOfMarginalityRandomSlopes"]] == options[["enforcePrincipleOfMarginalityRandomSlopes"]]
}

.BANOVAmodelBFTypeOrOrderChanged <- function(state, options) {
  nms <- c("fixedFactors", "modelTerms", "randomFactors", "covariates", "seed", "setSeed")
  nms <- intersect(names(options), nms) # excludes covariates for ANOVA
  identical(state[nms], options[nms])
}

.BANOVAmodelTermsUnchanged <- function(state, options) {
  identical(state[["modelTerms"]], options[["modelTerms"]])
}

# HF formulas ----
.BANOVAgetFormulaComponents <- function(x, what = c("components", "variables")) {
  what <- match.arg(what)
  if (what == "components") {
    return(colnames(attr(terms(x), "factors")))
  } else {
    return(all.vars(x)[-1L])
  }
}

.BANOVAgetModelTitlesWithAllTerms <- function(modelObjects, modelList, analysisType, hideNuisance) {

  if (hideNuisance)
    return(vapply(modelObjects, FUN = `[[`, FUN.VALUE = character(1L), "title"))

  nullModelName <- gettext("Null model")
  res <- gsub("(.*)~\\s+", "", vapply(modelList, function(x) if (is.null(x)) nullModelName else .BANOVAas.character.formula(x), character(1L)))
  if (analysisType == "RM-ANOVA")
    res <- gsub(.BANOVAsubjectName, "subject", res)
  res[res == ""] <- nullModelName
  return(jaspBase::gsubInteractionSymbol(res))
}

.BANOVAgenerateAllModelFormulas <- function(formula, nuisance = NULL, analysisType = "RM-ANOVA",
                                            enforcePrincipleOfMarginalityFixedEffects = TRUE,
                                            enforcePrincipleOfMarginalityRandomSlopes = FALSE,
                                            rmFactors = NULL, legacy = FALSE
                                            ) {

  # TODO: it might be nicer to represent the NULL model as
  # y ~ 1, rather than NULL. Right now the null model is a formula
  # when there are nuisance variables and otherwise NULL, which leads to a bunch
  # of annoying if (is.null(model[i])) exceptions.

  neverExclude <- paste0("^", nuisance, "$")
  if (!legacy && analysisType == "RM-ANOVA" && is.null(rmFactors))
    stop(".BANOVAgenerateAllModelFormulas called with invalid arguments: analysisType = \"RM-ANOVA\", legacy = FALSE, rmFactors = NULL", domain = NA)

  modelSpace <- try(BayesFactor::enumerateGeneralModels(
    formula,
    whichModels  = if (enforcePrincipleOfMarginalityFixedEffects) "withmain" else "all",
    neverExclude = neverExclude)
  )
  nuisanceRandomSlopes <- NULL

  if (isTryError(modelSpace))
    .quitAnalysis(gettextf("The following error occured in BayesFactor::enumerateGeneralModels: %s",
                           .extractErrorMessage(modelSpace)))

    if (analysisType == "RM-ANOVA" && !legacy) {
    # adjust the nuisance to include the random slopes, only done here because BayesFactor::enumerateGeneralModels
    # does not handle this properly

    allPossibleSlopes <- labels(stats::terms(stats::as.formula(paste("y~", paste(rmFactors, collapse = "*")))))
    # drop the most complex interaction
    allPossibleSlopes <- allPossibleSlopes[-length(allPossibleSlopes)]

    # if at least one interaction among the repeated measures is considered
    if (length(allPossibleSlopes) > 0L) {

      # add interaction with subject
      nuisanceRandomSlopes <- paste0(allPossibleSlopes, ":", .BANOVAsubjectName)

      if (enforcePrincipleOfMarginalityRandomSlopes) {

        for (i in seq_along(modelSpace)) {
          presentLabels <- labels(stats::terms(modelSpace[[i]]))
          termsToAddChar <- intersect(allPossibleSlopes, presentLabels)
          if (length(termsToAddChar) > 0L) {
            termsToAddChar <- paste0(termsToAddChar, ":", .BANOVAsubjectName)
            termsToAdd <- as.formula(paste("~ . +", paste0(termsToAddChar, collapse = " + ")))
            modelSpace[[i]] <- update.formula(modelSpace[[i]], new = termsToAdd)
          }
        }

      } else {

        termsToAdd <- as.formula(paste("~ . +", paste0(nuisanceRandomSlopes, collapse = " + ")))
        modelSpace <- lapply(modelSpace, update.formula, new = termsToAdd)
      }

      # for the reordering done below. termsToAdd always contains the most complex random effects
      formula    <- update.formula(formula, new = termsToAdd)

      # update nuisance
      nuisance <- c(nuisance, nuisanceRandomSlopes)

    }
  }

  if (is.null(nuisance)) {
    return(list(modelList = c(list(NULL), modelSpace), nuisance = nuisance, nuisanceRandomSlopes = nuisanceRandomSlopes))
  } else {
    # put the null-model first
    i <- length(modelSpace)
    modelSpace <- c(modelSpace[[i]], modelSpace[-i])

    # BayesFactor has the nasty habit of changing the order of interactions whenever one of the components is
    # a nuisance variable. Here we ensure the order matches that of the input formula (which matches the order a user entered the factors).
    # see also https://github.com/jasp-stats/jasp-test-release/issues/1181
    originalTerms  <- terms(formula)
    originalOrd    <- attr(originalTerms, "order")
    originalIdxNonInteraction <- which(originalOrd == 1L)

    originalLabels <- attr(originalTerms, "term.labels")
    originalLabelsPieces <- strsplit(originalLabels[originalOrd > 1L], ":")
    originalLabelsPiecesSorted <- lapply(originalLabelsPieces, sort)

    dependent <- all.vars(modelSpace[[1]])[1L]

    for (i in seq_along(modelSpace)) {

      term <- terms(modelSpace[[i]])
      ord  <- attr(term, "order") # interaction order
      idxNonInteraction <- which(ord == 1L)

      termLabels <- attr(term, "term.labels")
      termLabels[idxNonInteraction] <- originalLabels[originalIdxNonInteraction][match(termLabels[idxNonInteraction], originalLabels[originalIdxNonInteraction])]

      for (j in which(ord > 1L)) {
        labelPieces <- strsplit(termLabels[j], ":")[[1L]]
        sortedLabelPieces <- sort(labelPieces)
        for (k in seq_along(originalLabelsPieces))
          if (identical(sortedLabelPieces, originalLabelsPiecesSorted[[k]]))
            termLabels[j] <- paste(originalLabelsPieces[[k]], collapse = ":")
      }

      modelSpace[[i]] <- as.formula(paste(dependent, "~", paste(termLabels, collapse = " + ")))

    }

    return(list(modelList = modelSpace, nuisance = nuisance, nuisanceRandomSlopes = nuisanceRandomSlopes))
  }
}

.BANOVAcreateModelFormula <- function(dependent, modelTerms) {

  rhs <- "" # right hand side of the formula
  nuisance <- NULL
  effects <- NULL
  for (term in modelTerms) {

    comp <- term[["components"]]
    newValue <- paste(comp, collapse = ":")

    rhs <- if (rhs != "") paste0(rhs, " + ", newValue) else newValue

    if (!is.null(term[["isNuisance"]]) && term[["isNuisance"]]) {
      nuisance <- c(nuisance, newValue)
    } else {
      effects <- c(effects, newValue)
    }
  }
  model.formula <- formula(paste(dependent, "~", rhs))

  # this would be cleaner ideal if BayesFactor::enumerateGeneralModels would handle the nuisance properly.
  # if (isRMANOVA && !legacy) {
  #   randomSlopes <- paste0(rmFactors, ":", .BANOVAsubjectName)
  #   termsToAdd <- as.formula(paste("~ . +", paste0(randomSlopes, collapse = " + ")))
  #   model.formula <- update.formula(model.formula, termsToAdd)
  #   nuisance <- c(nuisance, randomSlopes)
  # }

  return(list(model.formula = model.formula, nuisance = nuisance, effects = effects))
}

.BANOVAgetModelFormulaFromBFobj <- function(BayesFactorObj, asCharacter = FALSE) {
  out <- BayesFactorObj$bf@numerator[[1L]]@identifier$formula
  if (asCharacter) {
    return(out)
  } else {
    return(as.formula(out))
  }
}

.BANOVAgetLevelInfo <- function(dataset, formula, dataTypes) {

  levelNames <- .BANOVAmakeLabelList(formula, dataset, dataTypes)
  levelCounts <- lengths(levelNames) # counts of the factors including interaction terms
  return(list(levelCounts = levelCounts, levelNames = levelNames))
}

.BANOVAmakeLabelList <- function(formula, data, dataTypes, unreduce = TRUE, columnFilter = NULL) {

  # from BayesFactor 0.9.12.4.2
  # identical to BayesFactor:::makeLabelList, except that we return a named list
  # with
  #   names:  the variables that make up a particular (interaction) effect
  #   values: the parameter names as returned by BayesFactor::posterior(...)

  terms = attr(terms(formula, data = data), "term.labels")
  if (!is.null(columnFilter))
    terms = terms[!BayesFactor:::filterVectorLogical(columnFilter, terms)]

  if (unreduce)
    dataTypes[dataTypes == "fixed"] = "random"

  labelList = lapply(terms, function(term, data, dataTypes) {
    effects = strsplit(term, ":", fixed = TRUE)[[1]]
    my.names = BayesFactor:::design.names.intList(effects, data, dataTypes)
    return(paste(term, "-", my.names, sep = ""))
  }, data = data, dataTypes = dataTypes)

  # this part is different from BayesFactor
  names(labelList) <- terms
  return(labelList)
}

.BANOVAgetDataTypes <- function(dataset, formula, whichRandom = NULL) {

  # from BayesFactor:::lmBF
  BayesFactor:::createDataTypes(
    formula,
    whichRandom = whichRandom,
    data = dataset,
    analysis = "lm"
  )
}

.BANOVAgetRandomFactors <- function(options, analysisType) {
  if (analysisType == "RM-ANOVA")
    return(.BANOVAsubjectName)
  else
    return(unlist(options[["randomFactors"]]))
}

.BANOVAgetLevelsFromParamNames <- function(names) {

  # NOTE: this works because base64 does not contain "-"
  # split on first "-"; 2 implies output of length 2, i.e., only split once
  out <- do.call(rbind, stringr::str_split(names, "-", 2L))

  # continous variables have as level name the variable name
  idx <- out[, 1L] == out[, 2L]
  out[idx, 2L] <- ""

  # change dots into spaces for aesthetic purposes
  out[, 2L] <- gsub(".", " ", out[, 2L], fixed = TRUE)
  colnames(out) <- c("parameter", "level")
  return(out)

}

.BANOVAas.character.formula <- function(x, ...) {
  # we could also extend the S3 function as.character
  Reduce(paste, trimws(deparse(x)))
}

.BANOVAreorderFormulas <- function(x) {

  # This function reorders the terms of a formula such that they are alphabetical
  # e.g., a ~ c + b + c:b becomes a ~ b + b:c
  # This is necessary because BayesFactor::enumerateGeneralModels always appends the nuisance terms
  # and a ~ b + c != a ~ c + b
  # so without this function the state does not get reused whenever a user modifies the nuisance terms
  if (is.null(x))
    return("NULL") # as a string so it can become one big character vector for match()

  s <- strsplit(attr(stats::terms.formula(x), "term.labels"), ":")
  for (i in which(lengths(s) > 1L))
    s[[i]] <- paste0(sort(s[[i]]), collapse = ":")
  return(paste(all.vars(x)[1L], "~", paste(sort(unlist(s)), collapse = " + ")))
}

.BANOVAreorderTerms <- function(x) {

  # This function reorders the terms of a formula such that they are alphabetical
  # it essentially does the same as .BANOVAreorderFormulas but expects x to be character
  # x should be a character string of the form c("a" "a:b"), so not a ~ b + c:d.
  # For example, output from `all.vars(formula)` or `.BANOVAgetFormulaComponents(formula)`.
  s <- strsplit(x, ":")
  for (i in which(lengths(s) > 1L))
    s[[i]] <- paste0(sort(s[[i]]), collapse = ":")
  return(unlist(s))
}

.BANOVAorderTermsByKnownOrder <- function(currentTerms, currentTermLabels, termLabelOrder) {

  # This function reorders the terms of a formula such that they follow the order of termLabelOrder
  # currentTerms:      output of stats::terms(formula)
  # currentTermLabels: output of labels(stats::formula(formula)), possibly after filtering out nuisance
  # termLabelOrder:    character vector of term labels in the desired order

  currentFactors <- attr(currentTerms, "factors")
  if (any(currentFactors > 1L)) { # TRUE implies may need to reorder some terms according to termLabelOrder
    for (j in grep(":", currentTermLabels, fixed = TRUE)) {
      tmp <- strsplit(currentTermLabels[j], ":", fixed = TRUE)[[1L]]

      for (order in termLabelOrder) {
        if (all(tmp %in% order) && length(tmp) == length(order)) {
          currentTermLabels[j] <- paste(order, collapse = ":")
          break
        }
      }
    }
  }
  return(currentTermLabels)
}

# Single Model Inference (SMI) ----
.BANOVAsmi <- function(jaspResults, dataset, options, model) {

  userWantsSMI <- any(unlist(options[c(
    "singleModelPosteriorPlot", "singleModelQqPlot", "singleModelRsqPlot", "singleModelEstimates", "singleModelCriTable"
  )]))
  if (!userWantsSMI)
    return()

  if (!is.null(jaspResults[["containerSingleModel"]])) {
    singleModelContainer <- jaspResults[["containerSingleModel"]]
  } else {
    singleModelContainer <- createJaspContainer(title = gettext("Single Model Inference"))
    singleModelContainer$dependOn(c(
      "singleModelTerms", "dependent", "samplingMethodMCMC", "samplesMCMC",
      "repeatedMeasuresCells", "seed", "setSeed",
      .BANOVArscaleDependencies(options[["priorSpecificationMode"]])
    ))

    jaspResults[["containerSingleModel"]] <- singleModelContainer
    singleModelContainer$position <- 7
  }

  singleModel <- jaspResults[["singleModelState"]]$object
  if (!is.null(model[["models"]]) && is.null(singleModel) && length(options$singleModelTerms) > 0L && userWantsSMI) {
    singleModel <- try(.BANOVAsmiSamplePosterior(dataset, options, model[["analysisType"]]))
    if (isTryError(singleModel)) {
      singleModelContainer$setError(gettextf("Error in single model inference:\n%s", singleModel))
      singleModel <- NULL
    } else {
      singleModelState <- createJaspState(object = singleModel)
      singleModelState$dependOn(optionsFromObject = singleModelContainer)
      jaspResults[["singleModelState"]] <- singleModelState
    }
  }

  .BANOVAsmiEstimates(singleModelContainer, options, singleModel)
  .BANOVAsmiRsquared (singleModelContainer, options, singleModel)
  .BANOVAsmiQqPlot   (singleModelContainer, options, singleModel)
  .BANOVAsmiRsqPlot  (singleModelContainer, options, singleModel)

  .BANOVAsmiPosteriorPlot(singleModelContainer, dataset, options, singleModel)

  return()

}

.BANOVAsmiSamplePosterior <- function(dataset, options, analysisType) {

  nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e3L else options[["samplesMCMC"]]
  modelTerms <- options$singleModelTerms

  dependent     <- options$dependent
  if (analysisType == "RM-ANOVA") {
    modelTerms[[length(modelTerms) + 1L]] <- list(components = .BANOVAsubjectName, isNuisance = TRUE)
    dependent <- .BANOVAdependentName
  }

  tempRScale    <- .BANOVAgetRScale(options, analysisType)
  rscaleFixed   <- tempRScale[["rscaleFixed"]]
  rscaleRandom  <- tempRScale[["rscaleRandom"]]
  rscaleCont    <- tempRScale[["rscaleCont"]]
  rscaleEffects <- tempRScale[["rscaleEffects"]]

  formula <- .BANOVAcreateModelFormula(dependent, modelTerms)$model.formula
  randomFactors <- .BANOVAgetRandomFactors(options, analysisType)
  dataTypes     <- .BANOVAgetDataTypes(dataset, formula, randomFactors)
  levelInfo     <- .BANOVAgetLevelInfo(dataset, formula, dataTypes)

  # if all variables are continuous, then continuous variables keep their name.   E.g., "contGamma" stays   "contGamma".
  # if any variables are factors,    then continuous variables names are doubled. E.g., "contGamma" becomes "contGamma-contGamma".
  allParamNames <- c(
    "mu",
    if (all(dataTypes == "continuous")) names(levelInfo$levelNames) else unlist(levelInfo$levelNames)
  )

  .setSeedJASP(options)
  samples <- BayesFactor::lmBF(
    formula       = formula,
    data          = dataset,
    whichRandom   = unlist(randomFactors),
    progress      = TRUE,
    posterior     = TRUE,
    rscaleFixed   = rscaleFixed,
    rscaleRandom  = rscaleRandom,
    rscaleCont    = rscaleCont,
    rscaleEffects = rscaleEffects,
    iterations    = nIter
  )

  types <- samples@model@dataTypes

  # keep only relevant columns, drop sig2, g_xxx, ...
  idx <- match(allParamNames, colnames(samples), nomatch = 0L)
  samples <- samples[, idx, drop = FALSE]

  # for some odd reason, Bayesfactor uses as column name contcor1-contcor1
  # if there is a covariate AND fixed factors, but only contcor1 if all variables are continuous...
  allContinuous <- all(types == "continuous")
  if (allContinuous) {
    cnms <- colnames(samples)[-1L] # omit the intercept (mu) which is not changed by Bayesfactor
    colnames(samples)[-1L] <- paste0(cnms, "-", cnms)
  }

  # find out which parameters are random
  isRandom <- logical(ncol(samples))
  idx <- which(types == "random")
  for (i in idx)
    isRandom <- isRandom | startsWith(colnames(samples), names(types)[i])

  means <- colMeans(samples)
  sds <- matrixStats::colSds(samples)
  names(means) <- names(sds) <- colnames(samples)

  h <- (1 - options[["credibleInterval"]]) / 2
  probs <- c(h, 1-h)
  cri <- matrixStats::colQuantiles(samples, probs = probs)

  densities <- .BANOVAfitDensity(samples, 2^9, FALSE)

  tmp  <- .BANOVAgetSMIResidRsq(samples, dataset, formula, nIter, options = options)
  residmeans  <- rowMeans(tmp$resid)
  residquants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1-h))

  # all information for q-q plot of residuals
  residSumStats <- matrix(c(residmeans, residquants), nrow = length(residmeans), ncol = 3L,
                          dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))

  # all information for r-squared density plot
  rsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
  rsqCri <- quantile(tmp$rsq, probs = probs)
  rsqMean <- mean(tmp$rsq)

  return(list(
    means = means, sds = sds, CRIs = cri, densities = densities$fit,
    residSumStats = residSumStats, rsqDens = rsqDens, rsqCri = rsqCri, rsqMean = rsqMean,
    allContinuous = allContinuous, isRandom = isRandom
  ))
}

.BANOVAsmiEstimates <- function(jaspContainer, options, model) {

  if (!is.null(jaspContainer[["SMItablePosteriorEstimates"]]) || !options[["singleModelEstimates"]])
    return()

  estsTable <- createJaspTable(title = gettext("Single Model Posterior Summary"))
  estsTable$position <- 1
  estsTable$dependOn(c("singleModelEstimates", "credibleInterval"))

  overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
  estsTable$addColumnInfo(name = "Variable", title = gettext("Variable"), type = "string")
  estsTable$addColumnInfo(name = "Level",    title = gettext("Level"),    type = "string")
  estsTable$addColumnInfo(name = "Mean",     title = gettext("Mean"),     type = "number")
  estsTable$addColumnInfo(name = "SD",       title = gettext("SD"),       type = "number")
  estsTable$addColumnInfo(name = "Lower",    title = gettext("Lower"),    type = "number", overtitle = overTitle)
  estsTable$addColumnInfo(name = "Upper",    title = gettext("Upper"),    type = "number", overtitle = overTitle)

  if (!(is.null(model) || estsTable$getError())) {
    .BANOVAfillEstimatesTable(
      jaspTable   = estsTable,
      mus         = model$means,
      sds         = model$sds,
      cri         = model$CRIs,
      hasNoLevels = model$allContinuous,
      isRandom    = model$isRandom
    )
  }
  jaspContainer[["SMItablePosteriorEstimates"]] <- estsTable
  return()

}

.BANOVAsmiPosteriorPlot <- function(jaspContainer, dataset, options, model) {

  # meta wrapper for model averaged posterior plots, single model posterior plots, and Q-Q plots
  if (!is.null(jaspContainer[["SMIposteriorPlot"]]) || !options$singleModelPosteriorPlot)
    return()

  posteriorPlotContainer <- createJaspContainer(title = gettext("Single Model Posterior Distributions"))
  jaspContainer[["SMIposteriorPlot"]] <- posteriorPlotContainer
  posteriorPlotContainer$position <- 2
  posteriorPlotContainer$dependOn(c("singleModelPosteriorPlot", "singleModelGroupPosterior"))
  if (is.null(model) || posteriorPlotContainer$getError()) {
    posteriorPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Posterior distribution"), width = 400, height = 400,
                                                            plot = NULL)
  } else {
    .BANOVAfillPosteriorPlotContainer(
      container       = posteriorPlotContainer,
      densities       = model$densities[, -1L, ], # omit intercept
      cris            = model$CRIs[-1L, ],        # omit intercept
      isRandom        = model$isRandom[-1L],      # omit intercept
      groupParameters = identical(options[["groupPosterior"]], "grouped")
    )
  }
  return()
}

.BANOVAsmiQqPlot <- function(jaspContainer, options, model) {

  if (!is.null(jaspContainer[["QQplot"]]) || !options$singleModelQqPlot)
    return()

  if (is.null(model) || jaspContainer$getError()) {
    p <- NULL
  } else {
    p <- jaspGraphs::plotQQnorm(
      residuals = model$residSumStats[,"mean"],
      lower     = model$residSumStats[,"cri.2.5%"],
      upper     = model$residSumStats[,"cri.97.5%"],
      ablineColor = "darkred"
    )
  }
  plot <- createJaspPlot(
    title       = gettext("Q-Q Plot"),
    width       = 400,
    height      = 400,
    plot        = p,
    aspectRatio = 1
  )
  plot$dependOn("singleModelQqPlot")
  plot$position <- 3
  jaspContainer[["QQplot"]] <- plot
  return()
}

.BANOVAsmiRsqPlot <- function(jaspContainer, options, model) {

  if (!is.null(jaspContainer[["smirsqplot"]]) || !options$singleModelRsqPlot)
    return()

  if (is.null(model) || jaspContainer$getError()) {
    p <- NULL
  } else {
    dd     <- model$rsqDens
    rsqCri <- model$rsqCri
    df     <- data.frame(x = dd$x, y = dd$y)
    xName <- expression(R^2)
    p <- jaspGraphs::PlotPriorAndPosterior(dfLines = df, xName = xName, CRI = rsqCri, drawCRItxt = FALSE)
  }
  plot <- createJaspPlot(
    title       = gettextf("Posterior R%s", "\u00B2"),
    width       = 400,
    height      = 400,
    plot        = p,
    aspectRatio = 1
  )
  plot$dependOn("singleModelRsqPlot")
  plot$position <- 4
  jaspContainer[["smirsqplot"]] <- plot
  return()
}

.BANOVAsmiRsquared <- function(jaspResults, options, model) {

  if (!is.null(jaspResults[["tableSMICRI"]]) || !options[["singleModelCriTable"]])
    return()

  criTable <- createJaspTable(title = gettextf("Single Model R%s", "\u00B2"))
  criTable$position <- 3.5
  criTable$dependOn(c("singleModelCriTable", "credibleInterval"))

  overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
  criTable$addColumnInfo(name = "rsq",   title = "",               type = "string")
  criTable$addColumnInfo(name = "Mean",  title = gettext("Mean"),  type = "number")
  criTable$addColumnInfo(name = "Lower", title = gettext("Lower"), type = "number", overtitle = overTitle)
  criTable$addColumnInfo(name = "Upper", title = gettext("Upper"), type = "number", overtitle = overTitle)

  if (!is.null(model)) {
    cri <- model[["rsqCri"]]
    df <- data.frame(
      rsq   = "R\u00B2",
      Mean  = model[["rsqMean"]],
      Lower = cri[1L],
      Upper = cri[2L],
      row.names = NULL
    )
    criTable$setData(df)
  } else {
    criTable[["rsq"]] <- "R\u00B2"
  }
  jaspResults[["tableSMICRI"]] <- criTable
  return()

}

# Citations ----
.BANOVAcitations <- c(
  "MoreyEtal2015"    = "Morey, R. D. & Rouder, J. N. (2015). BayesFactor (Version 0.9.10-2)[Computer software].",
  "RouderEtal2012"   = "Rouder, J. N., Morey, R. D., Speckman, P. L., Province, J. M., (2012) Default Bayes Factors for ANOVA Designs. Journal of Mathematical Psychology. 56. p. 356-374.",
  "Jeffreys1938"     = "Jeffreys, H. (1938). Significance tests when several degrees of freedom arise simultaneously. Proceedings of the Royal Society of London. Series A, Mathematical and Physical Sciences, 165, 161-198.",
  "WestfallEtal1997" = "Westfall, P. H., Johnson, W. O., & Utts, J. M. (1997). A Bayesian perspective on the Bonferroni adjustment. Biometrika, 84, 419-427."
)
jasp-stats/jaspAnova documentation built on June 14, 2024, 6:48 p.m.