#
# Copyright (C) 2013-2022 University of Amsterdam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Abbreviations:
# HF = helper function
.BANOVArunAnalysis <- function(jaspResults, dataset, options, analysisType = c("ANOVA", "ANCOVA", "RM-ANOVA")) {
# the main workhorse of the Bayesian ANOVA, ANCOVA, and Repeated Measures.
analysisType <- match.arg(analysisType)
dataset <- .BANOVAreadData(dataset, options, analysisType)
if (isTryError(dataset))
.quitAnalysis(gettext("Error while loading data. Please verify your repeated measures observations."))
dataInfo <- .BANOVAerrorhandling(dataset, options, analysisType)
model <- .BANOVAestimateModels(jaspResults, dataset, options, dataInfo, analysisType)
model[["posteriors"]] <- .BANOVAestimatePosteriors(jaspResults, dataset, options, model)
.BANOVAeffectsTable (jaspResults, options, model)
.BANOVAestimatesTable(jaspResults, options, model)
.BANOVArsquaredTable (jaspResults, options, model)
# model averaged plots
.BANOVAposteriorPlot(jaspResults, dataset, options, model)
.BANOVAqqplot (jaspResults, options, model)
.BANOVArsqplot (jaspResults, options, model)
.BANOVAnullControlPostHocTable(jaspResults, dataset, options, model)
# single model plots and tables
.BANOVAsmi(jaspResults, dataset, options, model)
# descriptives
.BANOVAdescriptives(jaspResults, dataset, options, dataInfo, analysisType)
return()
}
.BANOVAerrorhandling <- function(dataset, options, analysisType) {
customChecks <- list()
if (analysisType != "RM-ANOVA") {
hasDV <- options$dependent != ""
hasIV <- any(lengths(options[c("fixedFactors", "covariates")]) != 0)
fixed <- options$fixedFactors
noVariables <- !(hasDV && hasIV)
target <- c(options$covariates, options$dependent)
# check if the last model has an interaction effect
mostComplexModel <- NULL
if (length(options[["modelTerms"]]) > 0L) {
mostComplexModel <- options[["modelTerms"]][[length(options[["modelTerms"]])]][["components"]]
mostComplexModel <- intersect(mostComplexModel, fixed)
}
if (length(mostComplexModel) > 1L)
customChecks[["missingInteractionCells"]] <- .BANOVAmissingInteractionCells
if (!noVariables) {
.hasErrors(
dataset = dataset,
type = c("infinity", "observations", "variance", "factorLevels", "duplicateColumns"),
infinity.target = target,
variance.target = target,
observations.target = target,
observations.amount = paste("<", length(options$modelTerms) + 1),
factorLevels.target = fixed,
factorLevels.amount = " < 2",
# custom checks
custom = customChecks,
missingInteractionCells.target = mostComplexModel,
exitAnalysisIfErrors = TRUE
)
}
} else {
hasDV <- !any(options$repeatedMeasuresCells == "")
hasIV <- any(lengths(options[c("betweenSubjectFactors", "covariates")]) != 0)
fixed <- unlist(options$betweenSubjectFactors)
covariates <- unlist(options$covariates)
noVariables <- !hasDV
target <- c(covariates, fixed)
# check if the last model has an interaction effect
mostComplexModel <- NULL
if (length(options[["modelTerms"]]) > 0L) {
mostComplexModel <- options[["modelTerms"]][[length(options[["modelTerms"]])]][["components"]]
mostComplexModel <- intersect(mostComplexModel, fixed)
}
if (length(mostComplexModel) > 1L)
customChecks[["missingInteractionCells"]] <- .BANOVAmissingInteractionCells
if (length(target) > 0L) {
.hasErrors(
dataset = dataset,
type = c("infinity", "observations", "variance", "factorLevels", "duplicateColumns"),
infinity.target = target,
variance.target = covariates,
observations.target = target,
observations.amount = paste("<", length(options$modelTerms) + 1),
factorLevels.target = fixed,
factorLevels.amount = " < 2",
duplicateColumns.target = target,
# custom checks
custom = customChecks,
missingInteractionCells.target = mostComplexModel,
exitAnalysisIfErrors = TRUE
)
}
if (!noVariables) {
# messages from commonmessages.R
customChecks <- list(
infinity = function() {
x <- dataset[, .BANOVAdependentName]
if (any(is.infinite(x)))
return(gettext("Infinity found in repeated measures cells."))
return(NULL)
},
observations = function() {
x <- dataset[, .BANOVAdependentName]
nObs <- length(options$modelTerms) + 1
if (length(na.omit(x)) <= nObs)
return(gettextf("Number of observations is < %s in repeated measures cells", nObs))
return(NULL)
},
variance = function() {
x <- dataset[, .BANOVAdependentName]
validValues <- x[is.finite(x)]
variance <- 0
if (length(validValues) > 1)
variance <- stats::var(validValues)
if (variance == 0)
return(gettext("The variance in the repeated measures cells is 0."))
return(NULL)
},
duplicateColumns = function() {
cols <- c(.BANOVAsubjectName, target)
if (length(cols) == 1L)
return()
datasetList <- as.list(dataset[, cols])
duplicatedCols <- duplicated(datasetList) | duplicated(datasetList, fromLast = TRUE)
if (any(duplicatedCols)) {
if (duplicatedCols[1L]) {
msg <- gettextf("Duplicate variables encountered in repeated measures cells, %s",
paste(target[duplicatedCols[-1L]], collapse = ", "))
} else {
msg <- gettextf("Duplicate variables encountered in %s",
paste(target[duplicatedCols[-1L]], collapse = ", "))
}
return(msg)
}
return(NULL)
},
redundantParticipantColumn = function() {
if (nlevels(dataset[[.BANOVAsubjectName]]) == nlevels(dataset[[target]])) {
return(gettextf("Duplicate participant column added to model terms, %s", target))
} else {
return()
}
}
)
.hasErrors(
dataset = dataset,
target = target,
custom = customChecks,
exitAnalysisIfErrors = TRUE
)
}
}
return(list(noVariables = noVariables, hasIV = hasIV, hasDV = hasDV))
}
.BANOVAmissingInteractionCells <- function(dataset, target) {
# sorts a data frame by all its columns
sortDataFrame <- function(df) {
df[do.call(order, df), ]
}
uniqueObservedCombinations <- sortDataFrame(unique(dataset[, target, drop = FALSE]))
uniqueExpectedCombinations <- sortDataFrame(do.call(expand.grid, c(lapply(uniqueObservedCombinations, unique), stringsAsFactors = FALSE)))
# if the rows match then it must be
if (nrow(uniqueObservedCombinations) == nrow(uniqueExpectedCombinations))
return(NULL)
# find missing combinations
observedStrings <- unname(apply(uniqueObservedCombinations, 1L, paste, collapse = jaspBase::interactionSymbol))
expectedStrings <- unname(apply(uniqueExpectedCombinations, 1L, paste, collapse = jaspBase::interactionSymbol))
missingStrings <- setdiff(expectedStrings, observedStrings)
missingStringsCollapsed <- paste(missingStrings[1:min(length(missingStrings), 10)], collapse = ", ")
interactionVariable <- paste(colnames(uniqueObservedCombinations), collapse = jaspBase::interactionSymbol)
if (length(missingStrings) <= 10) {
return(gettextf(
"The interaction effect of %1$s has no observations for the combination(s): %2$s. Please adjust the model and remove any interactions with missing cells",
interactionVariable, missingStringsCollapsed
))
} else {
return(gettextf(
"The interaction effect of %1$s has no observations for the combination(s): %2$s. Only the first 10 missing combinations are shown. Please adjust the model and remove any interactions with missing cells",
interactionVariable, missingStringsCollapsed
))
}
}
# model comparison ----
.BANOVAestimateModels <- function(jaspResults, dataset, options, errors,
analysisType = c("ANOVA", "ANCOVA", "RM-ANOVA")) {
# also makes the model comparison table
stateObj <- jaspResults[["tableModelComparisonState"]]$object
if (!is.null(jaspResults[["tableModelComparison"]]) && !is.null(stateObj)) {
stateObj$completelyReused <- TRUE # means that posteriors won't need to be resampled
return(stateObj)
} else if (errors$noVariables) {
modelTable <- .BANOVAinitModelComparisonTable(options)
jaspResults[["tableModelComparison"]] <- modelTable
return(list(analysisType = analysisType))
} else if (!is.null(stateObj) &&
.BANOVAmodelTermsUnchanged(stateObj, options) &&
.BANOVAmarginalityOptionsUnchanged(stateObj, options) &&
(.BANOVAmodelBFTypeOrOrderChanged(stateObj, options) || identical(stateObj[["shownTableSize"]], options[c("modelsShown", "numModelsShown")]))) {
# if the statement above is TRUE then no new variables were added (or seed changed)
# and the only change is in the Bayes factor type, the ordering, or the number of models shown
modelTable <- .BANOVAinitModelComparisonTable(options)
modelTable[["Models"]] <- .BANOVAgetModelTitlesWithAllTerms(stateObj[["models"]], stateObj[["model.list"]], analysisType, options[["hideNuisanceParameters"]])
internalTable <- stateObj$internalTableObj$internalTable
nmodels <- nrow(internalTable)
.BANOVAestimateModels
shownTableSize <- .BANOVAgetShownTableSizeAndAddFootnote(modelTable, nmodels, options[["modelsShown"]], options[["numModelsShown"]])
if (.BANOVAmodelPriorOptionsChanged(stateObj, options)) {
priorProbs <- .BANOVAcomputePriorModelProbs(stateObj$model.list, stateObj$nuisance, options)
internalTable[["P(M)"]] <- priorProbs
stateObj$completelyReused <- FALSE # model-averaged posteriors need to be resampled
} else {
stateObj$completelyReused <- TRUE # model-averaged posteriors do not need to be resampled
}
internalTableObj <- .BANOVAfinalizeInternalTable(options, internalTable)
stateObj$postProbs <- internalTableObj$internalTable[["P(M|data)"]]
stateObj$priorProbs <- internalTableObj$internalTable[["P(M)"]]
stateObj$internalTableObj <- internalTableObj
modelTable$setData(internalTableObj$table[seq_len(shownTableSize), ])
jaspResults[["tableModelComparison"]] <- modelTable
return(stateObj)
}
modelTerms <- options$modelTerms
dependent <- options$dependent
if (analysisType == "RM-ANOVA") {
modelTerms[[length(modelTerms) + 1L]] <- list(components = .BANOVAsubjectName, isNuisance = TRUE)
dependent <- .BANOVAdependentName
}
fixedFactors <- options$fixedFactors
randomFactors <- .BANOVAgetRandomFactors(options, analysisType)
tempRScale <- .BANOVAgetRScale(options, analysisType)
rscaleFixed <- tempRScale[["rscaleFixed"]]
rscaleRandom <- tempRScale[["rscaleRandom"]]
rscaleCont <- tempRScale[["rscaleCont"]]
rscaleEffects <- tempRScale[["rscaleEffects"]]
tmp <- .BANOVAcreateModelFormula(dependent, modelTerms)
model.formula <- tmp$model.formula
nuisance <- tmp$nuisance
effects <- tmp$effects
if (analysisType == "RM-ANOVA") {
legacy <- options[["legacyResults"]]
rmFactors <- vapply(options[["repeatedMeasuresFactors"]], `[[`, character(1L), "name")
} else {
legacy <- FALSE
rmFactors <- NULL
}
#Make a list of models to compare
temp <- .BANOVAgenerateAllModelFormulas(
formula = model.formula,
nuisance = nuisance,
analysisType = analysisType,
enforcePrincipleOfMarginalityFixedEffects = options[["enforcePrincipleOfMarginalityFixedEffects"]],
enforcePrincipleOfMarginalityRandomSlopes = options[["enforcePrincipleOfMarginalityRandomSlopes"]],
rmFactors = rmFactors,
legacy = legacy
)
model.list <- temp$modelList
nuisance <- temp$nuisance
nuisanceRandomSlopes <- temp$nuisanceRandomSlopes
if (length(model.list) == 1L) {
modelTable <- .BANOVAinitModelComparisonTable(options)
modelTable$setError(gettext("Bayes factor is undefined -- all effects are specified as nuisance."))
jaspResults[["tableModelComparison"]] <- modelTable
return(list(analysisType = analysisType))
}
# TODO: discuss if we want a hard limit if length(model.list) > ...
# that would avoid peoples computers from slowing down tremendously.
# JCG: A hard limit would be a bit cruel right? How about a warning instead? (in QML already?)
#Run all other models and store the results in the model.object
neffects <- length(effects)
nmodels <- length(model.list)
modelObject <- vector("list", nmodels)
reorderedEffects <- .BANOVAreorderTerms(effects)
reorderedNuisance <- if (is.null(nuisance)) NULL else .BANOVAreorderTerms(nuisance)
if (nmodels > 0L && neffects > 0L) {
# effects.matrix contains for each row (model), which predictors (columns) are in the model
# interactions.matrix contains for each predictor (column) what predictors (rows) is it composed of
interactions.matrix <- .BANOVAcomputeInteractionsMatrix(effects)
effects.matrix <- matrix(data = FALSE, nrow = nmodels, ncol = neffects)
colnames(effects.matrix) <- effects
for (m in seq_len(nmodels)) {
modelObject[[m]] <- list()
if (m == 1L) {
if (is.null(nuisance)) { # intercept only
modelObject[[m]]$title <- gettext("Null model")
next # all effects are FALSE anyway
} else {
if (analysisType == "RM-ANOVA" && !legacy) {
tempNuisance <- setdiff(nuisance, nuisanceRandomSlopes)
modelObject[[m]]$title <- sprintf(ngettext(
length(tempNuisance),
"Null model (incl. %s and random slopes)",
"Null model (incl. %s, and random slopes)"
), paste(.BANOVAdecodeNuisance(tempNuisance), collapse = ", "))
} else {
modelObject[[m]]$title <- gettextf("Null model (incl. %s)", paste(.BANOVAdecodeNuisance(nuisance), collapse = ", "))
}
}
}
model.effects <- .BANOVAgetFormulaComponents(model.list[[m]])
reorderedModelEffects <- .BANOVAreorderTerms(model.effects)
idx <- match(reorderedModelEffects, reorderedEffects, nomatch = 0L)
idx <- idx[!is.na(idx)]
effects.matrix[m, idx] <- TRUE
if (m > 1L) {
model.title <- model.effects[match(reorderedModelEffects, reorderedNuisance, 0L) == 0L]
modelObject[[m]]$title <- jaspBase::gsubInteractionSymbol(paste(model.title, collapse = " + "))
}
}
rownames(effects.matrix) <- sapply(modelObject, `[[`, "title")
}
modelTable <- .BANOVAinitModelComparisonTable(options)
modelNames <- .BANOVAgetModelTitlesWithAllTerms(modelObject, model.list, analysisType, options[["hideNuisanceParameters"]])
shownTableSize <- .BANOVAgetShownTableSizeAndAddFootnote(modelTable, nmodels, options[["modelsShown"]], options[["numModelsShown"]])
modelTable[["Models"]] <- modelNames[seq_len(shownTableSize)]
jaspResults[["tableModelComparison"]] <- modelTable
# internalTable is an internal representation of the model comparison table
# internalTable <- matrix(NA, nmodels, 6L, dimnames = list(modelNames, c("Models", "P(M)", "P(M|data)", "BFM", "BF10", "error %")))
internalTable <- data.frame(
Models = character(nmodels),
`P(M)` = numeric(nmodels),
`P(M|data)` = numeric(nmodels),
BFM = numeric(nmodels),
BF10 = numeric(nmodels),
`error %` = numeric(nmodels),
check.names = FALSE
)
# set model names, BF null model and p(M)
internalTable[["Models"]] <- modelNames
internalTable[1L, "BF10"] <- 0
internalTable[["P(M)"]] <- .BANOVAcomputePriorModelProbs(model.list, nuisance, options)
#Now compute Bayes Factors for each model in the list, and populate the tables accordingly
startProgressbar(nmodels, gettext("Computing Bayes factors"))
# check if any models can be resued from the state
if (!is.null(stateObj[["modelTerms"]])) {
oldFormulas <- sapply(stateObj$model.list, .BANOVAreorderFormulas)
newFormulas <- sapply(model.list, .BANOVAreorderFormulas)
reuseable <- match(newFormulas, oldFormulas)
} else {
reuseable <- rep(NA, nmodels)
}
if (analysisType == "RM-ANOVA") {
# the default null-model contains subject
anyNuisance <- TRUE
} else {
# the default null-model is intercept only
anyNuisance <- length(nuisance) > 0L
}
# without these there is no error
bfIterations <- if (options[["samplingMethodNumericAccuracy"]] == "auto") 1e4L else options[["samplesNumericAccuracy"]]
if (options[["integrationMethod"]] == "automatic") {
bfIntegrationMethod <- "auto"
} else {
bfIntegrationMethod <- "laplace"
modelTable$addFootnote(gettext("The Laplace approximation is faster but also less accurate than the automatic method. This is fine when exploring the data and obtaining a rough estimate. However, we recommend using the automatic integration method to obtain the final results."))
modelTable$addFootnote(gettext("The Laplace approximation does not always provide an error estimate."), colNames = "error %")
}
for (m in seq_len(nmodels)) {
# loop over all models, where the first is the null-model and the last the most complex model
if (is.na(reuseable[m])) {
if (!is.null(model.list[[m]])) {
.setSeedJASP(options)
bf <- try(BayesFactor::lmBF(
formula = model.list[[m]],
data = dataset,
whichRandom = randomFactors,
progress = FALSE,
posterior = FALSE,
rscaleFixed = rscaleFixed,
rscaleRandom = rscaleRandom,
rscaleCont = rscaleCont,
rscaleEffects = rscaleEffects,
iterations = bfIterations,
method = bfIntegrationMethod
))
if (isTryError(bf)) {
modelName <- strsplit(.BANOVAas.character.formula(model.list[[m]]), "~ ")[[1L]][2L]
.quitAnalysis(gettextf("Bayes factor could not be computed for model: %1$s.\nThe error message was: %2$s.",
.BANOVAdecodeSubject(modelName), .extractErrorMessage(bf)))
} else {
# delete the data object -- otherwise it gets saved in the state
bf@data <- data.frame()
}
} else {
bf <- NULL
}
} else {
bf <- stateObj$models[[reuseable[m]]]$bf
}
modelObject[[m]]$bf <- bf
if (!is.null(bf)) {
# if (isTryError(bf)) {
# message <- .extractErrorMessage(bf)
# modelObject[[m]]$error.message <- paste("Bayes factor computation failed with error:", message)
# } else {
# bfObj can have a modified denominator, but the saved objects are always compared against intercept only
bfObj <- modelObject[[m]]$bf
if (anyNuisance)
bfObj <- bfObj / modelObject[[1L]]$bf
internalTable[m, "BF10"] <- bfObj@bayesFactor[, "bf"] # always LogBF10!
internalTable[m, "error %"] <- bfObj@bayesFactor[, "error"] # always NA if bfIntegrationMethod == "laplace"
# }
# disable filling of Bayes factor column (see https://github.com/jasp-stats/jasp-test-release/issues/1018 for discussion)
# modelTable[["BF10"]] <- .recodeBFtype(internalTable[["BF10"]],
# newBFtype = options$bayesFactorType,
# oldBFtype = "LogBF10")
modelTable[["error %"]] <- internalTable[seq_len(shownTableSize), "error %"]
}
progressbarTick()
}
internalTableObj <- .BANOVAfinalizeInternalTable(options, internalTable)
modelTable$setData(internalTableObj$table[seq_len(shownTableSize), ])
if (length(internalTableObj[["footnotes"]]) > 0L) {
idxRow <- internalTableObj[["footnotes"]][["rows"]]
idxCol <- internalTableObj[["footnotes"]][["cols"]]
tb <- internalTableObj$table
modelTable$addFootnote(message = internalTableObj[["footnotes"]][["message"]],
symbol = " <sup>✝</sup>", # latin cross
rowNames = rownames(tb)[idxRow],
colNames = colnames(tb)[idxCol])
}
if (anyNuisance) {
if (analysisType == "RM-ANOVA" && !options[["legacyResults"]]) {
message <- gettextf("All models include %s, and random slopes for all repeated measures factors.",
paste0(.BANOVAdecodeNuisance(setdiff(nuisance, nuisanceRandomSlopes)), collapse = ", "))
} else {
if (analysisType == "RM-ANOVA")
modelTable$addFootnote(message = gettext("Random slopes for repeated measures factors are excluded."), symbol = .BANOVAGetWarningSymbol())
message <- gettextf("All models include %s.", paste0(.BANOVAdecodeNuisance(nuisance), collapse = ", "))
}
modelTable$addFootnote(message = message)
}
model <- list(
models = modelObject,
postProbs = internalTableObj$internalTable[["P(M|data)"]],
priorProbs = internalTableObj$internalTable[["P(M)"]],
internalTableObj = internalTableObj,
effects = effects.matrix,
interactions.matrix = interactions.matrix,
nuisance = nuisance,
nuisanceRandomSlopes = nuisanceRandomSlopes,
analysisType = analysisType,
# these are necessary for partial reusage of the state (e.g., when a fixedFactor is added/ removed)
model.list = model.list,
fixedFactors = fixedFactors,
randomFactors = if (analysisType == "RM-ANOVA") randomFactors else options[["randomFactors"]], # stored because they are modified in RM-ANOVA
modelTerms = modelTerms,
covariates = if (analysisType == "ANOVA") NULL else options[["covariates"]],
seed = options[["seed"]],
setSeed = options[["setSeed"]],
reuseable = reuseable,
RMFactors = options[["repeatedMeasuresFactors"]],
modelPriorOptions = options[.BANOVAmodelSpaceDependencies(options[["modelPrior"]])],
hideNuisanceEffects = options[["hideNuisanceEffects"]],
shownTableSize = options[c("modelsShown", "numModelsShown")]
)
# save state
stateObj <- createJaspState(object = model, dependencies = c(
# does NOT depend on any factors or covariates, to facilitate reusing previous models
"dependent", "repeatedMeasuresCells", "samplingMethodNumericAccuracy", "samplesNumericAccuracy", "seed", "setSeed",
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
jaspResults[["tableModelComparisonState"]] <- stateObj
return(model)
}
.BANOVAeffectsTable <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["tableEffects"]]) || !options[["effects"]])
return()
# isTRUE should handle a state issue, see https://github.com/jasp-stats/jasp-test-release/issues/839
if (isTRUE(model[["analysisType"]] != "RM-ANOVA" && options[["dependent"]] != "")) {
title <- gettextf("Analysis of Effects - %s", options[["dependent"]])
} else {
title <- gettext("Analysis of Effects")
}
effectsTable <- createJaspTable(title = title)
effectsTable$position <- 2
effectsTable$dependOn(c(
.BANOVAdataDependencies(),
"effects", "effectsType",
"samplingMethodNumericAccuracy", "samplesNumericAccuracy", "bayesFactorType",
"integrationMethod",
.BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
effectsTable$addCitation(.BANOVAcitations[1:2])
inclusion.title <- switch(
options$bayesFactorType,
"LogBF10" = gettext("Log(BF<sub>incl</sub>)"),
"BF01" = gettext("BF<sub>excl</sub>"),
"BF10" = gettext("BF<sub>incl</sub>")
)
effectsTable$addColumnInfo(name = "Effects", title = gettext("Effects"), type = "string")
effectsTable$addColumnInfo(name = "P(incl)", title = gettext("P(incl)"), type = "number")
effectsTable$addColumnInfo(name = "P(excl)", title = gettext("P(excl)"), type = "number")
effectsTable$addColumnInfo(name = "P(incl|data)", title = gettext("P(incl|data)"), type = "number")
effectsTable$addColumnInfo(name = "P(excl|data)", title = gettext("P(excl|data)"), type = "number")
effectsTable$addColumnInfo(name = "BFInclusion", title = inclusion.title, type = "number")
if (options$effectsType == "matchedModels") {
effectsTable$addFootnote(gettextf("Compares models that contain the effect to equivalent models stripped of the effect. Higher-order interactions are excluded. Analysis suggested by Sebastiaan Math%st.", "\u00F4"))
}
if (is.null(model$models)) {
jaspResults[["tableEffects"]] <- effectsTable
return()
}
effects.matrix <- model$effects
no.effects <- ncol(effects.matrix)
effectNames <- colnames(model$effects)
idxNotNan <- c(TRUE, !is.nan(model$postProbs[-1L]))
if (options$effectsType == "allModels") {
if (any(!idxNotNan)) {
# renormalize the prior and posterior probabilities
priorInclProb <- crossprod(effects.matrix[idxNotNan, , drop = FALSE], model[["priorProbs"]][idxNotNan] / sum(model[["priorProbs"]][idxNotNan]))
postInclProb <- crossprod(effects.matrix[idxNotNan, , drop = FALSE], model[["postProbs"]][idxNotNan] / sum(model[["postProbs"]][idxNotNan]))
} else {
# do not renormalize
priorInclProb <- crossprod(effects.matrix, model[["priorProbs"]])
postInclProb <- crossprod(effects.matrix, model[["postProbs"]])
}
# deal with numerical error
postInclProb[postInclProb > 1] <- 1
postInclProb[postInclProb < 0] <- 0
bfIncl <- (postInclProb / (1 - postInclProb)) / (priorInclProb / (1 - priorInclProb))
priorExclProb <- 1 - priorInclProb
postExclProb <- 1 - postInclProb
} else {
tmp <- BANOVAcomputMatchedInclusion(
effectNames, effects.matrix, model$interactions.matrix,
model$priorProbs, model$postProbs
)
priorInclProb <- tmp[["priorInclProb"]]
postInclProb <- tmp[["postInclProb"]]
bfIncl <- tmp[["bfIncl"]]
priorExclProb <- tmp[["priorExclProb"]]
postExclProb <- tmp[["postExclProb"]]
# show BFinclusion for nuisance predictors as 1, rather than NaN
priorInclIs1 <- is.nan(bfIncl) & abs(1 - priorInclProb) <= sqrt(.Machine$double.eps)
bfIncl[priorInclIs1] <- 1
}
if (sum(!idxNotNan) > 1L) { # null model is always omitted, so 2 or more omitted indicates some models failed
effectsTable$addFootnote(message = gettext("Some Bayes factors could not be calculated. Inclusion probabilities and Bayes factors are calculated while excluding these models. The results may be uninterpretable!"),
symbol = .BANOVAGetWarningSymbol()
)
}
effectsTable[["Effects"]] <- jaspBase::gsubInteractionSymbol(effectNames)
effectsTable[["P(incl)"]] <- priorInclProb
effectsTable[["P(incl|data)"]] <- postInclProb
effectsTable[["BFInclusion"]] <- switch(
options$bayesFactorType,
"LogBF10" = log(bfIncl),
"BF01" = 1 / bfIncl,
"BF10" = bfIncl
)
effectsTable[["P(excl)"]] <- priorExclProb
effectsTable[["P(excl|data)"]] <- postExclProb
jaspResults[["tableEffects"]] <- effectsTable
return()
}
BANOVAcomputMatchedInclusion <- function(effectNames, effects.matrix, interactions.matrix,
priorProbs, postProbs) {
# this method is inspired by this post: https://www.cogsci.nl/blog/interpreting-bayesian-repeated-measures-in-jasp
priorInclProb <- postInclProb <- bfIncl <- priorExclProb <- postExclProb <-
numeric(length(effectNames))
for (i in seq_along(effectNames)) {
effect <- effectNames[i]
# get all higher order interactions of which this effect is a component
# e.g., V1 is a component of V1:V2
idx1 <- interactions.matrix[effect, ]
# get all models that exclude the predictor, but that always include the lower order main effects
# e.g., V1:V2 is compared against models that always include V1 and V2
idx2 <- interactions.matrix[, effect] # if effect is V1:V2, idx2 contains c(V1, V2)
# idx3 is FALSE if a model contains higher order interactions of effect, TRUE otherwise
idx3 <- !matrixStats::rowAnys(effects.matrix[, idx1, drop = FALSE])
# all models that include the effect, without higher order interactions
idx4 <- idx3 & effects.matrix[, i]
priorInclProb[i] <- sum(idx4 * priorProbs)
postInclProb[i] <- sum(idx4 * postProbs)
# the models to consider for the prior/ posterior exclusion probability.
# idx5 includes models that have: all subcomponents & no higher order interaction & not the effect
idx5 <- matrixStats::rowAlls(effects.matrix[, idx2, drop = FALSE]) & idx3 & !effects.matrix[, i]
priorExclProb[i] <- sum(idx5 * priorProbs)
postExclProb[i] <- sum(idx5 * postProbs)
# compute inclusion BF
bfIncl[i] <- (postInclProb[i] / postExclProb[i]) / (priorInclProb[i] / priorExclProb[i])
}
return(list(
priorInclProb = priorInclProb,
postInclProb = postInclProb,
priorExclProb = priorExclProb,
postExclProb = postExclProb,
bfIncl = bfIncl
))
}
.BANOVAinitModelComparisonTable <- function(options) {
# function that creates an empty JASP table to be filled later
modelTable <- createJaspTable(title = gettext("Model Comparison"))
modelTable$position <- 1L
modelTable$addCitation(.BANOVAcitations[1:2])
modelTable$dependOn(c(
.BANOVAdataDependencies(),
"samplingMethodNumericAccuracy", "samplesNumericAccuracy", "integrationMethod",
"bayesFactorType", "bayesFactorOrder",
"hideNuisanceParameters", "legacyResults",
"modelsShown",
.BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
if (options[["modelsShown"]] == "limited")
modelTable$dependOn("numModelsShown")
switch(options$bayesFactorType,
BF10 = {
bfm.title <- gettext("BF<sub>M</sub>")
bf.title <- gettext("BF<sub>10</sub>")
},
BF01 = {
bfm.title <- gettext("BF<sub>M</sub>")
bf.title <- gettext("BF<sub>01</sub>")
},
LogBF10 = {
bfm.title <- gettext("Log(BF<sub>M</sub>)")
bf.title <- gettext("Log(BF<sub>10</sub>)")
}
)
modelTable$addColumnInfo(name = "Models", title = gettext("Models"), type = "string")
modelTable$addColumnInfo(name = "P(M)", title = gettext("P(M)"), type = "number")
modelTable$addColumnInfo(name = "P(M|data)", title = gettext("P(M|data)"), type = "number")
modelTable$addColumnInfo(name = "BFM", title = bfm.title, type = "number")
modelTable$addColumnInfo(name = "BF10", title = bf.title, type = "number")
modelTable$addColumnInfo(name = "error %", title = gettextf("error %%"), type = "number")
return(modelTable)
}
.BANOVAfinalizeInternalTable <- function(options, internalTable) {
# function that actually fills in the table created by .BANOVAinitModelComparisonTable
footnotes <- NULL
logSumExp <- matrixStats::logSumExp
logbfs <- internalTable[["BF10"]]
logprior <- log(internalTable[["P(M)"]])
if (!anyNA(internalTable[["BF10"]])) {
# no errors, proceed normally and complete the table
logsumbfs <- logSumExp(logbfs + logprior)
internalTable[["P(M|data)"]] <- exp(logbfs + logprior - logsumbfs)
nmodels <- nrow(internalTable)
for (i in seq_len(nmodels)) {
internalTable[i, "BFM"] <- logbfs[i] - logSumExp(logbfs[-i]) + log(nmodels - 1L)
}
} else {
# create table excluding failed models
idxGood <- !is.na(logbfs)
widxGood <- which(idxGood)
logsumbfs <- logSumExp(logbfs[idxGood])
internalTable[["P(M|data)"]] <- exp(logbfs - logsumbfs)
nmodels <- sum(idxGood)
widxBad <- which(!idxGood)
for (i in widxGood) {
internalTable[["BFM"]][i] <- logbfs[i] - logSumExp(logbfs[-c(i, widxBad)]) + log(nmodels - 1L)
}
internalTable[widxBad, "P(M|data)"] <- NaN
internalTable[widxBad, "BFM"] <- NaN
internalTable[widxBad, "BF10"] <- NaN
footnotes <- list(
message = gettext("<b><em>Warning.</em></b> Some Bayes factors could not be calculated. Multi model inference is carried out while excluding these models. The results may be uninterpretable!")
)
}
# create the output table
table <- as.data.frame(internalTable)
if (options[["bayesFactorType"]] == "LogBF10") {
table[["BFM"]] <- internalTable[["BFM"]]
} else {
table[["BFM"]] <- exp(internalTable[["BFM"]])
}
o <- order(table[["BF10"]], decreasing = TRUE)
table <- table[o, ]
idxNull <- which(o == 1L)
if (options[["bayesFactorOrder"]] == "nullModelTop") {
table[idxNull, "error %"] <- NA
table <- table[c(idxNull, seq_len(nrow(table))[-idxNull]), ]
} else {
table[["BF10"]] <- table[["BF10"]] - table[1L, "BF10"]
# recompute error (see BayesFactor:::`.__T__/:base`$`BFBayesFactor#BFBayesFactor`)
table[idxNull, "error %"] <- 0
table[["error %"]] <- sqrt(table[["error %"]]^2 + table[["error %"]][1L]^2)
table[1L, "error %"] <- NA
}
table[["BF10"]] <- .recodeBFtype(table[["BF10"]], newBFtype = options[["bayesFactorType"]], oldBFtype = "LogBF10")
table[["error %"]] <- 100 * table[["error %"]]
if (!is.null(footnotes)) {
idxNan <- which(do.call(cbind, lapply(table[-ncol(table)], is.nan)), arr.ind = TRUE)
footnotes[["rows"]] <- idxNan[, "row"]
footnotes[["cols"]] <- idxNan[, "col"]
}
return(list(table = table, internalTable = internalTable, footnotes = footnotes))
}
# posterior inference ----
.BANOVAestimatePosteriors <- function(jaspResults, dataset, options, model) {
userNeedsPosteriorSamples <- options$posteriorEstimates || options$modelAveragedPosteriorPlot || options$qqPlot ||
options$rsqPlot || options$criTable
if (is.null(model$models) || !userNeedsPosteriorSamples)
return()
# model[["completelyReused"]] is needed because it can happen that some posterior samples can be reused (e.g.,
# when the modelTerms change)
posteriors <- jaspResults[["statePosteriors"]]$object
if (!is.null(posteriors) && isTRUE(model[["completelyReused"]])) { # can all posterior be reused?
posteriorsCRI <- jaspResults[["statePosteriorsCRI"]]$object
if (is.null(posteriorsCRI)) {# do we need to recompute credible intervals?
posteriorsCRI <- .BANOVAcomputePosteriorCRI(dataset, options, model, posteriors)
statePosteriorsCRI <- createJaspState(object = posteriorsCRI)
statePosteriorsCRI$dependOn(options = "credibleInterval",
optionsFromObject = jaspResults[["tableModelComparisonState"]])
jaspResults[["statePosteriorCRI"]] <- statePosteriorsCRI
}
} else { # compute posteriors and credible intervals
posteriors <- .BANOVAsamplePosteriors(jaspResults, dataset, options, model, posteriors)
posteriorsCRI <- .BANOVAcomputePosteriorCRI(dataset, options, model, posteriors)
statePosteriors <- createJaspState(object = posteriors)
statePosteriors$dependOn(
c(.BANOVAmodelSpaceDependencies(options[["modelPrior"]]), "samplingMethodMCMC", "samplesMCMC"),
optionsFromObject = jaspResults[["tableModelComparisonState"]]
)
jaspResults[["statePosteriors"]] <- statePosteriors
statePosteriorsCRI <- createJaspState(object = posteriorsCRI)
statePosteriorsCRI$dependOn(options = "credibleInterval",
optionsFromObject = jaspResults[["statePosteriors"]])
jaspResults[["statePosteriorCRI"]] <- statePosteriorsCRI
}
.BANOVASamplingIssuesTable(jaspResults, posteriors[["samplingIssues"]])
return(c(posteriors, posteriorsCRI))
}
.BANOVAestimatesTable <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["tablePosteriorEstimates"]]) || !options[["posteriorEstimates"]])
return()
estsTable <- createJaspTable(title = gettext("Model Averaged Posterior Summary"))
estsTable$position <- 3
estsTable$dependOn(c(
.BANOVAdataDependencies(),
"posteriorEstimates",
"samplingMethodMCMC", "samplesMCMC", "credibleInterval",
.BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
estsTable$addColumnInfo(name = "Variable", type = "string")
estsTable$addColumnInfo(name = "Level", type = "string")
estsTable$addColumnInfo(name = "Mean", type = "number")
estsTable$addColumnInfo(name = "SD", type = "number")
estsTable$addColumnInfo(name = "Lower", type = "number", overtitle = overTitle)
estsTable$addColumnInfo(name = "Upper", type = "number", overtitle = overTitle)
if (!is.null(model[["posteriors"]])) {
.BANOVAfillEstimatesTable(
jaspTable = estsTable,
mus = model$posteriors$weightedMeans,
sds = model$posteriors$weightedSds,
cri = model$posteriors$weightedCRIs,
hasNoLevels = model$posteriors$allContinuous,
isRandom = model$posteriors$isRandom
)
}
jaspResults[["tablePosteriorEstimates"]] <- estsTable
return()
}
.BANOVAfillEstimatesTable <- function(jaspTable, mus, sds, cri, hasNoLevels, isRandom = NULL) {
if (!is.null(isRandom) && any(isRandom)) {
# remove random effects
mus <- mus[!isRandom]
sds <- sds[!isRandom]
cri <- cri[!isRandom, ]
}
table <- .BANOVAgetLevelsFromParamNames(names(mus))
colnames(table) <- c("Variable", "Level")
# set repeated parameter names to ""
idxDup <- duplicated(table[, "Variable"])
table[idxDup, "Variable"] <- ""
# decode base64 variables
table[!idxDup, "Variable"] <- jaspBase::gsubInteractionSymbol(table[!idxDup, "Variable"])
# rename mu to Intercept
table[1L, "Variable"] <- "Intercept"
# attach posterior means and sds
table <- cbind(as.data.frame(table),
"Mean" = mus,
"SD" = sds,
"Lower" = cri[, 1L],
"Upper" = cri[, 2L])
if (hasNoLevels)
table <- table[, -2L]
jaspTable$setData(table)
return()
}
.BANOVArsquaredTable <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["tableBMACRI"]]) || !options[["criTable"]])
return()
criTable <- createJaspTable(title = gettextf("Model Averaged R%s", "\u00B2"))
criTable$position <- 3.5
criTable$dependOn(c(
.BANOVAdataDependencies(),
"criTable",
"samplingMethodMCMC", "samplesMCMC", "bayesFactorType", "credibleInterval",
.BANOVAmodelSpaceDependencies(options[["modelPrior"]]),
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
criTable$addColumnInfo(name = "rsq", type = "string", title = "")
criTable$addColumnInfo(name = "Mean", type = "number")
criTable$addColumnInfo(name = "Lower", type = "number", overtitle = overTitle)
criTable$addColumnInfo(name = "Upper", type = "number", overtitle = overTitle)
if (!is.null(model[["posteriors"]])) {
cri <- model[["posteriors"]][["weightedRsqCri"]]
df <- data.frame(
rsq = "R\u00B2",
Mean = model[["posteriors"]][["weightedRsqMean"]],
Lower = cri[1L],
Upper = cri[2L]
)
criTable$setData(df)
} else {
criTable[["rsq"]] <- "R\u00B2"
}
jaspResults[["tableBMACRI"]] <- criTable
return()
}
.BANOVASamplingIssuesTable <- function(jaspResults, samplingIssues) {
if (is.null(samplingIssues) || length(samplingIssues) == 0L || !is.null(jaspResults[["tableSamplingIssues"]]))
return()
issuesTable <- createJaspTable(title = gettext("<b><em>Warning: sampling issues encountered!</em></b>"),
# below model and effects table (which may be unaffected) but above estimates table
position = 2.5)
issuesTable$addColumnInfo(name = "Models", title = gettext("Affected model"), type = "string")
issuesTable$addColumnInfo(name = "percentageSucces", title = gettextf("%% useful samples"), type = "number")
issuesTable$addColumnInfo(name = "noSuccess", title = gettext("Useful samples"), type = "integer")
issuesTable$addColumnInfo(name = "total", title = gettext("Total samples drawn"), type = "integer")
df <- data.frame(
Models = vapply(samplingIssues, `[[`, FUN.VALUE = character(1L), "model"),
noSuccess = vapply(samplingIssues, `[[`, FUN.VALUE = integer(1L), "remainingRows"),
total = vapply(samplingIssues, `[[`, FUN.VALUE = integer(1L), "originalRows")
)
df[["percentageSucces"]] <- df[["noSuccess"]] / df[["total"]]
if (any(df[["percentageSucces"]] < 0.25) || any(df[["remainingRows"]] < 1000L))
issuesTable$addFootnote(
gettextf("For some affected models, more than 75%% of the posterior samples failed, or fewer than 1000 samples remained for subsequent results. All model-averaged output may be biased and uninterpretable. Check the model specification and data for any odd patterns."),
symbol = .BANOVAGetWarningSymbol()
)
issuesTable$setData(df)
issuesTable$dependOn(
# these options correspond to userNeedsPosteriorSamples inside .BANOVAestimatePosteriors
options = c("posteriorEstimates", "modelAveragedPosteriorPlot", "qqPlot", "rsqPlot", "criTable", "modelTerms"),
optionsFromObject = jaspResults[["statePosteriors"]]
)
jaspResults[["tableSamplingIssues"]] <- issuesTable
}
.BANOVAGetWarningSymbol <- function() {
gettext("<b><em>Warning.</em></b>")
}
# Plots wrappers ----
.BANOVAposteriorPlot <- function(jaspResults, dataset, options, model) {
# meta wrapper for model averaged posterior plots
if (!is.null(jaspResults[["posteriorPlot"]]) || !options$modelAveragedPosteriorPlot)
return()
posteriorPlotContainer <- createJaspContainer(title = gettext("Model Averaged Posterior Distributions"))
jaspResults[["posteriorPlot"]] <- posteriorPlotContainer
posteriorPlotContainer$dependOn(c("modelAveragedPosteriorPlot", "modelTerms", "credibleInterval", "groupPosterior",
"repeatedMeasuresCells", "dependent"))
posteriorPlotContainer$position <- 4
if (is.null(model$models)) {
posteriorPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Posterior distribution"), width = 400, height = 400,
plot = NULL)
} else {
posteriorPlotContainer$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
.BANOVAfillPosteriorPlotContainer(
container = posteriorPlotContainer,
densities = model$posterior$weightedDensities[, -1L, ], # omit intercept
cris = model$posterior$weightedCRIs[-1L, ], # omit intercept
isRandom = model$posteriors$isRandom[-1L], # omit intercept
groupParameters = identical(options[["groupPosterior"]], "grouped")
)
}
return()
}
.BANOVAfillPosteriorPlotContainer <- function(container, densities, cris, isRandom = NULL, groupParameters = FALSE) {
allParamNames <- colnames(densities)
tmp <- .BANOVAgetLevelsFromParamNames(allParamNames)
plotTitles <- jaspBase::gsubInteractionSymbol(tmp[, "parameter"])
xNames <- tmp[, "level"]
if (is.null(isRandom)) {
indices <- seq_along(allParamNames)
} else {
indices <- which(!isRandom)
}
if (groupParameters) {
indices <- split(indices, plotTitles[indices])[order(unique(plotTitles[indices]))]
nms <- names(indices)
for (i in seq_along(indices)) {
ind <- indices[[i]]
if (all(c(densities[, ind, "y"]) == 0)) { # only TRUE is never sampled and set to 0
plot <- createJaspPlot(title = nms[i], width = 400, height = 400)
plot$setError(gettext("Could not plot posterior distribution. Check if an error occurred for models including this parameter."))
} else {
# make prior posterior plot
dfLines <- data.frame(x = c(densities[, ind, "x"]),
y = c(densities[, ind, "y"]),
g = rep(xNames[ind], each = nrow(densities)))
xBreaks <- jaspGraphs::getPrettyAxisBreaks(dfLines$x)
yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(0, dfLines$y))
breaksYmax <- yBreaks[length(yBreaks)]
obsYmax <- max(dfLines$y)
newymax <- max(1.25 * obsYmax, breaksYmax)
lInd <- length(ind)
if (lInd == 1L) {
plusmin <- 0
showLegend <- FALSE
} else {
# some jitter in CRI height
showLegend <- TRUE
plusmin <- rep(-1, lInd)
plusmin[seq(2, lInd, 2)] <- 0
if (lInd > 2)
plusmin[seq(3, lInd, 3)] <- 1
}
dfCri <- data.frame(
xmin = cris[ind, 1L],
xmax = cris[ind, 2L],
y = (newymax - obsYmax)/2 + obsYmax + plusmin * (newymax - obsYmax) / 10,
g = xNames[ind]
)
if (showLegend) {
# if two distributions are remarkably similar, i.e., have nearly identical credible intervals,
# we add a linetype aestethic
tol <- 1e-4
distMin <- which(stats::dist(dfCri$xmin) < tol)
distMax <- which(stats::dist(dfCri$xmin) < tol)
distBoth <- intersect(distMin, distMax)
if (length(distBoth) > 0) {
aesLine <- ggplot2::aes(x = x, y = y, g = g, color = g, linetype = g)
aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g, color = g, linetype = g)
} else {
aesLine <- ggplot2::aes(x = x, y = y, g = g, color = g)
aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g, color = g)
}
} else {
# don't use color for single density plots (covariates)
aesLine <- ggplot2::aes(x = x, y = y, g = g)
aesErrorbar <- ggplot2::aes(xmin = xmin, xmax = xmax, y = y, group = g)
}
maxheight <- min(newymax - dfCri$y[1:min(lInd, 3)])
xlab <- nms[i]
# ggplot doesn't like our fancy unicode * so we need to escape it
xlab <- stringi::stri_escape_unicode(xlab)
# change escaped version into x
xlab <- gsub(pattern = "\\u2009\\u273b\\u2009", replacement = " x ", x = xlab, fixed = TRUE)
ncolLegend <- ceiling(lInd / 14)
guideLegend <- ggplot2::guide_legend(title = gettext("Level"), keywidth = 0.25, keyheight = 0.1, default.unit = "inch",
ncol = ncolLegend)
p <- ggplot2::ggplot(data = dfLines, mapping = aesLine) +
ggplot2::geom_line(size = 1.1) +
ggplot2::geom_errorbarh(data = dfCri, mapping = aesErrorbar, height = maxheight, size = 1.1,
inherit.aes = FALSE) +
ggplot2::scale_x_continuous(name = xlab, breaks = xBreaks, limits = range(xBreaks)) +
ggplot2::scale_y_continuous(name = gettext("Density"), breaks = yBreaks, limits = c(0, newymax)) +
colorspace::scale_color_discrete_qualitative() +
ggplot2::scale_linetype() +
ggplot2::guides(color = guideLegend, linetype = guideLegend)
p <- jaspGraphs::themeJasp(p, legend.position = if (showLegend) "right" else "none")
plot <- createJaspPlot(title = nms[i], width = 400, height = 400, plot = p)
}
container[[allParamNames[i]]] <- plot
}
} else {
for (i in indices) {
# make prior posterior plot
df <- data.frame(x = densities[, i, "x"],
y = densities[, i, "y"])
p <- jaspGraphs::PlotPriorAndPosterior(
dfLines = df,
xName = xNames[i],
CRI = cris[i, ],
drawCRItxt = FALSE
)
plot <- createJaspPlot(title = plotTitles[i], width = 400, height = 400, plot = p)
container[[allParamNames[i]]] <- plot
}
}
return()
}
.BANOVAqqplot <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["QQplot"]]) || !options[["qqPlot"]])
return()
plot <- createJaspPlot(
title = gettext("Model Averaged Q-Q Plot"),
width = 400,
height = 400,
aspectRatio = 1
)
if (!is.null(model[["models"]])) {
plot$plotObject <- jaspGraphs::plotQQnorm(
residuals = model[["posteriors"]][["weightedResidSumStats"]][,"mean"],
lower = model[["posteriors"]][["weightedResidSumStats"]][,"cri.2.5%"],
upper = model[["posteriors"]][["weightedResidSumStats"]][,"cri.97.5%"],
ablineColor = "darkred"
)
plot$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
}
plot$dependOn(c("qqPlot", "modelTerms", "credibleInterval", "repeatedMeasuresCells", "dependent"))
plot$position <- 5
jaspResults[["QQplot"]] <- plot
return()
}
.BANOVArsqplot <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["rsqplot"]]) || !options[["rsqPlot"]])
return()
plot <- createJaspPlot(
title = gettextf("Model Averaged Posterior R%s","\u00B2"),
width = 400,
height = 400,
aspectRatio = 1
)
if (!is.null(model[["models"]])) {
dd <- model[["posteriors"]][["weightedRsqDens"]]
rsqCri <- model[["posteriors"]][["weightedRsqCri"]]
df <- data.frame(x = dd$x, y = dd$y)
xName <- expression(R^2)
plot$plotObject <- jaspGraphs::PlotPriorAndPosterior(dfLines = df, xName = xName, CRI = rsqCri, drawCRItxt = FALSE)
plot$dependOn(optionsFromObject = jaspResults[["tableModelComparisonState"]])
}
plot$dependOn(c("rsqPlot", "modelTerms", "credibleInterval", "repeatedMeasuresCells", "dependent"))
plot$position <- 6
jaspResults[["rsqplot"]] <- plot
return()
}
# Post hoc comparison ----
.BANOVAnullControlPostHocTable <- function(jaspResults, dataset, options, model) {
if (length(options$postHocTerms) == 0L)
return()
postHocCollection <- jaspResults[["collectionPosthoc"]]
if (is.null(postHocCollection)) {
postHocCollection <- createJaspContainer(title = gettext("Post Hoc Tests"))
postHocCollection$position <- 8
postHocCollection$addCitation(.BANOVAcitations[3:4])
postHocCollection$dependOn(c("dependent", "repeatedMeasuresCells", "postHocNullControl", "bayesFactorType"))
jaspResults[["collectionPosthoc"]] <- postHocCollection
}
# the same footnote for all the tables
footnote <- gsub("[\r\n\t]", "",
gettext("The posterior odds have been corrected for multiple testing by fixing to 0.5 the prior probability that the null hypothesis holds across all comparisons (Westfall, Johnson, & Utts, 1997). Individual comparisons are based on the default t-test with a Cauchy (0, r = 1/sqrt(2)) prior. The \"U\" in the Bayes factor denotes that it is uncorrected."))
bfTxt <- if (options[["postHocNullControl"]]) ", U" else ""
switch(options[["bayesFactorType"]],
BF10 = { bf.title <- paste0("BF<sub>10", bfTxt, "</sub>") },
BF01 = { bf.title <- paste0("BF<sub>01", bfTxt, "</sub>") },
LogBF10 = { bf.title <- paste0("Log(BF<sub>10", bfTxt, "</sub>)") }
)
priorWidth <- 1 / sqrt(2)
posthoc.variables <- unlist(options[["postHocTerms"]])
if (model[["analysisType"]] == "RM-ANOVA") {
dependent <- .BANOVAdependentName
} else {
dependent <- options[["dependent"]]
}
for (posthoc.var in posthoc.variables) {
# does the table already exist?
if (!is.null(postHocCollection[[paste0("postHoc_", posthoc.var)]]))
next
postHocTable <- createJaspTable(title = paste0("Post Hoc Comparisons - ", posthoc.var))
postHocTable$addColumnInfo(name = "(I)", type = "string", title = "", combine=TRUE)
postHocTable$addColumnInfo(name = "(J)", type = "string", title = "")
postHocTable$addColumnInfo(name = "Prior Odds", type = "number", title = gettext("Prior Odds"))
postHocTable$addColumnInfo(name = "Posterior Odds", type = "number", title = gettext("Posterior Odds"))
postHocTable$addColumnInfo(name = "BF", type = "number", title = bf.title)
postHocTable$addColumnInfo(name = "error %", type = "number", title = gettextf("error %%"))
postHocTable$dependOn(optionContainsValue = list("postHocTerms" = posthoc.var))
if (options[["postHocNullControl"]])
postHocTable$addFootnote(footnote)
if (is.null(model$models)) { # only show empty table
postHocCollection[[paste0("postHoc_", posthoc.var)]] <- postHocTable
next
}
fixed <- unlist(c(options$fixedFactors, sapply(options$repeatedMeasuresFactors, `[[`, "name")))
if (model$analysisType == "RM-ANOVA" && posthoc.var %in% fixed && !posthoc.var %in% options$betweenSubjectFactors) {
variable.levels <- options$repeatedMeasuresFactors[[which(lapply(options$repeatedMeasuresFactors, function(x) x$name) == posthoc.var)]]$levels
paired <- TRUE
} else if (posthoc.var %in% c(options$fixedFactors, options$betweenSubjectFactors, options$randomFactors)) {
variable.levels <- levels(dataset[[posthoc.var]])
paired <- FALSE
} else {
next
}
if (length(variable.levels) < 2L)
next
pairs <- utils::combn(variable.levels, 2)
splitname <- if (model[["analysisType"]] == "RM-ANOVA") .BANOVAdependentName else dependent
allSplits <- split(dataset[[splitname]], dataset[[posthoc.var]])
errMessages <- NULL
for (i in 1:ncol(pairs)) {
if (!is.null(model$models)) {
x <- na.omit(allSplits[[pairs[1L, i]]])
y <- na.omit(allSplits[[pairs[2L, i]]])
ttest <- try(BayesFactor::ttestBF(x = x, y = y, rscale = priorWidth, paired = paired), silent=TRUE)
if (isTryError(ttest)) {
priorOdds <- postOdds <- logBF <- error <- "NaN"
errorMsg <- .extractErrorMessage(ttest)
if (errorMsg %in% names(errMessages)) {
errMessages[[errorMsg]][["row_names"]] <- c(errMessages[[errorMsg]][["row_names"]], i)
} else {
errMessages[[errorMsg]] <- list(
message = if (endsWith(errorMsg, ".")) errorMsg else paste0(errorMsg, "."),
row_names = i
)
}
} else {
if (options[["postHocNullControl"]]) {
pH0 <- 0.5^(2 / length(variable.levels))
} else {
pH0 <- 0.5
}
logBF <- ttest@bayesFactor$bf # <- log(BF10)
if (options[["bayesFactorType"]] == "BF01") {
priorOdds <- pH0 / (1 - pH0)
logBF <- -logBF
} else {
priorOdds <- (1 - pH0) / pH0
}
postOdds <- log(priorOdds) + logBF
postOdds <- exp(postOdds)
if (options[["bayesFactorType"]] != "LogBF10")
logBF <- exp(logBF)
error <- ttest@bayesFactor$error * 100
}
}
row <- list(
"(I)" = pairs[1L, i],
"(J)" = pairs[2L, i],
"Prior Odds" = priorOdds,
"Posterior Odds" = postOdds,
"BF" = logBF,
"error %" = error
)
postHocTable$addRows(row, rowNames = paste0("row", i))
}
if (!is.null(errMessages)) {
for (i in seq_along(errMessages))
postHocTable$addFootnote(message = errMessages[[i]][["message"]],
rowNames = paste0("row", errMessages[[i]][["row_names"]]))
}
postHocCollection[[paste0("postHoc_", posthoc.var)]] <- postHocTable
}
return()
}
# Data reading ----
.BANOVAreadData <- function(dataset, options, analysisType) {
if (is.null(dataset)) {
if (analysisType == "RM-ANOVA")
return(.BANOVAreadRManovaData(dataset, options))
numeric.vars <- c(unlist(options$covariates), unlist(options$dependent))
numeric.vars <- numeric.vars[numeric.vars != ""]
factor.vars <- c(unlist(options$fixedFactors), unlist(options$randomFactors))
factor.vars <- factor.vars[factor.vars != ""]
dataset <- .readDataSetToEnd(
columns.as.numeric = numeric.vars,
columns.as.factor = factor.vars,
exclude.na.listwise = c(numeric.vars, factor.vars)
)
}
return(dataset)
}
.BANOVAdependentName <- "JaspColumn_.dependent._Encoded"
.BANOVAsubjectName <- "JaspColumn_.subject._Encoded"
.BANOVAdecodeSubject <- function(string) {
return(gsub(.BANOVAsubjectName, "subject", string))
}
.BANOVAdecodeNuisance <- function(nuisance) {
# .BANOVAsubjectName needs to be handled separately
idx <- nuisance == .BANOVAsubjectName
nuisance[idx] <- "subject"
nuisance[!idx] <- jaspBase::gsubInteractionSymbol(nuisance[!idx])
return(nuisance)
}
.BANOVAreadRManovaData <- function(dataset, options) {
if (!("" %in% options$repeatedMeasuresCells)) {
rm.vars <- options$repeatedMeasuresCells
bs.factors <- options$betweenSubjectFactors
bs.covariates <- options$covariates
rm.factors <- options$repeatedMeasuresFactors
all.variables <- c (bs.factors, bs.covariates, rm.vars)
dataset <- .readDataSetToEnd(
columns.as.numeric = c(rm.vars, bs.covariates),
columns.as.factor = bs.factors,
exclude.na.listwise = all.variables
)
dataset <- try(
.shortToLong(dataset, rm.factors, rm.vars, c(bs.factors, bs.covariates), dependentName = .BANOVAdependentName, subjectName = .BANOVAsubjectName),
silent = TRUE
)
}
return(dataset)
}
# Descriptives ----
.BANOVAdescriptives <- function(jaspResults, dataset, options, errors, analysisType, ready = TRUE, position = 9001) {
if (!ready)
return()
# the main use of this function is that descriptives can now be reused for the frequentist ANOVAs
# without the container, the position could mess things up
descriptivesContainer <- jaspResults[["descriptivesContainer"]]
if (is.null(descriptivesContainer)) {
descriptivesContainer <- createJaspContainer(title = gettext("Descriptives"))
descriptivesContainer$dependOn(c("dependent", "repeatedMeasuresCells"))
descriptivesContainer$position <- position
jaspResults[["descriptivesContainer"]] <- descriptivesContainer
}
.BANOVAdescriptivesTable (descriptivesContainer, dataset, options, errors, analysisType)
.BANOVAdescriptivesPlots (descriptivesContainer, dataset, options, errors, analysisType)
.BANOVAbarPlots (descriptivesContainer, dataset, options, errors, analysisType)
.BANOVArainCloudPlots (descriptivesContainer, dataset, options, errors, analysisType)
return()
}
.BANOVAdescriptivesTable <- function(jaspContainer, dataset, options, errors, analysisType) {
if (!options[["descriptives"]] || !is.null(jaspContainer[["tableDescriptives"]]))
return()
if (analysisType == "RM-ANOVA") {
dependent <- .BANOVAdependentName
fixed <- unlist(c(lapply(options[["repeatedMeasuresFactors"]], `[[`, "name"), options[["betweenSubjectFactors"]]))
title <- gettext("Descriptives")
} else {
dependent <- options[["dependent"]]
fixed <- options[["fixedFactors"]]
if (!is.null(dependent) && dependent != "") {
title <- gettextf("Descriptives - %s", options$dependent)
} else {
title <- gettext("Descriptives")
}
}
descriptivesTable <- createJaspTable(title = title)
descriptivesTable$position <- 1
descriptivesTable$dependOn(c("dependent", "fixedFactors", "betweenSubjectFactors", "descriptives",
"credibleInterval"))
# internal names use base64 so they don't have " " which R changes into "." because R does that to dataframe names.
# Also adds a . in case the base64 is magically "Mean", "SD" or "N"
fixedDot <- paste0(fixed, ".")
for (i in seq_along(fixed))
descriptivesTable$addColumnInfo(name = fixedDot[i], type = "string", title = fixed[i], combine = TRUE)
descriptivesTable$addColumnInfo(name = "N", title = gettext("N"), type = "integer")
descriptivesTable$addColumnInfo(name = "Mean", title = gettext("Mean"), type = "number")
descriptivesTable$addColumnInfo(name = "SD", title = gettext("SD"), type = "number")
descriptivesTable$addColumnInfo(name = "SE", title = gettext("SE"), type = "number")
descriptivesTable$addColumnInfo(name = "coefOfVariation", title = gettext("Coefficient of variation"), type = "number")
if (!is.null(options[["credibleInterval"]])) {
overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
descriptivesTable$addColumnInfo(name = gettext("Lower"), type = "number", overtitle = overTitle)
descriptivesTable$addColumnInfo(name = gettext("Upper"), type = "number", overtitle = overTitle)
}
descriptivesTable$showSpecifiedColumnsOnly <- TRUE
jaspContainer[["tableDescriptives"]] <- descriptivesTable
if (errors$noVariables)
return()
# order the data to show
dataset2 <- dataset[do.call(order, dataset[, fixed, drop = FALSE]), c(dependent, fixed)]
# by pasting the fixedFactors together we obtain the unique indices to group on. This excludes
# non-existent combinations. A "." is added to deal with the level "".
ind <- apply(dataset2[, fixed, drop = FALSE], 1L, paste0, ".", collapse = "")
# temporary function to calculate all descriptives
tmpFun <- function(data, fixed, dependent, cri) {
row <- list()
for (j in fixed)
row[[paste0(j, ".")]] <- as.character(data[1L, j])
N <- nrow(data)
row[["N"]] <- N
if (N == 0L) {
row[["Mean"]] <- row[["SD"]] <- row[["SE"]] <- row[["coefOfVariation"]] <- NA
} else if (N == 1L) {
row[["Mean"]] <- data[[dependent]]
row[["SD"]] <- row[["SE"]] <- row[["coefOfVariation"]] <- row[["Lower"]] <- row[["Upper"]] <- NA
} else {
row[["Mean"]] <- mean(data[[dependent]])
row[["SD"]] <- stats::sd(data[[dependent]])
row[["SE"]] <- stats::sd(data[[dependent]]) / sqrt(N)
row[["coefOfVariation"]] <- stats::sd(data[[dependent]]) / mean(data[[dependent]])
tmp <- jaspTTests::.posteriorSummaryGroupMean(data[[dependent]], cri)
row[["Lower"]] <- tmp[["ciLower"]]
row[["Upper"]] <- tmp[["ciUpper"]]
}
return(row)
}
# apply tempFun on each subset defined by ind
rows <- by(dataset2, ind, tmpFun, fixed = fixed,
dependent = dependent, cri = options[["credibleInterval"]])
# do.call(rbind, rows) turns rows into a data.frame (from a list) for jaspResults
data <- do.call(rbind.data.frame, rows)
# the results of `by` are in alphabetical order, so we reorder the data.frame to match the factor level order. Fixes https://github.com/jasp-stats/jasp-issues/issues/1741
newOrder <- match(unique(ind), rownames(data))
data <- data[newOrder, ]
# add footnote if there are unobserved combinations
nObserved <- nrow(data)
nPossible <- prod(sapply(dataset2[, fixed, drop = FALSE], nlevels))
if (nObserved != nPossible) {
descriptivesTable$addFootnote(
message = gettextf(
"Some combinations of factors are not observed and hence omitted (%1$g out of %2$g combinations are unobserved).",
nPossible - nObserved, nPossible
)
)
}
descriptivesTable$setData(data)
return()
}
.BANOVAdescriptivesPlots <- function(jaspContainer, dataset, options, errors, analysisType) {
if (length(options[["descriptivePlotHorizontalAxis"]]) == 0L
|| options[["descriptivePlotHorizontalAxis"]] == ""
|| !is.null(jaspContainer[["containerDescriptivesPlots"]]))
return()
descriptivesPlotContainer <- createJaspContainer(title = gettext("Descriptives plots"))
descriptivesPlotContainer$position <- 2
jaspContainer[["containerDescriptivesPlots"]] <- descriptivesPlotContainer
# either Bayesian or Frequentist anova
if (is.null(options$descriptivePlotErrorBarType)) { # TRUE implies Bayesian
plotErrorBars <- options$descriptivePlotCi
errorBarType <- "ci"
conf.interval <- options$descriptivePlotCiLevel
moreyCorrection <- TRUE
descriptivesPlotContainer$dependOn(c("dependent", "descriptivePlotCi", "descriptivePlotCiLevel"))
} else {
plotErrorBars <- options$descriptivePlotErrorBar
errorBarType <- options$descriptivePlotErrorBarType
conf.interval <- options$descriptivePlotCiLevel
moreyCorrection <- if (is.null(options[["applyMoreyCorrectionErrorBars"]])) FALSE else options[["applyMoreyCorrectionErrorBars"]]
descriptivesPlotContainer$dependOn(c("dependent", "descriptivePlotErrorBar", "descriptivePlotErrorBarType", "descriptivePlotCiLevel",
"descriptivePlotErrorBarPooled", "applyMoreyCorrectionErrorBars"))
}
usePooledSE <- if (is.null(options[["descriptivePlotErrorBarPooled"]])) FALSE else options[["descriptivePlotErrorBarPooled"]]
descriptivesPlotContainer$dependOn(c("descriptivePlotHorizontalAxis", "descriptivePlotSeparateLines", "descriptivePlotSeparatePlot", "descriptivePlotYAxisLabel"))
if (errors$noVariables) {
descriptivesPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Descriptives Plot"))
return()
}
groupVars <- c(options$descriptivePlotHorizontalAxis, options$descriptivePlotSeparateLines, options$descriptivePlotSeparatePlot)
groupVars <- groupVars[groupVars != ""]
if (analysisType == "RM-ANOVA") {
dependent <- .BANOVAdependentName
yLabel <- options[["descriptivePlotYAxisLabel"]]
} else {
dependent<- options$dependent
yLabel <- options[["dependent"]]
}
betweenSubjectFactors <- groupVars[groupVars %in% options$betweenSubjectFactors]
repeatedMeasuresFactors <- groupVars[groupVars %in% sapply(options$repeatedMeasuresFactors, `[[`, "name")]
if (length(repeatedMeasuresFactors) == 0) {
summaryStat <- jaspTTests::.summarySE(as.data.frame(dataset), measurevar = dependent, groupvars = groupVars,
conf.interval = conf.interval, na.rm = TRUE, .drop = FALSE,
errorBarType = errorBarType, dependentName = .BANOVAdependentName,
subjectName = .BANOVAsubjectName)
} else {
summaryStat <- jaspTTests::.summarySEwithin(as.data.frame(dataset), measurevar= .BANOVAdependentName,
betweenvars = betweenSubjectFactors,
withinvars = repeatedMeasuresFactors,
idvar = .BANOVAsubjectName,
conf.interval = conf.interval,
na.rm=TRUE, .drop = FALSE, errorBarType = errorBarType,
usePooledSE = usePooledSE,
useMoreyCorrection = moreyCorrection,
dependentName = .BANOVAdependentName,
subjectName = .BANOVAsubjectName)
}
if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
splitScatterOptions <- options
splitScatterOptions[["colorPalette"]] <- "colorblind3"
splitScatterOptions[["scatterPlotLegend"]] <- TRUE
splitScatterOptions[["scatterPlotRegressionLine"]] <- TRUE
splitScatterOptions[["scatterPlotRegressionLineCi"]] <- plotErrorBars
splitScatterOptions[["scatterPlotRegressionLineType"]] <- "linear"
splitScatterOptions[["scatterPlotGraphTypeAbove"]] <- "none"
splitScatterOptions[["scatterPlotGraphTypeRight"]] <- "none"
splitScatterOptions[["scatterPlotRegressionLineCiLevel"]] <- options[["descriptivePlotCiLevel"]]
if (options$descriptivePlotSeparatePlot != "") {
for (thisLevel in levels(dataset[[options[["descriptivePlotSeparatePlot"]]]])) {
subData <- dataset[dataset[[options[["descriptivePlotSeparatePlot"]]]] == thisLevel, ]
thisPlotName <- paste0(options[["descriptivePlotHorizontalAxis"]], " - ", options[["dependent"]], ": ",
options[["descriptivePlotSeparatePlot"]], " = ", thisLevel)
jaspDescriptives::.descriptivesScatterPlots(descriptivesPlotContainer, subData, c(options[["descriptivePlotHorizontalAxis"]], options[["dependent"]]),
split = options[["descriptivePlotSeparateLines"]], options = splitScatterOptions, name = thisPlotName,
dependOnVariables = FALSE)
}
} else {
jaspDescriptives::.descriptivesScatterPlots(descriptivesPlotContainer, dataset, c(options[["descriptivePlotHorizontalAxis"]], options[["dependent"]]),
split = options[["descriptivePlotSeparateLines"]], options = splitScatterOptions, dependOnVariables = FALSE)
}
return()
}
colnames(summaryStat)[colnames(summaryStat) == dependent] <- "dependent"
if (options$descriptivePlotHorizontalAxis != "") {
colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotHorizontalAxis] <- "descriptivePlotHorizontalAxis"
}
if (options$descriptivePlotSeparateLines != "") {
colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotSeparateLines] <- "descriptivePlotSeparateLines"
}
if (options$descriptivePlotSeparatePlot != "") {
colnames(summaryStat)[colnames(summaryStat) == options$descriptivePlotSeparatePlot] <- "descriptivePlotSeparatePlot"
}
base_breaks_x <- function(x){
b <- unique(as.numeric(x))
d <- data.frame(y=-Inf, yend=-Inf, x=min(b), xend=max(b))
list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1))
}
base_breaks_y <- function(x, plotErrorBars){
if (plotErrorBars) {
ci.pos <- c(x[,"dependent"], x[,"dependent"]-x[,"ci"],x[,"dependent"]+x[,"ci"])
b <- pretty(ci.pos)
d <- data.frame(x=-Inf, xend=-Inf, y=min(b), yend=max(b))
list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1),
ggplot2::scale_y_continuous(breaks=c(min(b),max(b))))
} else {
b <- pretty(x[,"dependent"])
d <- data.frame(x=-Inf, xend=-Inf, y=min(b), yend=max(b))
list(ggplot2::geom_segment(data=d, ggplot2::aes(x=x, y=y, xend=xend, yend=yend), inherit.aes=FALSE, size = 1),
ggplot2::scale_y_continuous(breaks=c(min(b),max(b))))
}
}
if (options$descriptivePlotSeparatePlot != "") {
subsetPlots <- levels(summaryStat[,"descriptivePlotSeparatePlot"])
nPlots <- length(subsetPlots)
} else {
nPlots <- 1
}
for (i in seq_len(nPlots)) {
if (nPlots > 1L) {
title <- paste(options$descriptivePlotSeparatePlot,": ",subsetPlots[i], sep = "")
} else {
title <- ""
}
descriptivesPlot <- createJaspPlot(title = title)
descriptivesPlotContainer[[title]] <- descriptivesPlot
descriptivesPlot$height <- 300
if (options$descriptivePlotSeparateLines != "") {
descriptivesPlot$width <- 430
} else {
descriptivesPlot$width <- 300
}
if (options$descriptivePlotSeparatePlot != "") {
summaryStatSubset <- subset(summaryStat,summaryStat[,"descriptivePlotSeparatePlot"] == subsetPlots[i])
} else {
summaryStatSubset <- summaryStat
}
if (options$descriptivePlotSeparateLines == "") {
p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x=descriptivePlotHorizontalAxis,
y=dependent,
group=1))
} else {
p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x=descriptivePlotHorizontalAxis,
y=dependent,
group=descriptivePlotSeparateLines,
shape=descriptivePlotSeparateLines,
fill=descriptivePlotSeparateLines))
}
if (plotErrorBars && !(options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]])) {
pd <- ggplot2::position_dodge(.2)
p = p + ggplot2::geom_errorbar(ggplot2::aes(ymin=ciLower,
ymax=ciUpper),
colour="black", width=.2, position=pd)
} else {
pd <- ggplot2::position_dodge(0)
}
guideLegend <- ggplot2::guide_legend(nrow = min(10, nlevels(summaryStatSubset$descriptivePlotSeparateLines)),
title = options$descriptivePlotSeparateLines, keywidth = 0.1, keyheight = 0.3,
default.unit = "inch")
if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
line <- ggplot2::geom_smooth(method = "lm", size = .7, color = "black", se = FALSE)
addHorizontalVar <- summaryStatSubset[,"descriptivePlotHorizontalAxis"]
} else {
line <- ggplot2::geom_line(position=pd, size = .7)
}
if (plotErrorBars) {
ci.pos <- c(summaryStatSubset[,"dependent"],
summaryStatSubset[,"dependent"]-summaryStatSubset[,"ci"],
summaryStatSubset[,"dependent"]+summaryStatSubset[,"ci"],
min(summaryStatSubset[,"dependent"])*1.1,
max(summaryStatSubset[,"dependent"])*1.1)
yBreaks <- jaspGraphs::getPrettyAxisBreaks(ci.pos)
} else {
yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(summaryStatSubset[,"dependent"],
min(summaryStatSubset[,"dependent"])*1.1,
max(summaryStatSubset[,"dependent"])*1.1))
}
if (options[["descriptivePlotHorizontalAxis"]] %in% options[["covariates"]]) {
ggXaxis <- ggplot2::scale_x_continuous(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[,"descriptivePlotHorizontalAxis"]))
} else {
ggXaxis <- ggplot2::scale_x_discrete(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[,"descriptivePlotHorizontalAxis"]))
}
p <- p + line +
ggplot2::geom_point(position=pd, size=4) +
ggplot2::scale_fill_manual(values = c(rep(c("white","black"),5),rep("grey",100)), guide=guideLegend) +
ggplot2::scale_shape_manual(values = c(rep(c(21:25),each=2),21:25,7:14,33:112), guide=guideLegend) +
ggplot2::scale_color_manual(values = rep("black",200),guide=guideLegend) +
ggplot2::labs(y = yLabel, x = options[["descriptivePlotHorizontalAxis"]]) +
ggplot2::scale_y_continuous(breaks = yBreaks, limits = range(yBreaks)) +
ggXaxis +
jaspGraphs::geom_rangeframe() +
jaspGraphs::themeJaspRaw(legend.position = "right")
descriptivesPlot$plotObject <- p
}
return()
}
.BANOVAbarPlots <- function(jaspContainer, dataset, options, errors, analysisType) {
if (length(options[["barPlotHorizontalAxis"]]) == 0L
|| options[["barPlotHorizontalAxis"]] == ""
|| !is.null(jaspContainer[["containerBarPlots"]]))
return()
barPlotContainer <- createJaspContainer(title = gettext("Bar plots"))
barPlotContainer$position <- 3
jaspContainer[["containerBarPlots"]] <- barPlotContainer
plotErrorBars <- options[["barPlotErrorBars"]]
errorBarType <- options[["barPlotErrorBarType"]]
confInterval <- options[["barPlotCiInterval"]]
barPlotContainer$dependOn(c("dependent", "barPlotErrorBars", "barPlotErrorBarType", "applyMoreyCorrectionErrorBarsBarplot",
"barPlotHorizontalZeroFix", "barPlotCiInterval", "usePooledStandErrorCITwo"))
usePooledSE <- if (is.null(options[["usePooledStandErrorCITwo"]])) FALSE else options[["usePooledStandErrorCITwo"]]
useMoreyCorrection <- if (is.null(options[["applyMoreyCorrectionErrorBarsBarplot"]])) TRUE else options[["applyMoreyCorrectionErrorBarsBarplot"]]
barPlotHorizontalZeroFix <- options[["barPlotHorizontalZeroFix"]]
barPlotContainer$dependOn(c("barPlotHorizontalAxis", "barPlotSeparatePlots", "labelYAxisTwo"))
if (errors$noVariables) {
barPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Bar Plot"))
return()
}
groupVars <- c(options[["barPlotHorizontalAxis"]], options[["barPlotSeparatePlots"]])
groupVars <- groupVars[groupVars != ""]
if (analysisType == "RM-ANOVA") {
dependent <- .BANOVAdependentName
yLabel <- options[["labelYAxisTwo"]]
} else {
dependent<- options[["dependent"]]
yLabel <- options[["dependent"]]
}
betweenSubjectFactors <- groupVars[groupVars %in% options[["betweenSubjectFactors"]]]
repeatedMeasuresFactors <- groupVars[groupVars %in% sapply(options[["repeatedMeasuresFactors"]], `[[`, "name")]
if (length(repeatedMeasuresFactors) == 0) {
summaryStat <- jaspTTests::.summarySE(as.data.frame(dataset), measurevar = dependent, groupvars = groupVars,
conf.interval = confInterval, na.rm = TRUE, .drop = FALSE,
errorBarType = errorBarType, dependentName = .BANOVAdependentName,
subjectName = .BANOVAsubjectName)
} else {
summaryStat <- jaspTTests::.summarySEwithin(as.data.frame(dataset), measurevar = .BANOVAdependentName,
betweenvars = betweenSubjectFactors,
withinvars = repeatedMeasuresFactors,
idvar = .BANOVAsubjectName,
conf.interval = confInterval,
na.rm=TRUE, .drop = FALSE, errorBarType = errorBarType,
usePooledSE = usePooledSE,
useMoreyCorrection = useMoreyCorrection,
dependentName = .BANOVAdependentName,
subjectName = .BANOVAsubjectName)
}
colnames(summaryStat)[colnames(summaryStat) == dependent] <- "dependent"
if (options[["barPlotHorizontalAxis"]] != "") {
colnames(summaryStat)[colnames(summaryStat) == options[["barPlotHorizontalAxis"]]] <- "barPlotHorizontalAxis"
}
if (options[["barPlotSeparatePlots"]] != "") {
colnames(summaryStat)[colnames(summaryStat) == options[["barPlotSeparatePlots"]]] <- "barPlotSeparatePlots"
subsetPlots <- levels(summaryStat[, "barPlotSeparatePlots"])
nPlots <- length(subsetPlots)
} else {
nPlots <- 1L
}
for (i in seq_len(nPlots)) {
title <- if (nPlots > 1L) {
paste0(options[["barPlotSeparatePlots"]], ": ", subsetPlots[i])
} else {
""
}
barPlot <- createJaspPlot(title = title)
barPlotContainer[[title]] <- barPlot
barPlot$height <- 500
barPlot$width <- 500
if (options[["barPlotSeparatePlots"]] != "") {
summaryStatSubset <- subset(summaryStat, summaryStat[, "barPlotSeparatePlots"] == subsetPlots[i])
} else {
summaryStatSubset <- summaryStat
}
error <- NULL
if (plotErrorBars) {
pd <- ggplot2::position_dodge(.2)
error <- ggplot2::geom_errorbar(ggplot2::aes(ymin = ciLower, ymax = ciUpper),
colour = "black", width = .2, position = pd)
}
values <- 1.1 * range(summaryStat[["dependent"]])
if (barPlotHorizontalZeroFix)
values <- c(0, values)
if (plotErrorBars) {
ci.pos <- c(summaryStat[,"dependent"],
summaryStat[,"dependent"]-summaryStat[,"ci"],
summaryStat[,"dependent"]+summaryStat[,"ci"],
min(summaryStat[,"dependent"])*1.1,
max(summaryStat[,"dependent"])*1.1)
yBreaks <- jaspGraphs::getPrettyAxisBreaks(c(values, ci.pos))
} else {
yBreaks <- jaspGraphs::getPrettyAxisBreaks(values)
}
pd2 <- ggplot2::position_dodge2(preserve = "single")
p <- ggplot2::ggplot(summaryStatSubset, ggplot2::aes(x = barPlotHorizontalAxis, y = dependent, group = 1)) +
ggplot2::geom_hline(yintercept = 0, color = "#858585", size = 0.3) +
ggplot2::geom_bar(stat = "identity", fill = "grey", col = "black", width = .6, position = pd2) +
error +
ggplot2::labs(y = yLabel, x = options[["barPlotHorizontalAxis"]]) +
ggplot2::scale_y_continuous(breaks = yBreaks, limits = range(yBreaks), oob = scales::rescale_none) +
ggplot2::scale_x_discrete(breaks = jaspGraphs::getPrettyAxisBreaks(summaryStatSubset[, "barPlotHorizontalAxis"])) +
jaspGraphs::geom_rangeframe(sides = "l") +
jaspGraphs::themeJaspRaw()
barPlot$plotObject <- p
}
return()
}
.BANOVArainCloudPlots <- function(jaspContainer, dataset, options, errors, analysisType) {
if (length(options[["rainCloudHorizontalAxis"]]) == 0L
|| options[["rainCloudHorizontalAxis"]] == ""
|| !is.null(jaspContainer[["containerRainCloudPlots"]]))
return()
rainCloudPlotsContainer <- createJaspContainer(title = gettext("Raincloud plots"))
rainCloudPlotsContainer$position <- 4
jaspContainer[["containerRainCloudPlots"]] <- rainCloudPlotsContainer
rainCloudPlotsContainer$dependOn(c("dependent", "rainCloudHorizontalAxis", "rainCloudSeparatePlots",
"rainCloudYAxisLabel", "rainCloudHorizontalDisplay"))
if (errors$noVariables) {
rainCloudPlotsContainer[["dummyplot"]] <- createJaspPlot(title = "")
return()
}
groupVar <- options[["rainCloudHorizontalAxis"]]
if (analysisType == "RM-ANOVA") {
addLines <- !(groupVar %in% unlist(options[["betweenSubjectFactors"]]))
dependentV <- .BANOVAdependentName
yLabel <- options[["rainCloudYAxisLabel"]]
if (trimws(yLabel) == "") {
title <- gettext("Dependent")
yLabel <- NULL
} else {
title <- yLabel
}
} else {
addLines <- FALSE
dependentV <- options[["dependent"]]
yLabel <- options[["dependent"]]
title <- options[["dependent"]]
}
if (!is.null(options$rainCloudHorizontalDisplay) && options$rainCloudHorizontalDisplay)
horiz <- TRUE
else
horiz <- FALSE
if (options$rainCloudSeparatePlots != "") {
for (thisLevel in levels(dataset[[options[["rainCloudSeparatePlots"]]]])) {
subData <- dataset[dataset[[options[["rainCloudSeparatePlots"]]]] == thisLevel, ]
thisPlotName <- paste0(title, ": ", options[["rainCloudSeparatePlots"]], ": ", thisLevel)
subPlot <- createJaspPlot(title = thisPlotName, width = 480, height = 320)
rainCloudPlotsContainer[[thisLevel]] <- subPlot
p <- try(jaspTTests::.descriptivesPlotsRainCloudFill(subData, dependentV, groupVar, yLabel, groupVar, addLines, horiz, NULL))
if(isTryError(p))
subPlot$setError(.extractErrorMessage(p))
else
subPlot$plotObject <- p
}
} else {
singlePlot <- createJaspPlot(title = title, width = 480, height = 320)
rainCloudPlotsContainer[["rainCloudPlotSingle"]] <- singlePlot
p <- try(jaspTTests::.descriptivesPlotsRainCloudFill(dataset, dependentV, groupVar, yLabel, groupVar, addLines, horiz, NULL))
if(isTryError(p))
singlePlot$setError(.extractErrorMessage(p))
else
singlePlot$plotObject <- p
}
return()
}
# Sample posteriors ----
.BANOVAsamplePosteriors <- function(jaspResults, dataset, options, model, state) {
# TODO: the density approximation can become more efficient with a fast parametric density approximation
# TODO: Bayes factor samples unobserved interaction levels, what to do?
# if the most complex model is retrieved from the state?
nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e4L else options[["samplesMCMC"]]
nmodels <- length(model[["models"]])
postProbs <- model[["postProbs"]]
statistics <- vector("list", nmodels)
randomFactors <- .BANOVAgetRandomFactors(options, model[["analysisType"]])
dataTypes <- .BANOVAgetDataTypes(dataset, model[["model.list"]][[nmodels]], randomFactors)
levelInfo <- .BANOVAgetLevelInfo(dataset, model[["model.list"]][[nmodels]], dataTypes)
renameFrom <- renameTo <- NULL
if (!is.null(state)) { # can we reuse some posteriors?
reuseable <- model[["reuseable"]]
# it's possible that a user just renames a level of a repeated measures factor
# in that case everything can be reused, but we have to rename some parameters.
# the same may happen when a variable becomes a nuisance parameter, because then
# BayesFactor changes its order in the names.
oldLevelInfo <- state[["levelInfo"]]$levelNames
newLevelInfo <- levelInfo$levelNames
sortTerms <- function(x) {
# split b:a to c("b", "a"), sort it, and then paste it back
# otherwise posteriors samples between different runs are not correctly retrieved from the state.
sapply(strsplit(x, ":", fixed = TRUE), function(x) paste(sort(x), collapse = ":"))
}
tmp_old <- sortTerms(names(oldLevelInfo))
tmp_new <- sortTerms(names(newLevelInfo))
matches <- match(tmp_new, tmp_old)
for (i in seq_along(matches)) {
if (is.na(matches[i]))
next
j <- matches[i]
if (!identical(oldLevelInfo[[j]], newLevelInfo[[i]])) {
if (length(oldLevelInfo[[j]]) == length(newLevelInfo[[i]])) {
renameFrom <- c(renameFrom, oldLevelInfo[[j]])
renameTo <- c(renameTo, newLevelInfo[[i]])
}
}
}
} else {
reuseable <- rep(NA, nmodels)
}
# NOTE: some code checks require saving all samples. To do so, change all samples to samples[[i]] and uncomment:
# samples <- vector("list", nmodels)
allParamNames <- c("mu", unlist(levelInfo$levelNames))
nparam <- length(allParamNames)
weightedMeans <- weights <- numeric(nparam)
names(weights) <- names(weightedMeans) <- allParamNames
allContinuous <- TRUE
h <- (1 - options[["credibleInterval"]]) / 2
probs <- c(h, 1-h)
originalFun <- BayesFactor:::makeChainNeater
on.exit(jaspBase:::assignFunctionInPackage(originalFun, "makeChainNeater", "BayesFactor"))
jaspBase:::assignFunctionInPackage(.BANOVAmakeChainNeater, "makeChainNeater", "BayesFactor")
samplingIssues <- list()
startProgressbar(nmodels, gettext("Sampling posteriors"))
for (i in seq_len(nmodels)) {
# check if the state is reuseable and if it's not NULL, which means it didn't crash
if (is.na(reuseable[i]) || is.null(state$statistics[[reuseable[i]]])) {
if (i == 1L && is.null(model[["models"]][[i]][["bf"]])) {
# NULL model only contains an intercept, use custom sampler
# NOTE: RM-ANOVA never enters here (and would crash if it did)
.setSeedJASP(options)
samples <- .BANOVAsampleNullModel(dataset[[options[["dependent"]]]], nsamples = nIter)
types <- NULL
} else {
# NOTE: we have to sample the random effects, otherwise we cant make predictions (needed for residuals and R^2)
# put the dataset back in
.setSeedJASP(options)
bfObj <- model[["models"]][[i]][["bf"]]
bfObj@data <- dataset
samples <- try(BayesFactor::posterior(bfObj, iterations = nIter))
if (isTryError(samples)) {
# this may error whenever the Bayes factor couldn't be calculated
next
}
types <- samples@model@dataTypes
# BayesFactor stores an internal copy of the dataset, so it can still have old names
if (length(renameFrom) > 0) {
cnms <- colnames(samples)
cnms <- plyr::mapvalues(cnms, renameFrom, renameTo, warn_missing = FALSE) # NOTE: dependency could be removed
colnames(samples) <- cnms
}
# keep only relevant columns, drop sig2, g_xxx, ...
idx <- match(allParamNames, colnames(samples), nomatch = 0L)
samples <- samples[, idx, drop = FALSE]
# for some odd reason, Bayesfactor uses as column name contcor1-contcor1
# if there is a covariate AND fixed factors, but only contcor1 if all variables are continuous...
if (all(types == "continuous")) {
cnms <- colnames(samples)[-1L] # omit the intercept (mu) which is not changed by Bayesfactor
colnames(samples)[-1L] <- paste0(cnms, "-", cnms)
} else {
allContinuous <- FALSE
}
# so some models may yield a bunch of NAs, for example,
# BayesFactor::lmBF(contNormal ~ facGender + contGamma + facGender * contGamma, data = "debug.csv", whichRandom = "facGender")
# we add a footnote and try to not to crash.
if (anyNA(samples)) {
originalRows <- nrow(samples)
samples <- samples[complete.cases(samples), , drop = FALSE]
remainingRows <- nrow(samples)
samplingIssues[[length(samplingIssues) + 1L]] <- list(
model = model[["models"]][[i]][["title"]],
originalRows = originalRows,
remainingRows = remainingRows
)
next
}
}
# although matrixStats::colMeans2 is faster than .colMeans the cost of matrixStats:: is not worth it.
nms <- colnames(samples)
statistics[[i]]$names <- nms # <- these are the names for all objects within one sublist
statistics[[i]]$mean <- .colMeans (samples, m = nIter, n = NCOL(samples))
statistics[[i]]$var <- matrixStats::colVars (samples)
statistics[[i]]$cri <- matrixStats::colQuantiles(samples, probs = probs)
statistics[[i]]$approx <- .BANOVAfitDensity(samples = samples)
statistics[[i]]$types <- types
} else { # reuse state
statistics[[i]] <- state$statistics[[reuseable[i]]]
nms <- statistics[[i]]$names
types <- statistics[[i]]$types
if (length(renameFrom) > 0) {
nms <- plyr::mapvalues(nms, renameFrom, renameTo, warn_missing = FALSE) # NOTE: dependency could be removed
statistics[[i]]$names <- nms
}
if (allContinuous)
allContinuous <- all(types == "continuous")
}
# compute model averaged posterior means
weightedMeans[nms] <- weightedMeans[nms] + postProbs[i] * statistics[[i]]$mean
weights [nms] <- weights [nms] + postProbs[i]
progressbarTick()
}
# find out which parameters are random -- this uses types from the last iteration above
isRandom <- logical(nparam)
idx <- which(types == "random")
for (i in idx)
isRandom <- isRandom | startsWith(names(weights), names(types)[i])
# the weights used above don't sum to 1 because we consider a subset of the models.
# Now we renormalize to ensure the weights used in each weighted mean do sum to 1.
weightedMeans <- weightedMeans / weights
# given the model averaged posterior means calculate the weighted posterior standard deviations
weightedSds <- 0 * weightedMeans # keeps the names
r <- nIter / (nIter - 1)
# statistics has length 0 iff the posterior sampling errored
widxGoodStatistics <- which(lengths(statistics) > 0L)
for (i in widxGoodStatistics) {
nms <- statistics[[i]]$names
var <- statistics[[i]]$var # ~ sum((x - mean(x))^2
mu <- statistics[[i]]$mean
# cc = sum((x - y)^2) - sum((x - mean(x))^2), where y is the weighted mean
# hence, we can get the weighted sum of squares from the individual posterior variances
cc <- weightedMeans[nms]^2 + mu^2 - 2 * mu * weightedMeans[nms]
weightedSds[nms] <- weightedSds[nms] + postProbs[i] * (var + cc * r)
}
weightedSds <- sqrt(weightedSds / weights)
# the loops above are optimized versions of the code below that also calculates the
# weighted mean and weighted sd, but needs to store all samples at once.
# xx <- lapply(samples, function(x, y) {
# if (y %in% colnames(x)) {dd <- model$posteriors$weightedRsqDens
# x[, y]
# } else {
# NULL
# }
# }, y = "XY29udEJpbm9t-1") # XY29udEJpbm9t = contBinom
# idx <- lengths(xx) > 0
# xx <- unlist(xx[idx])
# w <- postProbs[idx]
# sum(w) # equals weights[2]
# weighted.mean(xx, rep(w / sum(w), each = nIter)) # equals weightedMeans[2]
# sqrt(Hmisc::wtd.var(xx, rep(w / sum(w), each = nIter), method = "unbiased")) # equals weightedSds[2]
# compute model averaged densities
steps <- 2^9 # grid size for densities is 2^steps
weightedDensities <- array(0, dim = c(steps, nparam, 2), dimnames = list(NULL, names(weightedMeans), c("x", "y")))
ranges <- matrix(0, nparam, 2L, dimnames = list(allParamNames, NULL))
# get the outermost x-values for each densities this could be vectorized with matrixStats::rowRanges if the
# data are stored as a matrix but that is memory inefficient
for (i in widxGoodStatistics) {
nms <- statistics[[i]]$names
indices <- which(nms %in% allParamNames)
for (j in indices) {
ranges[j, ] <- range(ranges[j, ], statistics[[i]]$approx$xRanges[j, ])
}
}
# construct one common grid for each observed density
for (i in seq_len(nparam)) {
weightedDensities[, i, 1L] <- seq(ranges[i, 1], ranges[i, 2], length.out = steps)
}
for (i in widxGoodStatistics) {
nms <- statistics[[i]]$names
ind <- match(nms, allParamNames)
for (j in seq_along(ind)) {
# approximate all distributions on a common grid
ap <- approx(x = statistics[[i]]$approx$fit[, j, 1L],
y = statistics[[i]]$approx$fit[, j, 2L],
xout = weightedDensities [, ind[j], 1L],
# not observed is approximated to 0
yleft = 0, yright = 0)
weightedDensities[, ind[j], 2L] <- weightedDensities[, ind[j], 2L] + ap$y * postProbs[i]
}
}
# postProbs don't sum to one so we renormalize the densities
weightedDensities[, , 2L] <- sweep(weightedDensities[, , 2L], 2, weights, FUN = `/`)
# if all model with one parameter failed, the line above introduces NaNs since 0 / 0 is NaN
weightedDensities[is.nan(weightedDensities)] <- 0
# # compute weighted CRIs
# weightedCRIs <- matrix(NA, nparam, 2L, dimnames = list(names(weights), NULL))
# cri <- options[["credibleInterval"]]
# for (i in seq_len(nparam)) {
# weightedCRIs[i, ] <- .BANOVAapproxCRI(weightedDensities[, i, 1L], weightedDensities[, i, 2L], cri)
# }
#
# # compute residuals and r-squared
# # sample from the joint posterior over models and parameters
# tmp <- .BANOVAgetSMIResidRsq(weightedDensities, dataset, model$model.list[[nmodels]], nIter, weights)
# means <- rowMeans(tmp$resid)
# h <- (1 - cri) / 2
# quants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1 - h))
#
# # the code above is equivalent to the code below, but the code below needs to keep all posterior samples of
# # all models in memory.
# # weights <- rep(postProbs, each = nIter)
# # independentVariable <- all.vars(.BANOVAgetModelFormulaFromBFobj(model$models[[2L]]))[1L]
# # resids <- matrix(NA, nrow(dataset), 0)
# # rsq <- vector("list", nmodels)
# # # get residuals of all models individually
# # for (i in seq_len(nmodels)) {
# # if (is.null(model$models[[i]]$bf)) {
# # tmp2 <- .BANOVAresidualsNullModel(nIter, dataset[[independentVariable]])
# # } else {
# # tmp2 <- .BANOVAgetSMIResidRsq(
# # posterior = samples[[i]],
# # dataset = dataset,
# # formula = .BANOVAgetModelFormulaFromBFobj(model$models[[i]])
# # )
# # }
# # resids <- cbind(resids, tmp2$resid)
# # rsq[[i]] <- tmp2$rsq
# # }
# # # compute weighted mean for each row
# # means2 <- tcrossprod(weights / nIter, resids)
# # plot(means, means2); abline(0, 1)
# # quants2 <- apply(resids, 1L, Hmisc::wtd.quantile, weights = weights, probs = c(0.025, 0.975))
# # plot(quants[, 1], quants2[1, ]); abline(0, 1)
# # plot(quants[, 2], quants2[2, ]); abline(0, 1)
#
# # all information for q-q plot of residuals
# weightedResidSumStats <- matrix(c(means, quants), nrow = length(means), ncol = 3L,
# dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))
#
# # all information for r-squared density plot
# weightedRsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
# weightedRsqCri <- quantile(tmp$rsq, probs = c(h, 1 - h))
return(list(
statistics = statistics, weights = weights, weightedMeans = weightedMeans, weightedSds = weightedSds,
weightedDensities = weightedDensities,
#weightedCRIs = weightedCRIs, weightedResidSumStats = weightedResidSumStats,
#weightedRsqDens = weightedRsqDens, weightedRsqCri = weightedRsqCri,
allContinuous = allContinuous, isRandom = isRandom, levelInfo = levelInfo,
samplingIssues = samplingIssues
))
}
.BANOVAcomputePosteriorCRI <- function(dataset, options, model, posterior) {
nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e4L else options[["samplesMCMC"]]
weightedDensities <- posterior[["weightedDensities"]]
weights <- posterior[["weights"]]
nmodels <- length(model[["models"]])
nparam <- length(weights)
# compute weighted CRIs
weightedCRIs <- matrix(0, nparam, 2L, dimnames = list(names(weights), NULL))
cri <- options[["credibleInterval"]]
for (i in seq_len(nparam)) {
if (!all(weightedDensities[, i, 2L] == 0)) { # only TRUE if we explicitly set it to 0
weightedCRIs[i, ] <- .BANOVAapproxCRI(weightedDensities[, i, 1L], weightedDensities[, i, 2L], cri)
}
}
# compute residuals and r-squared
# sample from the joint posterior over models and parameters
tmp <- .BANOVAgetSMIResidRsq(weightedDensities, dataset, model$model.list[[nmodels]], nIter, model, options)
means <- rowMeans(tmp$resid)
h <- (1 - cri) / 2
quants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1 - h))
# the code above is equivalent to the code below, but the code below needs to keep all posterior samples of
# all models in memory. plug this body of this function into the call inside .BANOVAsamplePosteriors() to check
# weights <- rep(postProbs, each = nIter)
# independentVariable <- all.vars(.BANOVAgetModelFormulaFromBFobj(model$models[[2L]]))[1L]
# resids <- matrix(NA, nrow(dataset), 0)
# rsq <- vector("list", nmodels)
# # get residuals of all models individually
# for (i in seq_len(nmodels)) {
# if (is.null(model$models[[i]]$bf)) {
# tmp2 <- .BANOVAresidualsNullModel(nIter, dataset[[independentVariable]])
# } else {
# tmp2 <- .BANOVAgetSMIResidRsq(
# posterior = samples[[i]],
# dataset = dataset,
# formula = .BANOVAgetModelFormulaFromBFobj(model$models[[i]])
# )
# }
# resids <- cbind(resids, tmp2$resid)
# rsq[[i]] <- tmp2$rsq
# }
# # compute weighted mean for each row
# means2 <- tcrossprod(weights / nIter, resids)
# plot(means, means2); abline(0, 1)
# quants2 <- apply(resids, 1L, Hmisc::wtd.quantile, weights = weights, probs = c(0.025, 0.975))
# plot(quants[, 1], quants2[1, ]); abline(0, 1)
# plot(quants[, 2], quants2[2, ]); abline(0, 1)
# all information for q-q plot of residuals
weightedResidSumStats <- matrix(c(means, quants), nrow = length(means), ncol = 3L,
dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))
# all information for r-squared density plot
weightedRsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
weightedRsqCri <- quantile(tmp$rsq, probs = c(h, 1 - h))
weightedRsqMean <- mean(tmp$rsq)
return(list(
weightedCRIs = weightedCRIs, weightedResidSumStats = weightedResidSumStats,
weightedRsqDens = weightedRsqDens, weightedRsqCri = weightedRsqCri, weightedRsqMean = weightedRsqMean
))
}
.BANOVAsampleNullModel <- function(nsamples, dependent) {
# sample from posterior under NULL model, needed to compute model averaged residuals.
rt.scaled <- function (n, df, mean = 0, sd = 1, ncp) {
mean + sd * stats::rt(n, df, ncp = ncp)
}
# sample from the marginal posterior distribution of the mean t-distribution, based on Murphy (2007)
n <- length(dependent)
muObs <- mean(dependent)
varObs <- var(dependent)
# uninformative priors
k0 <- 1e-6
a0 <- 1e-5
b0 <- 1e-5
an <- a0 + n / 2
bn <- b0 + 0.5 * (n - 1L) * varObs + k0 * n * muObs / (2 * (k0 + n))
kn <- k0 + n
mun <- n * muObs / kn
samples <- matrix(rt.scaled(n = nsamples, df = 2 * an, mean = mun, sd = bn / (an * kn), ncp = 0), nsamples,
1L, dimnames = list(NULL, "mu"))
return(samples)
}
.BANOVAresidualsNullModel <- function(nsamples, dependent) {
# sample from posterior under NULL model, needed to compute model averaged residuals.
rt.scaled <- function (n, df, mean = 0, sd = 1, ncp) {
mean + sd * stats::rt(n, df, ncp = ncp)
}
# sample from the marginal posterior distribution of the mean t-distribution, based on Murphy (2007)
n <- length(dependent)
muObs <- mean(dependent)
varObs <- var(dependent)
# uninformative priors
k0 <- 1e-6
a0 <- 1e-5
b0 <- 1e-5
an <- a0 + n / 2
bn <- b0 + 0.5 * (n - 1L) * varObs + k0 * n * muObs / (2 * (k0 + n))
kn <- k0 + n
mun <- n * muObs / kn
samples <- rt.scaled(n = nsamples, df = 2 * an, mean = mun, sd = bn / (an * kn), ncp = 0)
# compute all pairwise differences (residuals)
preds <- tcrossprod(rep(1, n), samples)
resids <- dependent - preds
rsq <- .BANOVAcomputeRsq(dependent, preds)
return(list(resids = resids, rsq = rsq))
}
.BANOVAfitDensity <- function(samples, gridsize = 100L, getXRange = TRUE) {
# ideally we don't do kernel density estimation but instead use some parametric approximation that is suited
# for unimodal distributions. It should also be fast, e.g., fit using method of moments (or another analytic method)
nc <- ncol(samples)
fits <- array(
data = NA,
dim = c(gridsize, nc, 2),
dimnames = list(NULL, colnames(samples), c("x", "y"))
)
n <- nrow(samples)
del0 <- (1/(4 * pi))^(1/10) # normal kernel
for (i in seq_len(nc)) {
# bandwidth definition from KernSmooth::bkde, but adjusted when sqrt(var(samples[, i])) is infinite
vx <- sqrt(var(samples[, i]))
if (is.infinite(vx)) # contWide can cause the variance to be infinite
vx <- sqrt(.Machine$double.xmax) # some big number as a fallback
bandwidth <- del0 * (243/(35 * n))^(1/5) * vx
fits[, i, ] <- do.call(cbind, KernSmooth::bkde(samples[, i], gridsize = gridsize, bandwidth = bandwidth))
}
if (getXRange) {
if (nc == 1L) {
xRanges <- matrix(range(fits[, , "x"]), 1L, 2L)
} else {
xRanges <- matrixStats::colRanges(fits[, , "x"])
}
rownames(xRanges) <- colnames(samples)
} else {
xRanges <- NULL
}
return(list(fit = fits, xRanges = xRanges))
}
.BANOVAapproxCRI <- function(x, y, cri = 0.95) {
# approximate cdf
y2 <- cumsum(y)
y2 <- y2 / y2[length(y2)]
h <- (1 - cri) / 2
# find closest observed match
idx <- c(
which.min(abs(y2 - h)),
which.min(abs(y2 - 1 + h))
)
return(x[idx])
}
# plot posteriors ----
.BANOVAgetBMAdensity <- function(samples, weights, fromTo = NULL, n = 2^10) {
# @param samples, list of samples
# @param weigths, vector of numeric weights
# @param fromTo vector of length 2 specifying lower and upper bound (optional).
# @return a list with $x the x-coordinates and $y the y-coordinates.
if (length(samples) != length(weights))
stop(gettext("length of samples must be equal to length of weights!"))
# remove NULL indices
idxNonNull <- lengths(samples) > 0
weights <- weights[idxNonNull]
samples <- samples[idxNonNull]
# renormalize the weights so output is a proper density function (TODO: do we want this, or BAS style?)
weights <- weights / sum(weights)
# create x-grid for density
if (is.null(fromTo))
fromTo <- range(sapply(samples, range))
xs <- seq(fromTo[1L], fromTo[2L], length.out = 2^10)
# compute weighted density
ys <- numeric(n)
for (i in seq_along(samples)) {
ys <- ys + weights[i] * density(samples[[i]], from = fromTo[1L], to = fromTo[2L], n = n)$y
}
return(list(x = xs, y = ys))
}
.BANOVAreSample <- function(n, x, y, prop0 = NULL) {
cdf <- cumsum(y)
max <- cdf[length(cdf)]
if (max == 0) # can't salvage this
return(numeric(n))
cdf <- cdf / max
if (!is.null(prop0) && prop0 <= (1 - sqrt(.Machine$double.eps))) {
i1 <- runif(n) <= prop0
samples <- numeric(n)
samples[i1] <- approx(cdf, x, runif(sum(i1)), rule = 2)$y
} else {
samples <- approx(cdf, x, runif(n), rule = 2)$y
}
return(samples)
}
.BANOVAgetSMIResidRsq <- function(posterior, dataset, formula, nIter, model, options) {
# @param posterior object from Bayesfactor package, or SxPx2 array of weighted densities
# @param dataset dataset
# @param formula the formula for this specific model. Supply the formula of the most complex model for BMA inference.
# @param nIter number of posterior samples
# @param model model estimates
# @param levelInfo for each variable, how many levels there are.
#
# @return a list with residuals, predictions, and r-squared
# @details the matrix multiplication in this function allocates an array of nobs * nsamples, which can be enormous.
# if is more memory efficient to use for loops (but slower in R) to calculate predictions for each observation
# and posterior sample. This could be done in the future if performance is an issue. However, this likely cannot be
# done efficiently in R. For modelAveraged inference, model and levelInfo need to be given.
# here we need the data in a one-hot encoded way
# first one is dependent variable, rest are independent variables
dvs <- all.vars(formula)[1L]
ivs <- all.vars(formula)[-1L]
# idx contains variables that need to be one-hot encoded
idx <- sapply(dataset[ivs], is.factor)
# idx <- isFactor & names(isFactor) %in% dvs
# do one-hot encoding (thank base R for calling a function `contrast` and giving it the argument `contrast`...)
datOneHot <- model.matrix(formula, data = dataset[c(dvs, ivs)],
contrasts.arg = lapply(dataset[ivs][idx], contrasts, contrasts = FALSE))
nobs <- nrow(datOneHot)
# possible switch for memory efficient alternatives:
# size of 1 numeric element in bytes
# doubleSize <- as.numeric(utils::object.size(double(2)) - utils::object.size(double(1)))
# 1073741824 is 1 GB in bytes
# 536870912 is 512 MB in bytes
# the size of an empty matrix is ignored here
# useMatmult <- ((doubleSize * nobs * nIter) / 536870912.0) <= 1
# if (useMatmult) {
if (length(dim(posterior)) == 3L) {
# we're doing model averaged inference
# recompute the levelInfo, this is relatively cheap and we cannot be sure that the levelInfo in the
# posterior was not retrieved from state (and contains too many or too few variables)
nmodels <- length(model[["model.list"]])
randomFactors <- .BANOVAgetRandomFactors(options, model[["analysisType"]])
dataTypes <- .BANOVAgetDataTypes(dataset, model[["model.list"]][[nmodels]], randomFactors)
levelInfo <- .BANOVAgetLevelInfo(dataset, model[["model.list"]][[nmodels]], dataTypes)
# which parameters are nuisance?
parameterNames <- c("mu", names(levelInfo[["levelNames"]]))
# the variables that are nuisance, e.g., contBinom
isNuisanceVar <- c(TRUE, parameterNames[-1L] %in% model[["nuisance"]])
names(isNuisanceVar) <- parameterNames
# the parameters that are nuisance, e.g., contBinom-0 and contBinom-1
isNuisanceParam <- rep(isNuisanceVar, c(1, lengths(levelInfo[["levelNames"]])))
names(isNuisanceParam) <- c("mu", unlist(levelInfo[["levelNames"]]))
if (!all(names(isNuisanceParam) %in% colnames(posterior)))
stop("!all(names(isNuisance) %in% colnames(posterior)) was not true.")
# we cannot use model[["effects"]] because that exludes nuisance parameters, so we rebuild one that doesn't exclude these
effects <- matrix(FALSE, nmodels, length(isNuisanceParam), dimnames = list(NULL, names(isNuisanceParam)))
for (i in seq_along(model[["model.list"]])) {
if (is.null(model[["model.list"]][[i]])) { # implies an intercept-only null model
cnms <- "mu"
} else {
idx <- names(levelInfo[["levelNames"]]) %in% attr(terms(model[["model.list"]][[i]]), "term.labels")
cnms <- c("mu", unlist(levelInfo[["levelNames"]][idx]))
}
effects[i, cnms] <- TRUE#c(TRUE, names(levelInfo[["levelNames"]]) %in% attr(terms(model[["model.list"]][[i]]), "term.labels"))
}
# sample from the BMA posterior
postProbs <- model[["postProbs"]]
postProbs[is.na(postProbs)] <- 0
.setSeedJASP(options)
modelIdx <- sample(length(postProbs), nIter, TRUE, postProbs) # resample the models according to posterior prob
samples <- matrix(0, nrow = nIter, ncol = ncol(datOneHot), dimnames = list(NULL, names(isNuisanceParam)))
# resample nuisance parameters
for (i in which(isNuisanceParam)) {
.setSeedJASP(options)
samples[, i] <- .BANOVAreSample(n = nIter, x = posterior[, i, "x"], y = posterior[, i, "y"])
}
# resample non nuisance parameters
for (i in which(!isNuisanceParam)) {
idx <- names(isNuisanceParam)[i]
mult <- effects[modelIdx, idx] # how often models with this variable are sampled
nTemp <- sum(mult)
if (nTemp > 0L){
.setSeedJASP(options)
samples[mult, i] <- .BANOVAreSample(n = sum(mult), x = posterior[, i, "x"], y = posterior[, i, "y"])
}
}
preds <- tcrossprod(datOneHot, samples)
} else {
# TODO: ensure that column order is always correct for both!
# otherwise we're doing inference for a single model
preds <- tcrossprod(datOneHot, posterior)
}
# calculate residuals (correctly recycles dat[[dvs]])
resid <- dataset[[dvs]] - preds
rsq <- .BANOVAcomputeRsq(dataset[[dvs]], preds)
# eta <- .BANOVAcomputeEtasq(dat[[dvs]], preds)
# } else {
# slow but memory efficient stuff
# }
return(list(resid = resid, preds = preds, rsq = rsq))
}
# HF computation ----
.BANOVAinitBayesFactor <- function() {
defaults <- list(
BFMaxModels = 50000,
BFpretestIterations = 100,
BFapproxOptimizer = "optim",
BFapproxLimits = c(-15, 15),
BFprogress = interactive(),
BFfactorsMax = 5
)
idx <- setdiff(names(defaults), names(options()))
options(defaults[idx])
}
.BANOVAcomputeRsq <- function(obs, preds) {
# NOTE: R^2 != cor(obs, predict) because the predictions from the posterior samples are not OLS estimates.
if (is.null(dim(preds)))
preds <- matrix(preds, ncol = 1L)
# definition from http://www.stat.columbia.edu/~gelman/research/unpublished/bayes_R2.pdf
colVars2 <- function(x) {matrixStats::colVars(sweep(x, 2L, matrixStats::colMeans2(x)))}
ee <- colVars2(preds)
ff <- colVars2(obs - preds)
gg <- ee / (ee + ff)
if (anyNA(gg)) {
idx <- is.na(gg)
if (all(idx)) {
# this means that either the posterior sampling failed or the input data is bogus.
.quitAnalysis(gettext("All predictions had zero variance. Check the predictors for anomalies or increase the number of posterior samples."))
} else {
# this shouldn't happen, but is necessary because otherwise stuff will crash downstream.
gg[is.na(gg)] <- mean(gg, na.rm = TRUE)
}
}
return(gg)
}
.BANOVAcomputeEtasq <- function(obs, preds) {
# partial eta^2
if (is.null(dim(preds)))
preds <- matrix(preds, ncol = 1L)
# return(1 - matrixStats::colVars(preds - obs) / var(obs))
# return(1 - matrixStats::rowVars(preds - obs) / var(obs))
return(matrixStats::colVars(preds) / var(obs))
}
.BANOVAmakeChainNeater <- function(chains, Xnames, formula, data, dataTypes, gMap, unreduce, continuous, columnFilter) {
# so BayesFactor:::makeChainNeater doesn't always make things neater
# see also https://github.com/jasp-stats/jaspAnova/pull/43#discussion_r656430090
# example where BayesFactor goes wrong:
# dd <- structure(list(
# rm_factor = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Level 1", "Level 2"), class = "factor"),
# dependent = c(6, 6, 5, 10, 6, 9),
# age = c(59.2161793866222, 59.2161793866222, 53.4457078686802, 53.4457078686802, 50.3248450015473, 50.3248450015473),
# subject = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor")),
# row.names = c(NA, 6L), class = "data.frame"
# )
#
# bf_obj_1 <- BayesFactor::lmBF(dependent ~ age + subject, data = dd, whichRandom = "subject")
# bf_obj_2 <- BayesFactor::lmBF(dependent ~ age + subject + rm_factor, data = dd, whichRandom = "subject")
#
# print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- ugly
# print(BayesFactor::posterior(bf_obj_2, iterations = 3)) # <- good
#
# originalFun <- BayesFactor:::makeChainNeater
# jaspBase:::assignFunctionInPackage(.BANOVAmakeChainNeater, "makeChainNeater", "BayesFactor")
# print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- good
#
# jaspBase:::assignFunctionInPackage(originalFun, "makeChainNeater", "BayesFactor")
# print(BayesFactor::posterior(bf_obj_1, iterations = 3)) # <- ugly
# this part is identical to BayesFactor:::makeChainNeater
P = length(gMap)
nGs = max(gMap) + 1
factors = BayesFactor:::fmlaFactors(formula, data)[-1]
dataTypes = dataTypes[names(dataTypes) %in% factors]
types = BayesFactor:::termTypes(formula, data, dataTypes)
lastPars = ncol(chains) + (-nGs):0
if (any(continuous)) {
gNames = paste("g", c(names(types[types != "continuous"]), "continuous"), sep = "_")
} else {
gNames = paste("g", names(types), sep = "_")
}
if (is.null(columnFilter)) {
ignoreCols = ignoreCols = rep(0, P)
} else {
ignoreCols = BayesFactor:::filterVectorLogical(columnFilter, names(gMap))
}
# here we're also checking here if any dataTypes are "random" instead of only checking for fixed dataTypes
if (!unreduce | !any(dataTypes %in% c("fixed", "random"))) {
labels = c("mu", Xnames[!ignoreCols], "sig2", gNames)
colnames(chains) = labels
return(chains)
}
parLabels = BayesFactor:::makeLabelList(formula, data, dataTypes, unreduce,
columnFilter)
labels = c("mu", parLabels)
betaChains = chains[, 1:(ncol(chains) - 2 - nGs) + 1, drop = FALSE]
betaChains = BayesFactor:::ureduceChains(betaChains, formula, data, dataTypes, gMap, ignoreCols)
newChains = cbind(chains[, 1], betaChains, chains[, lastPars])
labels = c(labels, "sig2", gNames)
colnames(newChains) = labels
return(newChains)
}
.BANOVAcomputeInteractionsMatrix <- function(effects) {
neffects <- length(effects)
interactions.matrix <- matrix(FALSE, nrow = neffects, ncol = neffects)
rownames(interactions.matrix) <- colnames(interactions.matrix) <- effects
if (neffects > 1L) {
effect.components <- sapply(effects, strsplit, split = ":", fixed = TRUE)
for (e in seq_len(neffects)) {
interactions.matrix[e, ] <- sapply(1:neffects, function(ee) {
(sum(effect.components[[e]] %in% effect.components[[ee]]) == length(effect.components[[e]]))
})
}
diag(interactions.matrix) <- FALSE
}
return(interactions.matrix)
}
# Model prior ----
.BANOVAcomputePriorModelProbs <- function(models, nuisance, options) {
if (options[["modelPrior"]] == "uniform") {
modelprobs <- rep(1 / length(models), length(models))
} else if (options[["modelPrior"]] == "custom") {
inclusionProbabilities <- vapply(options[["customPriorSpecification"]], `[[`, FUN.VALUE = numeric(1L), "inclusionProbability")
modelprobs <- .BANOVAcustomInclusionProbabilitiesToModelProbabilities(models, nuisance, inclusionProbabilities, enforceMarginality = options[["enforcePrincipleOfMarginalityFixedEffects"]])
} else {
noPredictorsPerModel <- vapply(models, function(x) if (is.null(x)) 0L else length(attr(terms(x), "term.labels")), FUN.VALUE = integer(1L))
noNuisancePredictors <- noPredictorsPerModel[1L]
noPredictorsPerModel <- noPredictorsPerModel - noNuisancePredictors
totalNoPredictors <- noPredictorsPerModel[length(noPredictorsPerModel)]
if (options[["modelPrior"]] %in% c("betaBinomial", "Wilson", "Castillo")) {
switch (options[["modelPrior"]],
"betaBinomial" = {alpha = options[["betaBinomialParameterA"]]; beta = options[["betaBinomialParameterB"]] },
"Wilson" = {alpha = 1.0; beta = totalNoPredictors * options[["wilsonParameterLambda"]] },
"Castillo" = {alpha = 1.0; beta = totalNoPredictors ^ options[["castilloParameterU"]] }
)
modelprobs <- dBetaBinomialModelPrior(noPredictorsPerModel, totalNoPredictors, alpha, beta)
} else if (options[["modelPrior"]] == "Bernoulli") {
modelprobs <- dBernoulliModelPrior(noPredictorsPerModel, totalNoPredictors, options[["bernoulliParameter"]])
}
# both the betabinomial and bernoulli model priors do not sum to 1 when marginality is respected
modelprobs <- modelprobs / sum(modelprobs)
}
return(modelprobs)
}
dBetaBinomialModelPrior <- function(k, n, alpha = 1.0, beta = 1.0, log = FALSE) {
# NOTE: this is not the pdf of the betabinomial, but the pdf divided by choose(n, k), so that it's the probabiltiy of a particular model
logprobability <- lbeta(k + alpha, n - k + beta) - lbeta(alpha, beta)
if (log)
return(logprobability)
return(exp(logprobability))
}
dBernoulliModelPrior <- function(k, n, prob = 0.5, log = FALSE) {
logprobability <- k * log(prob) + (n - k) * log(1 - prob)
if (log)
return(logprobability)
return(exp(logprobability))
}
.BANOVAcustomInclusionProbabilitiesToModelProbabilities <- function(modelList, nuisance, inclusionProbabilities, enforceMarginality = TRUE) {
# TODO: discuss with the team:
# when marginality is enforced, how should the prior on interaction effects be interpreted when a user wants matched models?
formulaFullModel <- modelList[[length(modelList)]]
termsFullModel <- terms(formulaFullModel)
termLabels <- attr(termsFullModel, "term.labels")
fullNonNuisance <- !(termLabels %in% nuisance)
termLabels <- termLabels[fullNonNuisance]
# the principle of marginality is enforced (or not) through the function getPresentInteractions
if (enforceMarginality) {
# in this case, interaction effects do not appear without the main effects, so the user provided probability
# is interpreted as a conditional probability given that the main effects are included.
# thus, p(interaction) = 0 if the main effects are missing, and the user provided probability otherwise.
#
# for example, consider p(A) = P(B) = .5, P(A:B) = .2
#
# model P(model)
# A P(A) * (1 - P(B)) # <- here we do NOT do '* (1 - P(A:B))'
# A, B p(A) * P(B) * (1 - P(A:B))
# A, B, A:B p(A) * P(B) * P(A:B)
termFactors <- attr(termsFullModel, "factors")[, termLabels, drop = FALSE]
termFactors <- termFactors[rownames(termFactors) %in% termLabels, , drop = FALSE]
rnmsTermFactors <- rownames(termFactors)
termOrders <- attr(termsFullModel, "order")[fullNonNuisance]
# create a list where the names refer to the interactions and the elements refer to all children of that interaction
listOfDescendants <- setNames(lapply(termLabels, function(x) {
idx <- which(termFactors[, x] == 1L)
rnmsTermFactors[idx]
}), termLabels)
termIsNotInteraction <- termOrders == 1L
getPresentInteractions <- function(currentTermLabels) {
vapply(listOfDescendants, function(descendants, currentTermLabels) {
all(descendants %in% currentTermLabels)
}, logical(1L), currentTermLabels) | termIsNotInteraction
}
# NOTE: the loop below fails for some combinations of user priors, but I can see how this behavior is desirable
# the users provide the raw p(interaction) however, the code below assumes that this is
# p(interaction | main effects), so to compensate we divide each interaction by the product of the components.
# however, for some inclusion probabilities of the main effects you get impossible values, for example:
# p(A_) = 0.1, p(B) = 0.1, P(A*B) = 0.9 => 0.9 / 0.1^2 = 90, which is bigger than 1 and should be impossible.
# for (i in which(!termIsNotInteraction)) { # loop over all interaction terms
# idxSubterms <- which(termLabels %in% listOfDescendants[[termLabels[i]]])
# inclusionProbabilities[i] <- inclusionProbabilities[i] / prod(inclusionProbabilities[idxSubterms])
# }
} else {
# in this case, interaction effects do appear without the main effects, so the user provided probability
# is interpreted in the same way for interaction effects and fixed effects.
# it's interpreted as an unconditional inclusion probability
getPresentInteractions <- function(...) {
rep(TRUE, length(termLabels))
}
}
exclusionProbabilties <- 1 - inclusionProbabilities
# so terms(y ~ b + a:b) reorders a:b to b:a but terms(y ~ a + a:b) does not...
termLabelOrder <- strsplit(termLabels, ":", fixed = TRUE)
modelProbs <- numeric(length(modelList))
for (i in seq_along(modelList)) {
if (is.null(modelList[[i]])) {
excludedTerms <- rep(TRUE, length(termLabels))
excludedTerms <- excludedTerms & getPresentInteractions(NULL)
modelProbs[i] <- prod(exclusionProbabilties)
} else {
currentTerms <- terms(modelList[[i]])
currentTermLabels <- setdiff(labels(currentTerms), nuisance)
currentTermLabels <- .BANOVAorderTermsByKnownOrder(currentTerms, currentTermLabels, termLabelOrder)
presentInteractions <- getPresentInteractions(currentTermLabels)
includedTerms <- termLabels %in% currentTermLabels
excludedTerms <- !includedTerms
includedTerms <- includedTerms & presentInteractions
excludedTerms <- excludedTerms & presentInteractions
modelProbs[i] <- prod(inclusionProbabilities[includedTerms], exclusionProbabilties[excludedTerms])
}
}
return(modelProbs)
}
# Other ----
.BANOVAgetRScale <- function(options, analysisType) {
if (options[["priorSpecificationMode"]] == "acrossParameters") {
rscaleFixed <- options[["cauchyPriorScaleFixedEffects"]]
rscaleRandom <- options[["cauchyPriorScaleRandomEffects"]]
rscaleCont <- if (analysisType == "ANOVA") "medium" else options[["cauchyPriorScaleCovariates"]]
rscaleEffects <- NULL
} else {
# NOTE: this still need a value otherwise BayesFactor returns NA for the bf
# default values of lmBF
rscaleFixed <- "medium"
rscaleRandom <- "nuisance"
rscaleCont <- if (analysisType == "ANOVA") "medium" else options[["cauchyPriorScaleCovariates"]]
rscaleEffectsNames <- vapply(options[["customPriorSpecification"]], FUN.VALUE = character(1L), function(x) {
paste(x[["components"]], collapse = ":")
})
rscaleEffects <- vapply(options[["customPriorSpecification"]], FUN.VALUE = numeric(1L), `[[`, "scaleFixedEffects")
rscaleEffectsKeep <- if (analysisType == "ANOVA") {
rep(TRUE, length(rscaleEffects))
} else {
vapply(options[["customPriorSpecification"]], FUN.VALUE = logical(1L), function(x) {
is.null(options[["covariates"]]) || !any(x[["components"]] %in% options[["covariates"]])
})
}
rscaleEffects <- rscaleEffects[rscaleEffectsKeep]
names(rscaleEffects) <- rscaleEffectsNames[rscaleEffectsKeep]
}
return(list(rscaleFixed = rscaleFixed, rscaleRandom = rscaleRandom, rscaleCont = rscaleCont, rscaleEffects = rscaleEffects))
}
.BANOVAgetShownTableSizeAndAddFootnote <- function(modelTable, nmodels, modelsShown, numModelsShown) {
if (modelsShown == "unlimited") {
shownTableSize <- nmodels
} else {
shownTableSize <- min(nmodels, numModelsShown)
if (shownTableSize < nmodels)
modelTable$addFootnote(gettextf("Showing the best %1$d out of %2$d models.", numModelsShown, nmodels))
}
return(shownTableSize)
}
# # .BANOVAupdateRscales <- function() {
#
# # for some reason, BayesFactor supports custom rscales for but throws a error if you try to set them for continuous parameters
# # this temporarily overwrite the stop call in this BayesFactor:::createRscales
# # the function will throw an error if BayesFactor:::createRscales is updated
#
# # originalCreateRscales <- BayesFactor:::createRscales
# # originalBody <- body(originalCreateRscales)
# # if (!identical(originalBody[[17]][[3]][[2]][[1]], quote(invisible))) {
# # if (!identical(originalBody[[17]][[3]][[2]][[1]], quote(stop)))
# # stop("Bayesfactor got an update, and createRscales/ .BANOVAupdateRscales needs to be fixed!", domain = NA)
# # originalBody[[17L]][[3L]][[2L]][[1L]] <- substitute(invisible)
# # newCreateRscales <- originalCreateRscales
# # body(newCreateRscales) <- originalBody
# #
# # jaspBase::assignFunctionInPackage(newCreateRscales, "createRscales", "BayesFactor")
# # }
# #
# # tmp <- methods::getMethod(BayesFactor::compare, list(numerator="BFlinearModel", denominator="missing", data="data.frame"))
# # fun <- tmp@.Data
# # newfun <- fun
# # body <- body(fun)
# # if (!identical(body[[19L]][[2L]][[2L]][[4L]][[2L]], quote(FALSE))) {
# # if (!identical(body[[19L]][[2L]][[2L]][[4L]][[2L]], quote(all(relevantDataTypes == "continuous"))))
# # stop("Bayesfactor got an update, and BayesFactor::`.__T__compare:BayesFactor`$`BFlinearModel#missing#data.frame`/ .BANOVAupdateRscales needs to be fixed!", domain = NA)
# # body[[19L]][[2L]][[2L]][[4L]][[2L]] <- substitute(FALSE) # do not take the fast path
# # body(newfun) <- body
# #
# # # horrible but necessary
# # env <- environment()#getNamespace(jaspAnova)
# # assign("compare", BayesFactor::compare, envir = env)
# # setMethod("compare", signature(numerator = "BFlinearModel", denominator = "missing", data = "data.frame"), newfun, where = env)
#
# # env <- getNamespace("BayesFactor")
# # unlockBinding("compare", env)
#
# # debugonce(setMethod)
# # setMethod("compare", signature(numerator = "BFlinearModel", denominator = "missing", data = "data.frame"), newfun, where = env)
#
# # tmp@.Data <- newfun
# # s4env <- BayesFactor:::`.__T__compare:BayesFactor`
# # unlockBinding("BFlinearModel#missing#data.frame", s4env)
# # s4env$`BFlinearModel#missing#data.frame` <- tmp
# # lockBinding("BFlinearModel#missing#data.frame", s4env)
# # jaspBase::assignFunctionInPackage(s4env, ".__T__compare:BayesFactor", "BayesFactor")
# # }
#
# # return()
#
# # we could reset the changes with on.exit, but I'm not sure if it's necessary and doing that cleanly requires withr::defer and adds a dependency
# # do.call(on.exit({
# # jaspBase::assignFunctionInPackage(tempRscales[["originalCreateRscales"]], "createRscales", "BayesFactor")
# #
# # s4env <- BayesFactor:::`.__T__compare:BayesFactor`
# # unlockBinding("BFlinearModel#missing#data.frame", s4env)
# # s4env$`BFlinearModel#missing#data.frame` <- newfun
# # lockBinding("BFlinearModel#missing#data.frame", s4env)
# # jaspBase::assignFunctionInPackage(s4env, ".__T__compare:BayesFactor", "BayesFactor")
# # }, add = TRUE, after = FALSE)
# }
# Dependencies ----
.BANOVAdataDependencies <- function() {
# seed is in here because basically everything depends on the seed
c("dependent", "randomFactors", "covariates", "fixedFactors", "betweenSubjectFactors", "repeatedMeasuresFactors", "repeatedMeasuresCells", "modelTerms",
"seed", "setSeed")
}
.BANOVAmodelSpaceDependencies <- function(modelPrior) {
c("modelPrior", "betaBinomialParameterA", "betaBinomialParameterB", "wilsonParameterLambda", "castilloParamerU", "enforcePrincipleOfMarginalityFixedEffects", "enforcePrincipleOfMarginalityRandomSlopes",
if (modelPrior == "custom") "customPriorSpecification"
)
}
.BANOVArscaleDependencies <- function(priorSpecificationMode) {
c("cauchyPriorScaleFixedEffects", "cauchyPriorScaleRandomEffects", "cauchyPriorScaleCovariates", "priorSpecificationMode",
if (priorSpecificationMode == "perTerm") "customPriorSpecification"
)
}
.BANOVAmodelPriorOptionsChanged <- function(state, options) {
!identical(state[["modelPriorOptions"]][.BANOVAmodelSpaceDependencies(options[["modelPrior"]])], options[.BANOVAmodelSpaceDependencies(options[["modelPrior"]])])
}
.BANOVAmarginalityOptionsUnchanged <- function(state, options) {
stateOpts <- state[["modelPriorOptions"]]
stateOpts[["enforcePrincipleOfMarginalityFixedEffects"]] == options[["enforcePrincipleOfMarginalityFixedEffects"]] &&
stateOpts[["enforcePrincipleOfMarginalityRandomSlopes"]] == options[["enforcePrincipleOfMarginalityRandomSlopes"]]
}
.BANOVAmodelBFTypeOrOrderChanged <- function(state, options) {
nms <- c("fixedFactors", "modelTerms", "randomFactors", "covariates", "seed", "setSeed")
nms <- intersect(names(options), nms) # excludes covariates for ANOVA
identical(state[nms], options[nms])
}
.BANOVAmodelTermsUnchanged <- function(state, options) {
identical(state[["modelTerms"]], options[["modelTerms"]])
}
# HF formulas ----
.BANOVAgetFormulaComponents <- function(x, what = c("components", "variables")) {
what <- match.arg(what)
if (what == "components") {
return(colnames(attr(terms(x), "factors")))
} else {
return(all.vars(x)[-1L])
}
}
.BANOVAgetModelTitlesWithAllTerms <- function(modelObjects, modelList, analysisType, hideNuisance) {
if (hideNuisance)
return(vapply(modelObjects, FUN = `[[`, FUN.VALUE = character(1L), "title"))
nullModelName <- gettext("Null model")
res <- gsub("(.*)~\\s+", "", vapply(modelList, function(x) if (is.null(x)) nullModelName else .BANOVAas.character.formula(x), character(1L)))
if (analysisType == "RM-ANOVA")
res <- gsub(.BANOVAsubjectName, "subject", res)
res[res == ""] <- nullModelName
return(jaspBase::gsubInteractionSymbol(res))
}
.BANOVAgenerateAllModelFormulas <- function(formula, nuisance = NULL, analysisType = "RM-ANOVA",
enforcePrincipleOfMarginalityFixedEffects = TRUE,
enforcePrincipleOfMarginalityRandomSlopes = FALSE,
rmFactors = NULL, legacy = FALSE
) {
# TODO: it might be nicer to represent the NULL model as
# y ~ 1, rather than NULL. Right now the null model is a formula
# when there are nuisance variables and otherwise NULL, which leads to a bunch
# of annoying if (is.null(model[i])) exceptions.
neverExclude <- paste0("^", nuisance, "$")
if (!legacy && analysisType == "RM-ANOVA" && is.null(rmFactors))
stop(".BANOVAgenerateAllModelFormulas called with invalid arguments: analysisType = \"RM-ANOVA\", legacy = FALSE, rmFactors = NULL", domain = NA)
modelSpace <- try(BayesFactor::enumerateGeneralModels(
formula,
whichModels = if (enforcePrincipleOfMarginalityFixedEffects) "withmain" else "all",
neverExclude = neverExclude)
)
nuisanceRandomSlopes <- NULL
if (isTryError(modelSpace))
.quitAnalysis(gettextf("The following error occured in BayesFactor::enumerateGeneralModels: %s",
.extractErrorMessage(modelSpace)))
if (analysisType == "RM-ANOVA" && !legacy) {
# adjust the nuisance to include the random slopes, only done here because BayesFactor::enumerateGeneralModels
# does not handle this properly
allPossibleSlopes <- labels(stats::terms(stats::as.formula(paste("y~", paste(rmFactors, collapse = "*")))))
# drop the most complex interaction
allPossibleSlopes <- allPossibleSlopes[-length(allPossibleSlopes)]
# if at least one interaction among the repeated measures is considered
if (length(allPossibleSlopes) > 0L) {
# add interaction with subject
nuisanceRandomSlopes <- paste0(allPossibleSlopes, ":", .BANOVAsubjectName)
if (enforcePrincipleOfMarginalityRandomSlopes) {
for (i in seq_along(modelSpace)) {
presentLabels <- labels(stats::terms(modelSpace[[i]]))
termsToAddChar <- intersect(allPossibleSlopes, presentLabels)
if (length(termsToAddChar) > 0L) {
termsToAddChar <- paste0(termsToAddChar, ":", .BANOVAsubjectName)
termsToAdd <- as.formula(paste("~ . +", paste0(termsToAddChar, collapse = " + ")))
modelSpace[[i]] <- update.formula(modelSpace[[i]], new = termsToAdd)
}
}
} else {
termsToAdd <- as.formula(paste("~ . +", paste0(nuisanceRandomSlopes, collapse = " + ")))
modelSpace <- lapply(modelSpace, update.formula, new = termsToAdd)
}
# for the reordering done below. termsToAdd always contains the most complex random effects
formula <- update.formula(formula, new = termsToAdd)
# update nuisance
nuisance <- c(nuisance, nuisanceRandomSlopes)
}
}
if (is.null(nuisance)) {
return(list(modelList = c(list(NULL), modelSpace), nuisance = nuisance, nuisanceRandomSlopes = nuisanceRandomSlopes))
} else {
# put the null-model first
i <- length(modelSpace)
modelSpace <- c(modelSpace[[i]], modelSpace[-i])
# BayesFactor has the nasty habit of changing the order of interactions whenever one of the components is
# a nuisance variable. Here we ensure the order matches that of the input formula (which matches the order a user entered the factors).
# see also https://github.com/jasp-stats/jasp-test-release/issues/1181
originalTerms <- terms(formula)
originalOrd <- attr(originalTerms, "order")
originalIdxNonInteraction <- which(originalOrd == 1L)
originalLabels <- attr(originalTerms, "term.labels")
originalLabelsPieces <- strsplit(originalLabels[originalOrd > 1L], ":")
originalLabelsPiecesSorted <- lapply(originalLabelsPieces, sort)
dependent <- all.vars(modelSpace[[1]])[1L]
for (i in seq_along(modelSpace)) {
term <- terms(modelSpace[[i]])
ord <- attr(term, "order") # interaction order
idxNonInteraction <- which(ord == 1L)
termLabels <- attr(term, "term.labels")
termLabels[idxNonInteraction] <- originalLabels[originalIdxNonInteraction][match(termLabels[idxNonInteraction], originalLabels[originalIdxNonInteraction])]
for (j in which(ord > 1L)) {
labelPieces <- strsplit(termLabels[j], ":")[[1L]]
sortedLabelPieces <- sort(labelPieces)
for (k in seq_along(originalLabelsPieces))
if (identical(sortedLabelPieces, originalLabelsPiecesSorted[[k]]))
termLabels[j] <- paste(originalLabelsPieces[[k]], collapse = ":")
}
modelSpace[[i]] <- as.formula(paste(dependent, "~", paste(termLabels, collapse = " + ")))
}
return(list(modelList = modelSpace, nuisance = nuisance, nuisanceRandomSlopes = nuisanceRandomSlopes))
}
}
.BANOVAcreateModelFormula <- function(dependent, modelTerms) {
rhs <- "" # right hand side of the formula
nuisance <- NULL
effects <- NULL
for (term in modelTerms) {
comp <- term[["components"]]
newValue <- paste(comp, collapse = ":")
rhs <- if (rhs != "") paste0(rhs, " + ", newValue) else newValue
if (!is.null(term[["isNuisance"]]) && term[["isNuisance"]]) {
nuisance <- c(nuisance, newValue)
} else {
effects <- c(effects, newValue)
}
}
model.formula <- formula(paste(dependent, "~", rhs))
# this would be cleaner ideal if BayesFactor::enumerateGeneralModels would handle the nuisance properly.
# if (isRMANOVA && !legacy) {
# randomSlopes <- paste0(rmFactors, ":", .BANOVAsubjectName)
# termsToAdd <- as.formula(paste("~ . +", paste0(randomSlopes, collapse = " + ")))
# model.formula <- update.formula(model.formula, termsToAdd)
# nuisance <- c(nuisance, randomSlopes)
# }
return(list(model.formula = model.formula, nuisance = nuisance, effects = effects))
}
.BANOVAgetModelFormulaFromBFobj <- function(BayesFactorObj, asCharacter = FALSE) {
out <- BayesFactorObj$bf@numerator[[1L]]@identifier$formula
if (asCharacter) {
return(out)
} else {
return(as.formula(out))
}
}
.BANOVAgetLevelInfo <- function(dataset, formula, dataTypes) {
levelNames <- .BANOVAmakeLabelList(formula, dataset, dataTypes)
levelCounts <- lengths(levelNames) # counts of the factors including interaction terms
return(list(levelCounts = levelCounts, levelNames = levelNames))
}
.BANOVAmakeLabelList <- function(formula, data, dataTypes, unreduce = TRUE, columnFilter = NULL) {
# from BayesFactor 0.9.12.4.2
# identical to BayesFactor:::makeLabelList, except that we return a named list
# with
# names: the variables that make up a particular (interaction) effect
# values: the parameter names as returned by BayesFactor::posterior(...)
terms = attr(terms(formula, data = data), "term.labels")
if (!is.null(columnFilter))
terms = terms[!BayesFactor:::filterVectorLogical(columnFilter, terms)]
if (unreduce)
dataTypes[dataTypes == "fixed"] = "random"
labelList = lapply(terms, function(term, data, dataTypes) {
effects = strsplit(term, ":", fixed = TRUE)[[1]]
my.names = BayesFactor:::design.names.intList(effects, data, dataTypes)
return(paste(term, "-", my.names, sep = ""))
}, data = data, dataTypes = dataTypes)
# this part is different from BayesFactor
names(labelList) <- terms
return(labelList)
}
.BANOVAgetDataTypes <- function(dataset, formula, whichRandom = NULL) {
# from BayesFactor:::lmBF
BayesFactor:::createDataTypes(
formula,
whichRandom = whichRandom,
data = dataset,
analysis = "lm"
)
}
.BANOVAgetRandomFactors <- function(options, analysisType) {
if (analysisType == "RM-ANOVA")
return(.BANOVAsubjectName)
else
return(unlist(options[["randomFactors"]]))
}
.BANOVAgetLevelsFromParamNames <- function(names) {
# NOTE: this works because base64 does not contain "-"
# split on first "-"; 2 implies output of length 2, i.e., only split once
out <- do.call(rbind, stringr::str_split(names, "-", 2L))
# continous variables have as level name the variable name
idx <- out[, 1L] == out[, 2L]
out[idx, 2L] <- ""
# change dots into spaces for aesthetic purposes
out[, 2L] <- gsub(".", " ", out[, 2L], fixed = TRUE)
colnames(out) <- c("parameter", "level")
return(out)
}
.BANOVAas.character.formula <- function(x, ...) {
# we could also extend the S3 function as.character
Reduce(paste, trimws(deparse(x)))
}
.BANOVAreorderFormulas <- function(x) {
# This function reorders the terms of a formula such that they are alphabetical
# e.g., a ~ c + b + c:b becomes a ~ b + b:c
# This is necessary because BayesFactor::enumerateGeneralModels always appends the nuisance terms
# and a ~ b + c != a ~ c + b
# so without this function the state does not get reused whenever a user modifies the nuisance terms
if (is.null(x))
return("NULL") # as a string so it can become one big character vector for match()
s <- strsplit(attr(stats::terms.formula(x), "term.labels"), ":")
for (i in which(lengths(s) > 1L))
s[[i]] <- paste0(sort(s[[i]]), collapse = ":")
return(paste(all.vars(x)[1L], "~", paste(sort(unlist(s)), collapse = " + ")))
}
.BANOVAreorderTerms <- function(x) {
# This function reorders the terms of a formula such that they are alphabetical
# it essentially does the same as .BANOVAreorderFormulas but expects x to be character
# x should be a character string of the form c("a" "a:b"), so not a ~ b + c:d.
# For example, output from `all.vars(formula)` or `.BANOVAgetFormulaComponents(formula)`.
s <- strsplit(x, ":")
for (i in which(lengths(s) > 1L))
s[[i]] <- paste0(sort(s[[i]]), collapse = ":")
return(unlist(s))
}
.BANOVAorderTermsByKnownOrder <- function(currentTerms, currentTermLabels, termLabelOrder) {
# This function reorders the terms of a formula such that they follow the order of termLabelOrder
# currentTerms: output of stats::terms(formula)
# currentTermLabels: output of labels(stats::formula(formula)), possibly after filtering out nuisance
# termLabelOrder: character vector of term labels in the desired order
currentFactors <- attr(currentTerms, "factors")
if (any(currentFactors > 1L)) { # TRUE implies may need to reorder some terms according to termLabelOrder
for (j in grep(":", currentTermLabels, fixed = TRUE)) {
tmp <- strsplit(currentTermLabels[j], ":", fixed = TRUE)[[1L]]
for (order in termLabelOrder) {
if (all(tmp %in% order) && length(tmp) == length(order)) {
currentTermLabels[j] <- paste(order, collapse = ":")
break
}
}
}
}
return(currentTermLabels)
}
# Single Model Inference (SMI) ----
.BANOVAsmi <- function(jaspResults, dataset, options, model) {
userWantsSMI <- any(unlist(options[c(
"singleModelPosteriorPlot", "singleModelQqPlot", "singleModelRsqPlot", "singleModelEstimates", "singleModelCriTable"
)]))
if (!userWantsSMI)
return()
if (!is.null(jaspResults[["containerSingleModel"]])) {
singleModelContainer <- jaspResults[["containerSingleModel"]]
} else {
singleModelContainer <- createJaspContainer(title = gettext("Single Model Inference"))
singleModelContainer$dependOn(c(
"singleModelTerms", "dependent", "samplingMethodMCMC", "samplesMCMC",
"repeatedMeasuresCells", "seed", "setSeed",
.BANOVArscaleDependencies(options[["priorSpecificationMode"]])
))
jaspResults[["containerSingleModel"]] <- singleModelContainer
singleModelContainer$position <- 7
}
singleModel <- jaspResults[["singleModelState"]]$object
if (!is.null(model[["models"]]) && is.null(singleModel) && length(options$singleModelTerms) > 0L && userWantsSMI) {
singleModel <- try(.BANOVAsmiSamplePosterior(dataset, options, model[["analysisType"]]))
if (isTryError(singleModel)) {
singleModelContainer$setError(gettextf("Error in single model inference:\n%s", singleModel))
singleModel <- NULL
} else {
singleModelState <- createJaspState(object = singleModel)
singleModelState$dependOn(optionsFromObject = singleModelContainer)
jaspResults[["singleModelState"]] <- singleModelState
}
}
.BANOVAsmiEstimates(singleModelContainer, options, singleModel)
.BANOVAsmiRsquared (singleModelContainer, options, singleModel)
.BANOVAsmiQqPlot (singleModelContainer, options, singleModel)
.BANOVAsmiRsqPlot (singleModelContainer, options, singleModel)
.BANOVAsmiPosteriorPlot(singleModelContainer, dataset, options, singleModel)
return()
}
.BANOVAsmiSamplePosterior <- function(dataset, options, analysisType) {
nIter <- if (options[["samplingMethodMCMC"]] == "auto") 1e3L else options[["samplesMCMC"]]
modelTerms <- options$singleModelTerms
dependent <- options$dependent
if (analysisType == "RM-ANOVA") {
modelTerms[[length(modelTerms) + 1L]] <- list(components = .BANOVAsubjectName, isNuisance = TRUE)
dependent <- .BANOVAdependentName
}
tempRScale <- .BANOVAgetRScale(options, analysisType)
rscaleFixed <- tempRScale[["rscaleFixed"]]
rscaleRandom <- tempRScale[["rscaleRandom"]]
rscaleCont <- tempRScale[["rscaleCont"]]
rscaleEffects <- tempRScale[["rscaleEffects"]]
formula <- .BANOVAcreateModelFormula(dependent, modelTerms)$model.formula
randomFactors <- .BANOVAgetRandomFactors(options, analysisType)
dataTypes <- .BANOVAgetDataTypes(dataset, formula, randomFactors)
levelInfo <- .BANOVAgetLevelInfo(dataset, formula, dataTypes)
# if all variables are continuous, then continuous variables keep their name. E.g., "contGamma" stays "contGamma".
# if any variables are factors, then continuous variables names are doubled. E.g., "contGamma" becomes "contGamma-contGamma".
allParamNames <- c(
"mu",
if (all(dataTypes == "continuous")) names(levelInfo$levelNames) else unlist(levelInfo$levelNames)
)
.setSeedJASP(options)
samples <- BayesFactor::lmBF(
formula = formula,
data = dataset,
whichRandom = unlist(randomFactors),
progress = TRUE,
posterior = TRUE,
rscaleFixed = rscaleFixed,
rscaleRandom = rscaleRandom,
rscaleCont = rscaleCont,
rscaleEffects = rscaleEffects,
iterations = nIter
)
types <- samples@model@dataTypes
# keep only relevant columns, drop sig2, g_xxx, ...
idx <- match(allParamNames, colnames(samples), nomatch = 0L)
samples <- samples[, idx, drop = FALSE]
# for some odd reason, Bayesfactor uses as column name contcor1-contcor1
# if there is a covariate AND fixed factors, but only contcor1 if all variables are continuous...
allContinuous <- all(types == "continuous")
if (allContinuous) {
cnms <- colnames(samples)[-1L] # omit the intercept (mu) which is not changed by Bayesfactor
colnames(samples)[-1L] <- paste0(cnms, "-", cnms)
}
# find out which parameters are random
isRandom <- logical(ncol(samples))
idx <- which(types == "random")
for (i in idx)
isRandom <- isRandom | startsWith(colnames(samples), names(types)[i])
means <- colMeans(samples)
sds <- matrixStats::colSds(samples)
names(means) <- names(sds) <- colnames(samples)
h <- (1 - options[["credibleInterval"]]) / 2
probs <- c(h, 1-h)
cri <- matrixStats::colQuantiles(samples, probs = probs)
densities <- .BANOVAfitDensity(samples, 2^9, FALSE)
tmp <- .BANOVAgetSMIResidRsq(samples, dataset, formula, nIter, options = options)
residmeans <- rowMeans(tmp$resid)
residquants <- matrixStats::rowQuantiles(tmp$resid, probs = c(h, 1-h))
# all information for q-q plot of residuals
residSumStats <- matrix(c(residmeans, residquants), nrow = length(residmeans), ncol = 3L,
dimnames = list(NULL, c("mean", "cri.2.5%", "cri.97.5%")))
# all information for r-squared density plot
rsqDens <- density(tmp$rsq, n = 2^11, from = 0, to = 1)
rsqCri <- quantile(tmp$rsq, probs = probs)
rsqMean <- mean(tmp$rsq)
return(list(
means = means, sds = sds, CRIs = cri, densities = densities$fit,
residSumStats = residSumStats, rsqDens = rsqDens, rsqCri = rsqCri, rsqMean = rsqMean,
allContinuous = allContinuous, isRandom = isRandom
))
}
.BANOVAsmiEstimates <- function(jaspContainer, options, model) {
if (!is.null(jaspContainer[["SMItablePosteriorEstimates"]]) || !options[["singleModelEstimates"]])
return()
estsTable <- createJaspTable(title = gettext("Single Model Posterior Summary"))
estsTable$position <- 1
estsTable$dependOn(c("singleModelEstimates", "credibleInterval"))
overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
estsTable$addColumnInfo(name = "Variable", title = gettext("Variable"), type = "string")
estsTable$addColumnInfo(name = "Level", title = gettext("Level"), type = "string")
estsTable$addColumnInfo(name = "Mean", title = gettext("Mean"), type = "number")
estsTable$addColumnInfo(name = "SD", title = gettext("SD"), type = "number")
estsTable$addColumnInfo(name = "Lower", title = gettext("Lower"), type = "number", overtitle = overTitle)
estsTable$addColumnInfo(name = "Upper", title = gettext("Upper"), type = "number", overtitle = overTitle)
if (!(is.null(model) || estsTable$getError())) {
.BANOVAfillEstimatesTable(
jaspTable = estsTable,
mus = model$means,
sds = model$sds,
cri = model$CRIs,
hasNoLevels = model$allContinuous,
isRandom = model$isRandom
)
}
jaspContainer[["SMItablePosteriorEstimates"]] <- estsTable
return()
}
.BANOVAsmiPosteriorPlot <- function(jaspContainer, dataset, options, model) {
# meta wrapper for model averaged posterior plots, single model posterior plots, and Q-Q plots
if (!is.null(jaspContainer[["SMIposteriorPlot"]]) || !options$singleModelPosteriorPlot)
return()
posteriorPlotContainer <- createJaspContainer(title = gettext("Single Model Posterior Distributions"))
jaspContainer[["SMIposteriorPlot"]] <- posteriorPlotContainer
posteriorPlotContainer$position <- 2
posteriorPlotContainer$dependOn(c("singleModelPosteriorPlot", "singleModelGroupPosterior"))
if (is.null(model) || posteriorPlotContainer$getError()) {
posteriorPlotContainer[["dummyplot"]] <- createJaspPlot(title = gettext("Posterior distribution"), width = 400, height = 400,
plot = NULL)
} else {
.BANOVAfillPosteriorPlotContainer(
container = posteriorPlotContainer,
densities = model$densities[, -1L, ], # omit intercept
cris = model$CRIs[-1L, ], # omit intercept
isRandom = model$isRandom[-1L], # omit intercept
groupParameters = identical(options[["groupPosterior"]], "grouped")
)
}
return()
}
.BANOVAsmiQqPlot <- function(jaspContainer, options, model) {
if (!is.null(jaspContainer[["QQplot"]]) || !options$singleModelQqPlot)
return()
if (is.null(model) || jaspContainer$getError()) {
p <- NULL
} else {
p <- jaspGraphs::plotQQnorm(
residuals = model$residSumStats[,"mean"],
lower = model$residSumStats[,"cri.2.5%"],
upper = model$residSumStats[,"cri.97.5%"],
ablineColor = "darkred"
)
}
plot <- createJaspPlot(
title = gettext("Q-Q Plot"),
width = 400,
height = 400,
plot = p,
aspectRatio = 1
)
plot$dependOn("singleModelQqPlot")
plot$position <- 3
jaspContainer[["QQplot"]] <- plot
return()
}
.BANOVAsmiRsqPlot <- function(jaspContainer, options, model) {
if (!is.null(jaspContainer[["smirsqplot"]]) || !options$singleModelRsqPlot)
return()
if (is.null(model) || jaspContainer$getError()) {
p <- NULL
} else {
dd <- model$rsqDens
rsqCri <- model$rsqCri
df <- data.frame(x = dd$x, y = dd$y)
xName <- expression(R^2)
p <- jaspGraphs::PlotPriorAndPosterior(dfLines = df, xName = xName, CRI = rsqCri, drawCRItxt = FALSE)
}
plot <- createJaspPlot(
title = gettextf("Posterior R%s", "\u00B2"),
width = 400,
height = 400,
plot = p,
aspectRatio = 1
)
plot$dependOn("singleModelRsqPlot")
plot$position <- 4
jaspContainer[["smirsqplot"]] <- plot
return()
}
.BANOVAsmiRsquared <- function(jaspResults, options, model) {
if (!is.null(jaspResults[["tableSMICRI"]]) || !options[["singleModelCriTable"]])
return()
criTable <- createJaspTable(title = gettextf("Single Model R%s", "\u00B2"))
criTable$position <- 3.5
criTable$dependOn(c("singleModelCriTable", "credibleInterval"))
overTitle <- gettextf("%s%% Credible Interval", format(100 * options[["credibleInterval"]], digits = 3))
criTable$addColumnInfo(name = "rsq", title = "", type = "string")
criTable$addColumnInfo(name = "Mean", title = gettext("Mean"), type = "number")
criTable$addColumnInfo(name = "Lower", title = gettext("Lower"), type = "number", overtitle = overTitle)
criTable$addColumnInfo(name = "Upper", title = gettext("Upper"), type = "number", overtitle = overTitle)
if (!is.null(model)) {
cri <- model[["rsqCri"]]
df <- data.frame(
rsq = "R\u00B2",
Mean = model[["rsqMean"]],
Lower = cri[1L],
Upper = cri[2L],
row.names = NULL
)
criTable$setData(df)
} else {
criTable[["rsq"]] <- "R\u00B2"
}
jaspResults[["tableSMICRI"]] <- criTable
return()
}
# Citations ----
.BANOVAcitations <- c(
"MoreyEtal2015" = "Morey, R. D. & Rouder, J. N. (2015). BayesFactor (Version 0.9.10-2)[Computer software].",
"RouderEtal2012" = "Rouder, J. N., Morey, R. D., Speckman, P. L., Province, J. M., (2012) Default Bayes Factors for ANOVA Designs. Journal of Mathematical Psychology. 56. p. 356-374.",
"Jeffreys1938" = "Jeffreys, H. (1938). Significance tests when several degrees of freedom arise simultaneously. Proceedings of the Royal Society of London. Series A, Mathematical and Physical Sciences, 165, 161-198.",
"WestfallEtal1997" = "Westfall, P. H., Johnson, W. O., & Utts, J. M. (1997). A Bayesian perspective on the Bonferroni adjustment. Biometrika, 84, 419-427."
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.