#
# Copyright (C) 2013-2018 University of Amsterdam
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
RegressionLinearInternal <- function(jaspResults, dataset = NULL, options) {
ready <- options$dependent != "" && (length(unlist(options$modelTerms)) > 0 || options$interceptTerm)
if (ready) {
dataset <- .linregReadDataset(dataset, options)
.linregCheckErrors(dataset, options)
}
modelContainer <- .linregGetModelContainer(jaspResults, position = 1)
model <- .linregCalcModel(modelContainer, dataset, options, ready)
# these output elements show statistics of all the lm fits
if (is.null(modelContainer[["summaryTable"]]))
.linregCreateSummaryTable(modelContainer, model, options, position = 1)
if (options$modelFit && is.null(modelContainer[["anovaTable"]]))
.linregCreateAnovaTable(modelContainer, model, options, position = 2)
if (options$coefficientEstimate && is.null(modelContainer[["coeffTable"]]))
.linregCreateCoefficientsTable(modelContainer, model, dataset, options, position = 3)
if (options$coefficientBootstrap && is.null(modelContainer[["bootstrapCoeffTable"]]))
.linregCreateBootstrapCoefficientsTable(modelContainer, model, dataset, options, position = 4)
if (options$partAndPartialCorrelation && is.null(modelContainer[["partialCorTable"]]))
.linregCreatePartialCorrelationsTable(modelContainer, model, dataset, options, position = 6)
if (options$covarianceMatrix && is.null(modelContainer[["coeffCovMatrixTable"]]))
.linregCreateCoefficientsCovarianceMatrixTable(modelContainer, model, options, position = 7)
if (options$collinearityDiagnostic && is.null(modelContainer[["collinearityTable"]]))
.linregCreateCollinearityDiagnosticsTable(modelContainer, model, options, position = 8)
# these output elements show statistics of the "final model" (lm fit with all predictors in enter method and last lm fit in stepping methods)
finalModel <- model[[length(model)]]
if (options$residualCasewiseDiagnostic && is.null(modelContainer[["influenceTable"]]))
.glmInfluenceTable(modelContainer, finalModel$fit, dataset, options, ready = ready, position = 9, linRegAnalysis = TRUE)
.regressionExportResiduals(modelContainer, finalModel$fit, dataset, options, ready = ready)
if (options$residualStatistic && is.null(modelContainer[["residualsTable"]]))
.linregCreateResidualsTable(modelContainer, finalModel, options, position = 10)
if (options$residualVsDependentPlot && is.null(modelContainer[["residualsVsDepPlot"]]))
.linregCreateResidualsVsDependentPlot(modelContainer, finalModel, options, position = 11)
if (options$residualVsCovariatePlot && is.null(modelContainer[["residualsVsCovContainer"]]))
.linregCreateResidualsVsCovariatesPlots(modelContainer, finalModel, dataset, options, position = 12)
if (options$residualVsFittedPlot && is.null(modelContainer[["residualsVsPredPlot"]]))
.linregCreateResidualsVsPredictedPlot(modelContainer, finalModel, options, position = 13)
if (options$residualHistogramPlot && is.null(modelContainer[["residualsVsHistPlot"]]))
.linregCreateResidualsVsHistogramPlot(modelContainer, finalModel, options, position = 14)
if (options$residualQqPlot && is.null(modelContainer[["residualsQQPlot"]]))
.linregCreateResidualsQQPlot(modelContainer, finalModel, options, position = 15)
if (options$marginalPlot && is.null(modelContainer[["marginalPlotsContainer"]]))
.linregCreateMarginalPlots(modelContainer, finalModel, dataset, options, position = 17)
# these output elements do not use statistics of a pre-calculated lm fit
if (options$partialResidualPlot && is.null(modelContainer[["partialPlotContainer"]]))
.linregCreatePartialPlots(modelContainer, dataset, options, position = 16)
if (options$descriptives && is.null(modelContainer[["descriptivesTable"]]))
.linregCreateDescriptivesTable(modelContainer, dataset, options, position = 5)
}
#TODO: capture crashes with many interactions between factors!
.linregReadDataset <- function(dataset, options, onlyCompleteCases = TRUE) {
if (!is.null(dataset))
return(dataset)
numericVariables <- c(options$dependent, unlist(options$covariates), options$weights)
numericVariables <- numericVariables[numericVariables != ""]
factors <- unlist(options$factors)
excludeNaListwise <- if(onlyCompleteCases) c(factors, numericVariables) else NULL
dataset <- .readDataSetToEnd(columns.as.factor = factors, columns.as.numeric = numericVariables, exclude.na.listwise = excludeNaListwise)
return(dataset)
}
.linregGetCaseNumbers <- function(options) {
dataset <- .linregReadDataset(NULL, options, onlyCompleteCases = FALSE)
numericVariables <- c(options$dependent, unlist(options$covariates), options$weights)
numericVariables <- numericVariables[numericVariables != ""]
factors <- unlist(options$factors)
completeCases <- complete.cases(dataset[, c(factors, numericVariables)])
return(seq_len(nrow(dataset))[completeCases])
}
.linregCheckIfFactorWithMoreLevels <- function(var) {
# Custom function to check if a variable is a factor with more than 2 levels
return(is.factor(var) && nlevels(var) > 2)
}
.linregCheckIfInteractionWithFactors <- function(modelTerm, factorVariables) {
# Custom function to check if interaction contains more than 1 factor
return(sum(modelTerm[["components"]] %in% factorVariables) > 1)
}
.linregCheckErrors <- function(dataset, options) {
stepwiseProcedureChecks <- NULL
if (options$method %in% c("backward", "forward", "stepwise")) {
stepwiseProcedureChecks <- list(
checkIfFactorWithMoreLevels = function() {
if (any(vapply(dataset, .linregCheckIfFactorWithMoreLevels, logical(1L)))) {
return(gettext("Stepwise procedures are not supported for models containing factors with more than 2 levels; retry the analysis using dummy variables"))
}
},
checkIfFactorInteractions = function() {
if (any(vapply(options[["modelTerms"]], .linregCheckIfInteractionWithFactors, logical(1L), factorVariables = options[["factors"]]))) {
return(gettext("Stepwise procedures are not supported for interactions containing more than 1 factor"))
}
},
checkIfPEntryIsValid = function() {
if (options$steppingMethodCriteriaType == "pValue" && options$steppingMethodCriteriaPEntry > options$steppingMethodCriteriaPRemoval)
return(gettext("Error in Stepping Method Criteria: Entry p-value needs to be smaller than removal p-value"))
},
checkIfFEntryIsValid = function() {
if (options$steppingMethodCriteriaType == "fValue" && options$steppingMethodCriteriaFEntry < options$steppingMethodCriteriaFRemoval)
return(gettext("Error in Stepping Method Criteria: Entry F-value needs to be larger than removal F-value"))
}
)
}
defaultTarget <- c(options$dependent, unlist(options$covariates))
.hasErrors(dataset, type = c("infinity", "variance", "observations", "varCovData"),
custom = stepwiseProcedureChecks,
custom.target = defaultTarget,
observations.amount = "< 2",
observations.target = defaultTarget,
varCovData.target = unlist(options$covariates),
varCovData.corFun = stats::cov,
exitAnalysisIfErrors = TRUE)
if (options$weights != "") {
.hasErrors(dataset, type = c("infinity", "limits", "observations"),
all.target = options$weights, limits.min = 0, observations.amount = "< 2",
exitAnalysisIfErrors = TRUE)
covwt <- function(...) return(stats::cov.wt(..., wt = dataset[[.v(options[["weights"]])]])$cov)
.hasErrors(dataset[, -which(colnames(dataset) %in% c(.v(options$weights)))], type = "varCovData", varCovData.corFun = covwt,
exitAnalysisIfErrors = TRUE)
}
}
.linregGetModelContainer <- function(jaspResults, position) {
if (is.null(jaspResults[["modelContainer"]])) {
modelContainer <- createJaspContainer()
modelContainer$dependOn(c("dependent", "method", "covariates", "factors", "weights", "modelTerms", "steppingMethodCriteriaType",
"steppingMethodCriteriaPEntry", "steppingMethodCriteriaPRemoval", "steppingMethodCriteriaFEntry", "steppingMethodCriteriaFRemoval",
"interceptTerm", "naAction"))
modelContainer$position <- position
jaspResults[["modelContainer"]] <- modelContainer
}
return(jaspResults[["modelContainer"]])
}
.linregCreateSummaryTable <- function(modelContainer, model, options, position) {
if(options[['dependent']] == "")
summaryTable <- createJaspTable(gettext("Model Summary"))
else
summaryTable <- createJaspTable(gettextf("Model Summary - %s", options[['dependent']]))
summaryTable$dependOn(c("residualDurbinWatson", "rSquaredChange", "fChange", "modelAICBIC"))
summaryTable$position <- position
summaryTable$showSpecifiedColumnsOnly <- TRUE
summaryTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string")
summaryTable$addColumnInfo(name = "R", title = gettext("R"), type = "number", format = "dp:3")
summaryTable$addColumnInfo(name = "R2", title = gettextf("R%s", "\u00B2"), type = "number", format = "dp:3")
summaryTable$addColumnInfo(name = "adjR2", title = gettextf("Adjusted R%s", "\u00B2"), type = "number", format = "dp:3")
summaryTable$addColumnInfo(name = "RMSE", title = gettext("RMSE"), type = "number")
if (options$modelAICBIC) {
summaryTable$addColumnInfo(name = "AIC", title = gettext("AIC"), type = "number", format = "dp:3")
summaryTable$addColumnInfo(name = "BIC", title = gettext("BIC"), type = "number", format = "dp:3")
}
if (options$rSquaredChange || options$fChange) {
if (options$rSquaredChange)
summaryTable$addColumnInfo(name = "R2c", title = gettextf("R%s Change", "\u00B2"), type = "number", format = "dp:3")
if (options$fChange)
summaryTable$addColumnInfo(name = "Fc", title = gettext("F Change"), type = "number")
summaryTable$addColumnInfo(name = "df1", title = gettext("df1"), type = "integer")
summaryTable$addColumnInfo(name = "df2", title = gettext("df2"), type = "integer")
summaryTable$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue")
}
if (options$residualDurbinWatson) {
summaryTable$addColumnInfo(name = "DW_ac", title = gettext("Autocorrelation"), type = "number", overtitle = gettext("Durbin-Watson"))
summaryTable$addColumnInfo(name = "DW", title = gettext("Statistic"), type = "number", overtitle = gettext("Durbin-Watson"))
if (options$weights == "")
summaryTable$addColumnInfo(name = "DW_p", title = gettext("p"), type = "pvalue", overtitle = "Durbin-Watson")
else
summaryTable$addFootnote(message = gettext("p-value for Durbin-Watson test is unavailable for weighted regression."))
}
.linregAddPredictorsInNullFootnote(summaryTable, options$modelTerms[[1]][["components"]])
if (!is.null(model)) {
if (length(model) == 1 && length(model[[1]]$predictors) == 0 && !options$interceptTerm)
summaryTable$addFootnote(gettext("No covariate could be entered in the model"))
else
.linregFillSummaryTable(summaryTable, model)
}
modelContainer[["summaryTable"]] <- summaryTable
}
.linregFillSummaryTable <- function(summaryTable, model) {
for (i in seq_along(model)) {
lmSummary <- model[[i]][["summary"]]
rSquareChange <- model[[i]][["rSquareChange"]]
durbinWatson <- model[[i]][["durbinWatson"]]
summaryTable$addRows(list(
model = model[[i]]$title,
R = as.numeric(sqrt(lmSummary$r.squared)),
R2 = as.numeric(lmSummary$r.squared),
adjR2 = as.numeric(lmSummary$adj.r.squared),
AIC = as.numeric(AIC(model[[i]][["fit"]])),
BIC = as.numeric(BIC(model[[i]][["fit"]])),
RMSE = as.numeric(lmSummary$sigma),
R2c = rSquareChange$R2c,
Fc = rSquareChange$Fc,
df1 = rSquareChange$df1,
df2 = rSquareChange$df2,
p = rSquareChange$p,
DW_ac = durbinWatson$r,
DW = durbinWatson$dw,
DW_p = durbinWatson$p
))
}
}
.linregCreateAnovaTable <- function(modelContainer, model, options, position) {
anovaTable <- createJaspTable(gettext("ANOVA"))
anovaTable$dependOn(c("modelFit", "vovkSellke"))
anovaTable$position <- position
anovaTable$showSpecifiedColumnsOnly <- TRUE
anovaTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
anovaTable$addColumnInfo(name = "cases", title = "", type = "string")
anovaTable$addColumnInfo(name = "SS", title = gettext("Sum of Squares"), type = "number")
anovaTable$addColumnInfo(name = "df", title = gettext("df"), type = "integer")
anovaTable$addColumnInfo(name = "MS", title = gettext("Mean Square"), type = "number")
anovaTable$addColumnInfo(name = "F", title = gettext("F"), type = "number")
anovaTable$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue")
.linregAddPredictorsInNullFootnote(anovaTable, options$modelTerms[[1]][["components"]])
.linregAddVovkSellke(anovaTable, options$vovkSellke)
if (!is.null(model)) {
.linregAddInterceptNotShownFootnote(anovaTable, model, options)
.linregFillAnovaTable(anovaTable, model, options)
}
modelContainer[["anovaTable"]] <- anovaTable
}
.linregFillAnovaTable <- function(anovaTable, model, options) {
rowTypes <- list(Regression = gettext("Regression"), Residual = gettext("Residual"), Total = gettext("Total"))
indicesOfModelsWithPredictors <- .linregGetIndicesOfModelsWithPredictors(model, options)
for (i in indicesOfModelsWithPredictors) {
isNewGroup <- i > 1
anovaRes <- .linregGetAnova(model[[i]]$fit, model[[i]]$predictors)
for (rowType in names(rowTypes)) {
anovaTable$addRows(c(anovaRes[[rowType]], list(.isNewGroup = isNewGroup, model = model[[i]]$title, cases = rowTypes[[rowType]])))
isNewGroup <- FALSE
}
}
}
.linregCreateCoefficientsTable <- function(modelContainer, model, dataset, options, position) {
coeffTable <- createJaspTable(gettext("Coefficients"))
coeffTable$dependOn(c("coefficientEstimate", "coefficientCi", "coefficientCiLevel",
"collinearityStatistic", "vovkSellke"))
coeffTable$position <- position
coeffTable$showSpecifiedColumnsOnly <- TRUE
coeffTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
coeffTable$addColumnInfo(name = "name", title = "", type = "string")
coeffTable$addColumnInfo(name = "unstandCoeff", title = gettext("Unstandardized"), type = "number")
coeffTable$addColumnInfo(name = "SE", title = gettext("Standard Error"), type = "number")
coeffTable$addColumnInfo(name = "standCoeff", title = gettext("Standardized"), type = "number")
coeffTable$addColumnInfo(name = "t", title = gettext("t"), type = "number")
coeffTable$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue")
.linregAddVovkSellke(coeffTable, options$vovkSellke)
if (options$coefficientCi) {
overtitle <- gettextf("%.0f%% CI", 100 * options$coefficientCiLevel)
coeffTable$addColumnInfo(name = "lower", title = gettext("Lower"), type = "number", overtitle = overtitle)
coeffTable$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle)
}
if (options$collinearityStatistic) {
overtitle <- gettext("Collinearity Statistics")
coeffTable$addColumnInfo(name = "tolerance", title = gettext("Tolerance"), type = "number", format = "dp:3", overtitle = overtitle)
coeffTable$addColumnInfo(name = "VIF", title = gettext("VIF"), type = "number", overtitle = overtitle)
}
if (!is.null(model)) {
.linregAddFootnotePredictorsNeverIncluded(coeffTable, model, options)
.linregAddFootnoteFactors(coeffTable, options[["factors"]], options[["collinearityDiagnostic"]])
.linregFillCoefficientsTable(coeffTable, model, dataset, options)
}
modelContainer[["coeffTable"]] <- coeffTable
}
.linregAddFootnotePredictorsNeverIncluded <- function(coeffTable, model, options) {
if (options$method %in% c("forward", "stepwise")) {
includedPredictors <- unlist(lapply(model, "[[", "predictors"))
neverIncludedPredictors <- setdiff(unlist(options$covariates), .unv(includedPredictors))
if (length(neverIncludedPredictors) > 0) {
message <- sprintf(ngettext(length(neverIncludedPredictors),
"The following covariate was considered but not included: %s.",
"The following covariates were considered but not included: %s."),
paste(neverIncludedPredictors, collapse=", "))
coeffTable$addFootnote(message)
}
}
}
.linregAddFootnoteFactors <- function(table, factors, collinearityDiagnostics = FALSE) {
if (length(factors) > 0L) {
colNames <- "standCoeff"
message <- gettext("Standardized coefficients can only be computed for continuous predictors.")
table$addFootnote(colNames = colNames, message = message)
}
}
.linregAddFootnoteAliasedCoefficients <- function(table, rows) {
table$addFootnote(gettext("Missing coefficients are undefined because of singularities. Check the data for anything out of order!"))
}
.linregFillCoefficientsTable <- function(coeffTable, model, dataset, options) {
for (i in seq_along(model)) {
isNewGroup <- i > 1
temp <- .linregGetCoefficients(model[[i]]$fit, model[[i]]$predictors, dataset, options)
coefficients <- temp[["coefficients"]]
footnoteRows <- temp[["footnote"]]
for (j in seq_along(coefficients)) {
coeffTable$addRows(c(coefficients[[j]], list(.isNewGroup = isNewGroup, model = model[[i]]$title)))
isNewGroup <- FALSE
}
# TODO: should this be done only after joining all obtained footnotes?
if (!is.null(footnoteRows))
.linregAddFootnoteAliasedCoefficients(coeffTable, footnoteRows)
}
}
.linregCreateBootstrapCoefficientsTable <- function(modelContainer, model, dataset, options, position) {
bootstrapCoeffTable <- createJaspTable(gettext("Bootstrap Coefficients"))
bootstrapCoeffTable$dependOn(c("coefficientEstimate", "coefficientCi", "coefficientCiLevel",
"coefficientBootstrap", "coefficientBootstrapSamples"))
bootstrapCoeffTable$position <- position
bootstrapCoeffTable$showSpecifiedColumnsOnly <- TRUE
bootstrapCoeffTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
bootstrapCoeffTable$addColumnInfo(name = "name", title = "", type = "string")
bootstrapCoeffTable$addColumnInfo(name = "unstandCoeff", title = gettext("Unstandardized"), type = "number")
bootstrapCoeffTable$addColumnInfo(name = "bias", title = gettext("Bias"), type = "number")
bootstrapCoeffTable$addColumnInfo(name = "SE", title = gettext("Standard Error"), type = "number")
bootstrapCoeffTable$addColumnInfo(name = "pvalue", title = gettext("p"), type = "pvalue")
if (options$coefficientCi) {
overtitle <- gettextf("%s%% CI\u002A", 100 * options$coefficientCiLevel)
bootstrapCoeffTable$addColumnInfo(name = "lower", title = gettext("Lower"), type = "number", overtitle = overtitle)
bootstrapCoeffTable$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle)
}
bootstrapCoeffTable$addFootnote(gettextf("Bootstrapping based on %i replicates.", options[['coefficientBootstrapSamples']]))
bootstrapCoeffTable$addFootnote(gettext("Coefficient estimate is based on the median of the bootstrap distribution."))
bootstrapCoeffTable$addFootnote(gettext("Bias corrected accelerated."), colNames = "pvalue", symbol = "\u002A")
bootstrapCoeffTable$addCitation(
c("Efron, B., & Tibshirani, R. J. (1994). An introduction to the bootstrap. CRC press.",
"Hall, P. (1992). The bootstrap and Edgeworth expansion. Springer Science & Business Media.")
)
modelContainer[["bootstrapCoeffTable"]] <- bootstrapCoeffTable
if (!is.null(model))
.linregFillBootstrapCoefficientsTable(bootstrapCoeffTable, modelContainer, model, dataset, options)
}
.linregFillBootstrapCoefficientsTable <- function(bootstrapCoeffTable, modelContainer, model, dataset, options) {
metaCols <- .linregGetTitlesAndIsNewGroups(model)
if (is.null(modelContainer[["bootstrapCoefficients"]])) {
startProgressbar(options$coefficientBootstrapSamples * length(model))
bootstrapCoeffTable$setData(metaCols)
anyMissingValues <- FALSE
coefficients <- NULL
for (i in seq_along(model)) {
data <- .linregGetBootstrapCoefficients(model[[i]]$fit, dataset, options)
anyMissingValues <- anyNA(data[-1L])
coefficients <- rbind(coefficients, data)
bootstrapCoeffTable$setData(.linregCombineMetaWithData(metaCols, coefficients))
}
if (anyMissingValues)
bootstrapCoeffTable$addFootnote(gettext("Some bootstrap results could not be computed."))
modelContainer[["bootstrapCoefficients"]] <- createJaspState(coefficients)
modelContainer[["bootstrapCoefficients"]]$dependOn(c("coefficientBootstrapSamples", "coefficientCiLevel"))
} else {
bootstrapCoeffTable$setData(.linregCombineMetaWithData(metaCols, modelContainer[["bootstrapCoefficients"]]$object))
}
}
.linregGetTitlesAndIsNewGroups <- function(model, includeConstant) {
isNewGroup <- logical(0)
titles <- character(0)
for (i in seq_along(model)) {
if (is.null(model[[i]]$fit))
next
numPredictors <- length(stats::coef(model[[i]]$fit))
isNewGroupCurrent <- i > 1
if (numPredictors > 1)
isNewGroupCurrent <- c(isNewGroupCurrent, logical(numPredictors - 1))
isNewGroup <- c(isNewGroup, isNewGroupCurrent)
titles <- c(titles, rep(model[[i]]$title, numPredictors))
}
return(data.frame(.isNewGroup = isNewGroup, model = titles))
}
.linregCombineMetaWithData <- function(meta, data) { # this can go once we can add cells to a jaspTable
if (!is.data.frame(meta) || !is.data.frame(data))
stop(gettext("expecting both arguments to be data.frames"))
filler <- matrix(NA, ncol(data), nrow = nrow(meta)-nrow(data))
colnames(filler) <- names(data)
data <- rbind(data, filler)
return(cbind(meta, data))
}
.linregCreatePartialCorrelationsTable <- function(modelContainer, model, dataset, options, position) {
partPartialTable <- createJaspTable(gettext("Part And Partial Correlations"))
partPartialTable$dependOn("partAndPartialCorrelation")
partPartialTable$position <- position
partPartialTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
partPartialTable$addColumnInfo(name = "name", title = "", type = "string")
partPartialTable$addColumnInfo(name = "partial", title = gettext("Partial"), type = "number", format = "dp:3")
partPartialTable$addColumnInfo(name = "part", title = gettext("Part"), type = "number", format = "dp:3")
if (!is.null(model)) {
.linregAddInterceptNotShownFootnote(partPartialTable, model, options)
.linregFillPartialCorrelationsTable(partPartialTable, model, dataset, options)
}
modelContainer[["partialCorTable"]] <- partPartialTable
}
.linregFillPartialCorrelationsTable <- function(partPartialTable, model, dataset, options) {
indicesOfModelsWithPredictors <- .linregGetIndicesOfModelsWithPredictors(model, options)
for (i in indicesOfModelsWithPredictors) {
isNewGroup <- i > 1
cors <- .linregGetPartAndPartialCorrelation(model[[i]]$fit, model[[i]]$predictors, dataset, options)
for (j in seq_along(cors)) {
partPartialTable$addRows(c(cors[[j]], list(.isNewGroup = isNewGroup, model = model[[i]]$title)))
isNewGroup <- FALSE
}
}
}
.linregCreateCoefficientsCovarianceMatrixTable <- function(modelContainer, model, options, position) {
covMatTable <- createJaspTable(gettext("Coefficients Covariance Matrix"))
covMatTable$dependOn("covarianceMatrix")
covMatTable$position <- position
covMatTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
covMatTable$addColumnInfo(name = "name", title = "", type = "string")
if (!is.null(model)) {
.linregAddPredictorsAsColumns(covMatTable, model, includeIntercept = FALSE)
.linregAddInterceptNotShownFootnote(covMatTable, model, options)
.linregFillCoefficientsCovarianceMatrixTable(covMatTable, model, options)
}
modelContainer[["coeffCovMatrixTable"]] <- covMatTable
}
.linregFillCoefficientsCovarianceMatrixTable <- function(covMatTable, model, options) {
rawNames <- setdiff(.linregGetParameterNames(model), "(Intercept)") # names used by R
prettyNames <- setdiff(.linregMakePrettyNames(model), gettext("(Intercept)")) # names shown in JASP
indicesOfModelsWithPredictors <- .linregGetIndicesOfModelsWithPredictors(model, options)
for (i in indicesOfModelsWithPredictors) {
isNewGroup <- i > 1
covData <- .linregGetCovarianceMatrix(model[[i]]$fit, rawNames, prettyNames)
if (nrow(covData) > 0) {
covData <- cbind(covData, .isNewGroup = c(isNewGroup, rep(F, nrow(covData) - 1)), model = model[[i]]$title)
covMatTable$addRows(covData)
}
}
}
.linregCreateCollinearityDiagnosticsTable <- function(modelContainer, model, options, position) {
collDiagTable <- createJaspTable(gettext("Collinearity Diagnostics"))
collDiagTable$dependOn("collinearityDiagnostic")
collDiagTable$position <- position
collDiagTable$addColumnInfo(name = "model", title = gettext("Model"), type = "string", combine = TRUE)
collDiagTable$addColumnInfo(name = "dimension", title = gettext("Dimension"), type = "integer")
collDiagTable$addColumnInfo(name = "eigenvalue", title = gettext("Eigenvalue"), type = "number")
collDiagTable$addColumnInfo(name = "condIndex", title = gettext("Condition Index"), type = "number")
if (!is.null(model)) {
.linregAddPredictorsAsColumns(collDiagTable, model, options[["interceptTerm"]], overtitle = gettext("Variance Proportions"), format = "dp:3")
.linregAddInterceptNotShownFootnote(collDiagTable, model, options)
.linregFillCollinearityDiagnosticsTable(collDiagTable, model, options)
}
modelContainer[["collinearityTable"]] <- collDiagTable
}
.linregFillCollinearityDiagnosticsTable <- function(collDiagTable, model, options) {
columns <- .linregGetPredictorColumnNames(model, options$modelTerms)
indicesOfModelsWithPredictors <- .linregGetIndicesOfModelsWithPredictors(model, options)
for (i in indicesOfModelsWithPredictors) {
isNewGroup <- i > 1
collDiagData <- try(.linregGetCollinearityDiagnostics(model[[i]]$fit, columns, options$interceptTerm))
if (jaspBase::isTryError(collDiagData)) {
collDiagData <- cbind(.isNewGroup = isNewGroup, model = model[[i]]$title)
collDiagTable$addFootnote(gettext("Some collinearity diagnostics could not be computed. This is expected if the model contains singularities."))
} else {
collDiagData <- cbind(collDiagData, .isNewGroup = c(isNewGroup, rep(F, nrow(collDiagData) - 1)), model = model[[i]]$title)
}
collDiagTable$addRows(collDiagData)
}
}
.linregCreateResidualsTable <- function(modelContainer, finalModel, options, position) {
residualsTable <- createJaspTable(gettext("Residuals Statistics"))
residualsTable$dependOn("residualStatistic")
residualsTable$position <- position
residualsTable$addColumnInfo(name = "type", title = "", type = "string")
residualsTable$addColumnInfo(name = "min", title = gettext("Minimum"), type = "number")
residualsTable$addColumnInfo(name = "max", title = gettext("Maximum"), type = "number")
residualsTable$addColumnInfo(name = "mean", title = gettext("Mean"), type = "number")
residualsTable$addColumnInfo(name = "SD", title = gettext("SD"), type = "number")
residualsTable$addColumnInfo(name = "N", title = gettext("N"), type = "integer")
if (!is.null(finalModel))
residualsTable$addRows(.linregGetResidualsStatistics(finalModel$fit, finalModel$predictors))
modelContainer[["residualsTable"]] <- residualsTable
}
.linregCreateResidualsVsDependentPlot <- function(modelContainer, finalModel, options, position) {
residVsDepPlot <- createJaspPlot(title = gettext("Residuals vs. Dependent"), width = 530, height = 400)
residVsDepPlot$dependOn("residualVsDependentPlot")
residVsDepPlot$position <- position
modelContainer[["residualsVsDepPlot"]] <- residVsDepPlot
if (!is.null(finalModel) && !is.null(finalModel$fit)) {
fit <- finalModel$fit
.linregInsertPlot(residVsDepPlot, .linregPlotResiduals, xVar = fit$model[ , 1], xlab = options$dependent, res = residuals(fit), ylab = gettext("Residuals"))
}
}
.linregCreateResidualsVsCovariatesPlots <- function(modelContainer, finalModel, dataset, options, position) {
residualsVsCovContainer <- createJaspContainer(gettext("Residuals vs. Covariates"))
residualsVsCovContainer$dependOn("residualVsCovariatePlot")
residualsVsCovContainer$position <- position
modelContainer[["residualsVsCovContainer"]] <- residualsVsCovContainer
if (!is.null(finalModel)) {
predictors <- finalModel$predictors
for (predictor in predictors)
.linregCreatePlotPlaceholder(residualsVsCovContainer, index = .unvf(predictor), title = gettextf("Residuals vs. %s", .unvf(predictor)))
for (predictor in predictors) {
if (.linregIsInteraction(predictor) && .linregContainsFactor(finalModel$fit, predictor)) {
# TODO: this is maybe possible when an interaction consists of only factors, but the plot won't be very pretty
residualsVsCovContainer[[.unvf(predictor)]]$setError(gettext("Cannot plot residuals versus an interaction with a factor."))
} else {
.linregFillResidualsVsCovariatesPlot(residualsVsCovContainer[[.unvf(predictor)]], predictor, finalModel$fit, dataset)
}
}
}
}
.linregFillResidualsVsCovariatesPlot <- function(residVsCovPlot, predictor, fit, dataset) {
if (.linregIsInteraction(predictor))
xVar <- .linregMakeCombinedVariableFromInteraction(predictor, dataset)
else
xVar <- dataset[[predictor]]
.linregInsertPlot(residVsCovPlot, .linregPlotResiduals, xVar = xVar, xlab = .unvf(predictor), res = residuals(fit), ylab = gettext("Residuals"))
}
.linregCreateResidualsVsPredictedPlot <- function(modelContainer, finalModel, options, position) {
residVsPredPlot <- createJaspPlot(title = gettext("Residuals vs. Predicted"), width = 530, height = 400)
residVsPredPlot$dependOn("residualVsFittedPlot")
residVsPredPlot$position <- position
modelContainer[["residualsVsPredPlot"]] <- residVsPredPlot
if (!is.null(finalModel) && !is.null(finalModel$fit)) {
fit <- finalModel$fit
.linregInsertPlot(residVsPredPlot, .linregPlotResiduals, xVar = predict(fit), xlab = gettext("Predicted Values"), res = residuals(fit), ylab = gettext("Residuals"))
}
}
.linregCreateResidualsVsHistogramPlot <- function(modelContainer, finalModel, options, position) {
title <- gettext("Residuals Histogram")
if (options$residualHistogramStandardizedPlot)
title <- gettextf("Standardized %s", title)
residVsHistPlot <- createJaspPlot(title = title, width = 530, height = 400)
residVsHistPlot$dependOn(c("residualHistogramPlot", "residualHistogramStandardizedPlot"))
residVsHistPlot$position <- position
modelContainer[["residualsVsHistPlot"]] <- residVsHistPlot
if (!is.null(finalModel))
.linregFillResidualsVsHistogramPlot(residVsHistPlot, finalModel$fit, options)
}
.linregFillResidualsVsHistogramPlot <- function(residVsHistPlot, fit, options) {
if (!is.null(fit)) {
residName <- gettext("Residuals")
resid <- residuals(fit)
if (options$residualHistogramStandardizedPlot) {
residName <- gettextf("Standardized %s", residName)
resid <- resid / sd(resid)
}
.linregInsertPlot(residVsHistPlot, .linregPlotResidualsHistogram, res = resid, resName = residName)
}
}
.linregCreateResidualsQQPlot <- function(modelContainer, finalModel, options, position) {
residQQPlot <- createJaspPlot(title = gettext("Q-Q Plot Standardized Residuals"), width = 400, height = 400)
residQQPlot$dependOn("residualQqPlot")
residQQPlot$position <- position
modelContainer[["residualsQQPlot"]] <- residQQPlot
if (!is.null(finalModel) && !is.null(finalModel$fit)) {
fit <- finalModel$fit
.linregInsertPlot(residQQPlot, .glmFillPlotResQQ, model = fit,
residType = "deviance", options = options)
}
}
.linregCreatePartialPlots <- function(modelContainer, dataset, options, position) {
predictors <- .linregGetPredictors(options$modelTerms)
title <- ngettext(length(predictors), "Partial Regression Plot", "Partial Regression Plots")
partialPlotContainer <- createJaspContainer(title)
partialPlotContainer$dependOn(c("partialResidualPlot", "partialResidualPlotCi", "partialResidualPlotCiLevel",
"partialResidualPlotPredictionInterval", "partialResidualPlotPredictionIntervalLevel"))
partialPlotContainer$position <- position
modelContainer[["partialPlotContainer"]] <- partialPlotContainer
predictors <- .linregGetPredictors(options$modelTerms[[length(options$modelTerms)]][["components"]])
if (any(.linregIsInteraction(predictors))) {
.linregCreatePlotPlaceholder(partialPlotContainer, index = "placeholder", title = "")
partialPlotContainer$setError(gettext("Partial plots are not supported for models containing interaction terms"))
return()
}
if (options$dependent != "" && length(predictors) > 0) {
for (predictor in predictors)
.linregCreatePlotPlaceholder(partialPlotContainer, index = .unvf(predictor), title = gettextf("%1$s vs. %2$s", options$dependent, .unvf(predictor)))
for (predictor in predictors) {
if (.linregContainsFactor(dataset, predictor)) {
partialPlotContainer[[.unvf(predictor)]]$setError(gettext("Partial plots are not supported for factors"))
} else {
.linregFillPartialPlot(partialPlotContainer[[.unvf(predictor)]], predictor, predictors, dataset, options)
}
}
}
}
.linregFillPartialPlot <- function(partialPlot, predictor, predictors, dataset, options) {
plotData <- .linregGetPartialPlotData(predictor, predictors, dataset, options)
xVar <- plotData[["residualsPred"]]
resid <- plotData[["residualsDep"]]
dfResid <- length(resid) - length(predictors) - 1
xlab <- gettextf("Residuals %s", .unvf(predictor))
ylab <- gettextf("Residuals %s", options$dependent)
# Compute regresion lines
weights <- dataset[[.v(options$weights)]]
line <- as.list(setNames(lm(residualsDep~residualsPred, data = plotData, weights = weights)$coeff,
c("intercept", "slope"))
)
.linregInsertPlot(partialPlot, .linregPlotResiduals, xVar = xVar, res = resid, dfRes = dfResid, xlab = xlab, ylab = ylab,
regressionLine = TRUE, confidenceIntervals = options$partialResidualPlotCi,
confidenceIntervalsInterval = options$partialResidualPlotCiLevel,
predictionIntervals = options$partialResidualPlotPredictionInterval,
predictionIntervalsInterval = options$partialResidualPlotPredictionIntervalLevel,
standardizedResiduals = FALSE, intercept = line[['intercept']], slope = line[['slope']])
}
.linregCreateDescriptivesTable <- function(modelContainer, dataset, options, position) {
descriptivesTable <- createJaspTable(gettext("Descriptives"))
descriptivesTable$dependOn("descriptives")
descriptivesTable$position <- position
descriptivesTable$addColumnInfo(name = "var", title = "", type = "string")
descriptivesTable$addColumnInfo(name = "N", title = gettext("N"), type = "integer")
descriptivesTable$addColumnInfo(name = "mean", title = gettext("Mean"), type = "number")
descriptivesTable$addColumnInfo(name = "SD", title = gettext("SD"), type = "number")
descriptivesTable$addColumnInfo(name = "SE", title = gettext("SE"), type = "number")
variables <- c(options$dependent, unlist(options$covariates))
variables <- variables[variables != ""]
if (length(variables) > 0)
descriptivesTable$addRows(.linregGetDescriptives(variables, dataset))
modelContainer[["descriptivesTable"]] <- descriptivesTable
}
.linregCalcModel <- function(modelContainer, dataset, options, ready) {
if (!ready)
return()
if (!is.null(modelContainer[["model"]])) {
model <- modelContainer[["model"]]$object
model <- .linregCalcDurBinWatsonTestResults(modelContainer, model, options)
return(model)
}
nModels <- length(options$modelTerms)
dependent <- options$dependent
predictorsInNull <- .linregGetPredictors(options$modelTerms[[1]][["components"]], modelType = "null")
predictorsInFull <- .linregGetPredictors(options$modelTerms[[nModels]][["components"]], modelType = "alternative") # these include the null terms
if (options$weights != "")
weights <- dataset[[options$weights]]
else
weights <- rep(1, length(dataset[[dependent]]))
if (options$method %in% c("backward", "forward", "stepwise") && length(predictorsInFull) > 0)
model <- .linregGetModelSteppingMethod(dependent, predictorsInFull, predictorsInNull, dataset, options, weights)
else
model <- .linregGetModelEnterMethod(dependent, modelTerms = options[["modelTerms"]], dataset, options, weights)
for (i in seq_along(model)) {
singleModel <- model[[i]]
model[[i]][["title"]] <- gettextf("M%s", intToUtf8(0x2080 + i - 1, multiple = FALSE)) # singleModel[["title"]]
model[[i]][["summary"]] <- .linregGetSummary(singleModel$fit)
model[[i]][["rSquareChange"]] <- .linregGetrSquaredChange(singleModel$fit, i, model[1:i], options)
}
modelContainer[["model"]] <- createJaspState(model)
model <- .linregCalcDurBinWatsonTestResults(modelContainer, model, options)
return(model)
}
.linregCalcDurBinWatsonTestResults <- function(modelContainer, model, options) {
if (!options[["residualDurbinWatson"]])
return(model)
durbinWatsonResults <- modelContainer[["durbinWatsonResults"]] %setOrRetrieve% (
lapply(model, function(singleModel) {
.linregGetDurBinWatsonTestResults(singleModel$fit, options$weights)
}) |> createJaspState()
)
for (i in seq_along(model))
model[[i]][["durbinWatson"]] <- durbinWatsonResults[[i]]
return(model)
}
.linregGetModelSteppingMethod <- function(dependent, predictors, predictorsInNull, dataset, options, weights) {
if (options$method == "backward")
model <- .linregBackwardRegression(dependent, predictors, predictorsInNull, dataset, options, weights)
else if (options$method == "forward")
model <- .linregForwardRegression(dependent, predictors, predictorsInNull, dataset, options, weights)
else # stepwise
model <- .linregStepwiseRegression(dependent, predictors, predictorsInNull, dataset, options, weights)
return(model)
}
.linregGetModelEnterMethod <- function(dependent, modelTerms, dataset, options, weights) {
model <- list()
for (thisModel in modelTerms) {
thisModelTerms <- .linregGetPredictors(thisModel[["components"]])
formula <- .linregGetFormula(dependent, thisModelTerms, options$interceptTerm)
fit <- stats::lm(formula, data = dataset, weights = weights, x = TRUE)
model[[length(model) + 1]] <- list(fit = fit, predictors = thisModelTerms, title = gettext(thisModel[["title"]]))
}
return(model)
}
.linregBackwardRegression <- function(dependent, predictors, predictorsInNull, data, options, weights) {
formula <- .linregGetFormula(dependent, predictors, options$interceptTerm)
fit <- stats::lm(formula, data = data, weights = weights, x = TRUE)
model <- list(list(fit = fit, predictors = predictors))
candidatePredictors <- setdiff(predictors, predictorsInNull)
while (length(candidatePredictors) > 0) {
prevModel <- model[[length(model)]]
nextModel <- .linregTryToRemoveOnePredictor(prevModel, dependent, predictorsInNull, data, options, weights)
if (is.null(nextModel))
break
model[[length(model) + 1]] <- nextModel
candidatePredictors <- setdiff(nextModel$predictors, predictorsInNull)
}
return(model)
}
.linregForwardRegression <- function(dependent, predictors, predictorsInNull, data, options, weights) {
model <- list()
if (options$interceptTerm || length(predictorsInNull) > 0) {
formula <- .linregGetFormula(dependent, predictorsInNull, options$interceptTerm)
fit <- stats::lm(formula, data = data, weights = weights, x = TRUE)
model <- list(list(fit = fit, predictors = predictorsInNull))
}
candidatePredictors <- setdiff(predictors, predictorsInNull)
while (length(candidatePredictors) > 0) {
if (length(model) == 0)
prevModel <- NULL
else
prevModel <- model[[length(model)]]
nextModel <- .linregTryToAddOnePredictor(prevModel, dependent, candidatePredictors, predictorsInNull, data, options, weights)
if (is.null(nextModel))
break
model[[length(model) + 1]] <- nextModel
candidatePredictors <- setdiff(candidatePredictors, nextModel$predictors)
}
# we don't have an intercept or nuisance variables and no predictor could be added
if (length(model) == 0)
model <- list(list(fit = NULL, predictors = NULL))
return(model)
}
.linregStepwiseRegression <- function(dependent, predictors, predictorsInNull, data, options, weights) {
model <- list()
if (options$interceptTerm || length(predictorsInNull) > 0) {
formula <- .linregGetFormula(dependent, predictorsInNull, options$interceptTerm)
fit <- stats::lm(formula, data = data, weights = weights, x = TRUE)
model <- list(list(fit = fit, predictors = predictorsInNull))
}
predictorsNotInNull <- setdiff(predictors, predictorsInNull)
candidatePredictors <- predictorsNotInNull
while (length(candidatePredictors) > 0) {
if (length(model) == 0)
prevToAddModel <- NULL
else
prevToAddModel <- model[[length(model)]]
addStepModel <- .linregTryToAddOnePredictor(prevToAddModel, dependent, candidatePredictors, predictorsInNull, data, options, weights)
# stop if we cant even perform a single add step
if (is.null(addStepModel) && all(prevToAddModel[["predictors"]] %in% predictorsInNull))
break
if (!is.null(addStepModel))
model[[length(model) + 1]] <- addStepModel
prevToRemoveModel <- model[[length(model)]]
removeStepModel <- .linregTryToRemoveOnePredictor(prevToRemoveModel, dependent, predictorsInNull, data, options, weights)
# stop if no predictor could be added or removed
if (is.null(addStepModel) && is.null(removeStepModel))
break
if (!is.null(removeStepModel))
model[[length(model) + 1]] <- removeStepModel
if (identical(prevToAddModel, removeStepModel))
break
candidatePredictors <- setdiff(predictorsNotInNull, model[[length(model)]]$predictors)
}
# we don't have an intercept or nuisance variables and no predictor could be added
if (length(model) == 0)
model <- list(list(fit = NULL, predictors = NULL))
return(model)
}
.linregTryToAddOnePredictor <- function(prevModel = NULL, dependent, candidatePredictors, predictorsInNull, data, options, weights) {
fValues <- numeric(length(candidatePredictors))
pValues <- numeric(length(candidatePredictors))
for (i in seq_along(candidatePredictors)) {
formula <- .linregGetFormula(dependent, c(prevModel$predictors, candidatePredictors[i]), options$interceptTerm)
fit <- lm(formula, data = data, weights = weights, x = TRUE)
fValue <- summary(fit)$coefficients[, "t value"]
pValue <- summary(fit)$coefficients[, "Pr(>|t|)"]
if (grepl(candidatePredictors[i], pattern = ":")) {
variables <- unlist(strsplit(candidatePredictors[i], ":")) # split up interaction
permutations <- combinat::permn(variables) # realize all orderings
myPattern <- paste(sapply(permutations,
function(perm) paste(paste0(perm, ".?"), collapse = ":")),
collapse = "|") # paste together with "|"
} else {
myPattern <- candidatePredictors[i]
}
if (length(fValue) > 1)
fValue <- fValue[grepl(pattern = myPattern, x = names(fValue))]
if (length(pValue) > 1)
pValue <- pValue[grepl(pattern = myPattern, x = names(pValue))]
fValues[i] <- fValue^2 # turn t-value into f-value by squaring
pValues[i] <- pValue
}
nextPredictors <- NULL
if (options$steppingMethodCriteriaType == "fValue" && max(fValues) > options$steppingMethodCriteriaFEntry) {
highestFvaluePredictor <- candidatePredictors[which.max(fValues)]
nextPredictors <- c(prevModel$predictors, highestFvaluePredictor)
} else if (options$steppingMethodCriteriaType == "pValue" && min(pValues) < options$steppingMethodCriteriaPEntry) {
minimumPvalueVariable <- candidatePredictors[which.min(pValues)]
nextPredictors <- c(prevModel$predictors, minimumPvalueVariable)
}
# no predictors could be added; the algorithm is done
if (is.null(nextPredictors))
return()
formula <- .linregGetFormula(dependent, nextPredictors, options$interceptTerm)
fit <- stats::lm(formula, data = data, weights = weights, x = TRUE)
return(list(fit = fit, predictors = nextPredictors))
}
.linregTryToRemoveOnePredictor <- function(prevModel, dependent, predictorsInNull, data, options, weights) {
tValues <- summary(prevModel[["fit"]])$coefficients[ , "t value"]
pValues <- summary(prevModel[["fit"]])$coefficients[ , "Pr(>|t|)"]
if (options$interceptTerm) {
tValues <- tValues[-1]
pValues <- pValues[-1]
}
tValues <- tValues[!(names(tValues) %in% predictorsInNull)]
pValues <- pValues[!(names(pValues) %in% predictorsInNull)]
fValues <- tValues^2
nextPredictors <- prevModel[["predictors"]]
if (options$steppingMethodCriteriaType == "fValue" && min(fValues) < options$steppingMethodCriteriaFRemoval) {
lowestFvaluePredictor <- names(which.min(fValues))
nextPredictors <- prevModel$predictors[prevModel$predictors != lowestFvaluePredictor]
} else if (options$steppingMethodCriteriaType == "pValue" && max(pValues) > options$steppingMethodCriteriaPRemoval) {
highestPvaluePredictor <- names(which.max(pValues))
nextPredictors <- prevModel$predictors[prevModel$predictors != highestPvaluePredictor]
}
# no more predictors could be removed, the algorithm is done
if (identical(nextPredictors, prevModel[["predictors"]]))
return()
if (length(nextPredictors) > 0)
formula <- .linregGetFormula(dependent, nextPredictors, options$interceptTerm)
else if (length(predictorsInNull) > 0)
formula <- .linregGetFormula(dependent, predictorsInNull, options$interceptTerm)
else if (options$interceptTerm)
formula <- .linregGetFormula(dependent, NULL, TRUE)
else
return() # we can't compute a null model because it has no intercept/nuisance variables, the algorithm is done
fit <- stats::lm(formula, data = data, weights = weights, x = TRUE)
return(list(fit = fit, predictors = nextPredictors))
}
.linregGetSummary <- function(fit) {
summary <- list(r.squared = NaN, r.squared = NaN, adj.r.squared = NaN, sigma = NaN)
if (!is.null(fit))
summary <- summary(fit)
return(summary)
}
.linregGetDurBinWatsonTestResults <- function(fit, weights) {
durbinWatson <- list(r = NaN, dw = NaN, p = NaN)
if (!is.null(fit)) {
# TODO: Make some nicer error messsage/footnote when durbin watson computation fails
durbinWatson <- try(.durbinWatsonTest.lm(fit, alternative = c("two.sided")))
if (jaspBase::isTryError(durbinWatson)) {
return(list(r = NaN, dw = NaN, p = NaN))
}
if (weights == "") # if regression is not weighted, calculate p-value with lmtest (car method is unstable)
# TODO: this can fail when there are many interactions between factors. Do we want to show a footnote about that?
durbinWatson[["p"]] <- tryCatch(
lmtest::dwtest(fit, alternative = c("two.sided"))$p.value,
error = function(e) NaN
)
}
return(durbinWatson)
}
.linregGetrSquaredChange <- function(fit, currentIndex, processedModels, options) {
#R^2_change in Field (2013), Eqn. 8.15:
#F.change = (n-p_new - 1)R^2_change / p_change ( 1- R^2_new)
#df1 = p_change = abs( p_new - p_old )
#df2 = n-p_new
# the above works only for continuous predictors, for categorical, we have (k-1) coefficients
# where k=number of the levels in the categorical variable
if (currentIndex == 1) {
# if we include the intercept, the number of coefficients to compare the null model to is 1
# otherwise there are no coefficients
prevCoefs <- if(options[["interceptTerm"]]) numeric(1) else numeric(0)
prevRSquared <- 0
} else {
prevCoefs <- stats::coefficients(processedModels[[currentIndex - 1]]$fit)
prevRSquared <- summary(processedModels[[currentIndex - 1]]$fit)$r.squared
}
rSquaredChange <- fChange <- df1 <- df2 <- p <- NaN
if (!is.null(fit)) {
rSquared <- summary(fit)$r.squared
rSquaredChange <- rSquared - prevRSquared
coefs <- stats::coefficients(fit)
df1 <- abs(length(coefs) - length(prevCoefs)) # df1 = p_change = abs( p_new - p_old )
df2 <- stats::df.residual(fit) # df2 = n-p_new but should take factors into account
if (df1 > 0L) {
fChange <- (df2 * rSquaredChange) / (df1 * (1 - rSquared))
p <- pf(q = fChange, df1 = df1, df2 = df2, lower.tail = FALSE)
} else {
fChange <- p <- NA
}
}
rSquareChange <- list(
R2c = rSquaredChange,
Fc = fChange,
df1 = df1,
df2 = df2,
p = p
)
return(rSquareChange)
}
.linregGetAnova <- function(fit, predictors) {
Fvalue <- mssResidual <- mssModel <- dfResidual <- dfModel <- dfTotal <- ssResidual <- ssModel <- ssTotal <- p <- vovksellke <- NaN
if (!is.null(fit)) {
if (length(predictors) > 0) {
summary <- summary(fit)
Fvalue <- summary$fstatistic[1]
mssResidual <- summary$sigma^2
mssModel <- Fvalue * mssResidual
dfResidual <- summary$fstatistic[3]
dfModel <- summary$fstatistic[2]
dfTotal <- dfResidual + dfModel
ssResidual <- mssResidual * dfResidual
ssModel <- mssModel * dfModel
ssTotal <- ssResidual + ssModel
p <- pf(q = Fvalue, df1 = dfModel, df2 = dfResidual, lower.tail = FALSE)
vovksellke <- VovkSellkeMPR(p)
} else {
Fvalue <- mssResidual <- mssModel <- dfResidual <- dfModel <- dfTotal <- ssResidual <- ssModel <- ssTotal <- p <- vovksellke <- "."
}
}
anova <- list(
Regression = list(F = Fvalue, SS = ssModel, df = dfModel, MS = mssModel, p = p, vovksellke = vovksellke),
Residual = list( SS = ssResidual, df = dfResidual, MS = mssResidual),
Total = list( SS = ssTotal, df = dfTotal)
)
return(anova)
}
.linregGetCoefficients <- function(fit, predictors, dataset, options) {
rows <- list()
footnote <- NULL
if (!is.null(fit)) {
if (options$interceptTerm)
predictors <- c("(Intercept)", predictors)
factors <- options[["factors"]]
hasFactors <- length(factors) > 0L
summ <- summary(fit)
missingCoeffs <- summ[["aliased"]]
# adapted from stats:::print.summary.lm -- automatically handles missing values
estimates <- matrix(NaN, length(missingCoeffs), 4, dimnames = list(names(missingCoeffs), colnames(coef(summ))))
estimates[!missingCoeffs, ] <- coef(summ)
confInterval <- confint(fit, level = options$coefficientCiLevel)
confInterval[is.na(confInterval)] <- NaN
info <- .linregGetParametersAndLevels(fit)
names <- .linregMakePrettyNames(info)
rawNames <- info[["paramsRaw"]]
names[c(FALSE, names[-1L] == names[-length(names)])] <- ""
# show footnote for missing coefficients
footnote <- NULL
if (any(missingCoeffs))
footnote <- list(rows = names[missingCoeffs])
rows <- vector("list", nrow(estimates))
collinearityDiagnostics <- .linregGetVIFAndTolerance(fit)
dataClasses <- attr(terms(fit), "dataClasses")
# counts if we're showing a second level of a factor/ interaction
# for example for collinearity diagnostics
factorsSeen <- hashtab()
rowIndex <- 1L
for (i in seq_along(names)) {
hasFactors <- any(dataClasses[rawNames[[i]]] == "factor")
unstandCoeff <- estimates[i, "Estimate"]
predictor <- paste(rawNames[[i]], collapse = ":")
# these don't exist for the intercept or categorical predictors.
standCoeff <- NULL
if (!identical(rawNames[[i]], "(Intercept)") && !hasFactors)
standCoeff <- .linregGetStandardizedCoefficient(dataset, options[["dependent"]], predictor, unstandCoeff)
# tolerance/ VIF is only shown for the first level of categorical variables/ interactions
tolerance <- VIF <- NULL
if (!is.null(collinearityDiagnostics) && !identical(predictor, "(Intercept)") && (!hasFactors || !factorsSeen[[rawNames[[i]], nomatch = FALSE]])) {
tolerance <- collinearityDiagnostics[["tolerance"]][[predictor]]
VIF <- collinearityDiagnostics[["VIF"]][[predictor]]
if (hasFactors)
factorsSeen[[rawNames[[i]]]] <- TRUE
}
# get translation for (Intercept)
name <- names[i]
if (identical(name, "(Intercept)"))
name <- gettext("(Intercept)")
row <- list(
name = name,
unstandCoeff = unstandCoeff,
SE = estimates[i, "Std. Error"],
t = estimates[i, "t value"],
p = estimates[i, "Pr(>|t|)"],
lower = confInterval[i, 1],
upper = confInterval[i, 2],
standCoeff = standCoeff,
tolerance = tolerance,
VIF = VIF,
vovksellke = VovkSellkeMPR(estimates[i, "Pr(>|t|)"])
)
rows[[i]] <- row
}
}
return(list(coefficients = rows, footnote = footnote))
}
.linregGetBootstrapCoefficients <- function(fit, dataset, options) {
.bootstrapping <- function(data, indices, formula, wlsWeights) {
progressbarTick()
d <- data[indices, , drop = FALSE] # allows boot to select sample
if (wlsWeights == "") {
fit <- lm(formula = formula, data = d)
} else {
weights <- d[[wlsWeights]]
fit <- lm(formula = formula, data = d, weights = weights)
}
return(coef(fit))
}
data <- data.frame(unstandCoeff = numeric(0), bias = numeric(0), SE = numeric(0), pvalue = numeric(0), lower = numeric(0), upper = numeric(0))
if (!is.null(fit)) {
#weights <- if (options$weights == "") NULL else options$weights
missingCoeffs <- NULL
coefNames <- names(coef(fit))
if (anyNA(fit$coefficients))
missingCoeffs <- coefNames[which(is.na(coef(fit)))]
summary <- boot::boot(data = dataset, statistic = .bootstrapping,
R = options$coefficientBootstrapSamples,
formula = formula(fit),
wlsWeights = options$weights)
coefficients <- matrixStats::colMedians(summary$t, na.rm = TRUE)
bias <- colMeans(summary$t, na.rm = TRUE) - summary$t0
stdErrors <- matrixStats::colSds(summary$t, na.rm = TRUE)
for (i in seq_along(coefNames)) {
coefName <- coefNames[[i]]
if (coefName %in% missingCoeffs) {
data[i, ] <- rep(NaN, ncol(data))
next
}
ci <- try(boot::boot.ci(summary, type = "bca", conf = options$coefficientCiLevel, index = i))
if (jaspBase::isTryError(ci))
ci <- list(bca = rep(NaN, 5L))
p <- try(.boot.pval(summary, type = "bca", index = i))
if (isTryError(p))
p <- NaN
data[i, ] <- c(coefficients[i], bias[i], stdErrors[i], p, ci$bca[4], ci$bca[5])
}
data[["name"]] <- .linregMakePrettyNames(fit)
}
return(data)
}
.linregGetVIFAndTolerance <- function(fit) {
# also used inside car:::vif.default
noTerms <- length(labels(stats::terms(fit)))
if (noTerms == 0L) # nothing can be computed
return(NULL)
if (noTerms == 1L) {# trivial case, always 1
name <- labels(stats::terms(fit))
value <- setNames(1, name)
return(list(VIF = value, tolerance = value))
}
# we can actually compute things
result <- try(car::vif(fit))
if (jaspBase::isTryError(result)) {
nas <- rep(NA, noTerms)
names(nas) <- labels(terms(fit))
result <- list(VIF = nas, tolerance = nas)
return(result)
}
VIF <- if (is.matrix(result)) {
result[, 3L]
} else {
result
}
tolerance <- 1 / VIF
result <- list(VIF = VIF,
tolerance = tolerance)
return(result)
}
.linregGetStandardizedCoefficient <- function(dataset, dependent, predictor, unstandCoeff) {
sdDependent <- sd(dataset[[dependent]])
if (.linregIsInteraction(predictor))
sdIndependent <- sd(.linregMakeCombinedVariableFromInteraction(predictor, dataset))
else
sdIndependent <- sd(dataset[[predictor]])
return(unstandCoeff * sdIndependent / sdDependent)
}
.linregGetPartAndPartialCorrelation <- function(fit, predictors, dataset, options) {
formula <- formula(fit)
R2 <- summary(fit)[["r.squared"]]
cors <- vector("list", length(predictors))
names(cors) <- predictors
for(predictor in predictors) {
# drop the term from the formula and refit the model
newFormula <- update(formula, as.formula(sprintf(". ~ . - %s", predictor)))
data <- dataset # because we call lm(..., data = data) in .linregForwardRegression we need 'data' to exist.
newFit <- update(fit, formula = newFormula)
newR2 <- summary(newFit)[["r.squared"]]
sr2 <- R2 - newR2 # squared semi-partial (part) correlation
pr2 <- sr2 / (1-newR2) # squared partial correlation
# determine the sign of the coefficient
sign <- sign(coefficients(fit)[predictor])
if(is.na(sign)) sign <- 1 # for categorical predictors
cors[[predictor]] <- list(
name = jaspBase::gsubInteractionSymbol(predictor),
part = sign * sqrt(sr2),
partial = sign * sqrt(pr2)
)
}
return(cors)
}
.linregGetCovarianceMatrix <- function(fit, rawNames, prettyNames) {
data <- data.frame()
if (!is.null(fit)) {
namesInModel <- setdiff(names(coef(fit)), "(Intercept)")
covmatrix <- matrix(NA_real_, length(namesInModel), length(rawNames), dimnames = list(namesInModel, rawNames))
covmatrix[namesInModel, namesInModel] <- vcov(fit)[namesInModel, namesInModel]
covmatrix[lower.tri(covmatrix)] <- NA
if (nrow(covmatrix) > 0L) {
colnames(covmatrix) <- prettyNames
names <- prettyNames[rawNames %in% namesInModel]
data <- cbind(data.frame(name = names), covmatrix)
}
}
return(data)
}
.linregGetCollinearityDiagnostics <- function(fit, columns, includeConstant) {
data <- data.frame()
if (!is.null(fit)) {
eigenvalues <- .linregGetEigenValues(fit)
conditionIndices <- .linregGetConditionIndices(fit)
varianceProportions <- .linregGetVarianceProportions(fit)
data <- data.frame(dimension = seq_along(names(fit$coefficients)), eigenvalue = eigenvalues, condIndex = conditionIndices)
colnames(varianceProportions) <- .linregMakePrettyNames(fit)
data <- cbind(data, varianceProportions)
}
return(data)
}
.linregGetEigenValues <- function(fit) {
X <- .linregGetScaledPredictorMatrix(fit)
eigenvalues <- svd(X)$d^2 # see Liao & Valliant (2012)
return(eigenvalues)
}
.linregGetConditionIndices <- function(fit) {
eigenvalues <- .linregGetEigenValues(fit)
conditionIndices <- sqrt(max(eigenvalues) / eigenvalues)
return(conditionIndices)
}
.linregGetVarianceProportions <- function(fit) {
X <- .linregGetScaledPredictorMatrix(fit)
### ( see e.g., Liao & Valliant, 2012 )
svdX <- svd(X) # singular value decomposition
M <- svdX$v %*% solve(diag(svdX$d))
Q <- M*M # Hadamard (elementwise) product
tQ <- t(Q)
for (i in seq_len(ncol(tQ)))
tQ[ , i] <- tQ[ , i] / sum(tQ[ , i])
colnames(tQ) <- names(fit$coefficients)
return(tQ)
}
.linregGetScaledPredictorMatrix <- function(fit) {
X <- fit$x
for (i in seq_len(ncol(X)))
X[ , i] <- X[ , i] / sqrt(sum(X[ , i]^2)) # scale each column using Euclidean norm
return(X)
}
.linregGetCasewiseDiagnostics <- function(fit, options) {
diagnostics <- list()
if (!is.null(fit)) {
predictedValuesAll <- predict(fit)
residualsAll <- residuals(fit)
stdPredictedValuesAll <- (predictedValuesAll - mean(predictedValuesAll)) / sd(predictedValuesAll)
stdResidualsAll <- rstandard(fit)
# stdResidualsAll <- statmod::qresid(fit)
# stdResidualsAll <- rstudent(fit
cooksDAll <- cooks.distance(fit)
if (options$residualCasewiseDiagnosticType == "cooksDistance")
index <- which(abs(cooksDAll) > options$residualCasewiseDiagnosticCooksDistanceThreshold)
else if (options$residualCasewiseDiagnosticType == "outliersOutside")
index <- which(abs(stdResidualsAll) > options$residualCasewiseDiagnosticZThreshold)
else # all
index <- seq_along(predictedValuesAll)
if (length(index) > 0) {
caseNumbers <- .linregGetCaseNumbers(options)
diagnostics[["caseNumber"]] <- caseNumbers[index]
diagnostics[["stdResidual"]] <- stdResidualsAll[index]
diagnostics[["dependent"]] <- fit$model[index, 1]
diagnostics[["predicted"]] <- predictedValuesAll[index]
diagnostics[["residual"]] <- residualsAll[index]
diagnostics[["cooksD"]] <- cooksDAll[index]
}
}
return(diagnostics)
}
.linregGetResidualsStatistics <- function(fit, predictors) {
residuals <- list()
if (!is.null(fit)) {
typesTranslated <- list("Predicted Value"=gettext("Predicted Value"), "Residual"=gettext("Residual"), "Std. Predicted Value"=gettext("Std. Predicted Value"), "Std. Residual"=gettext("Std. Residual"))
types <- names(typesTranslated)
predicted <- predict(fit)
N <- length(predicted)
valuesPerType <- list("Predicted Value" = predicted,
"Residual" = residuals(fit),
"Std. Predicted Value" = (predicted - mean(predicted)) / sd(predicted),
"Std. Residual" = rstandard(fit))
if (length(predictors) == 0)
valuesPerType[["Std. Predicted Value"]] <- NA # cannot compute this for an intercept model
residuals <- vector("list", length(types))
for (i in seq_along(types)) {
residuals[[i]] <- list()
type <- types[i]
residuals[[i]][["type"]] <- typesTranslated[[type]]
residuals[[i]][["min"]] <- min( valuesPerType[[type]], na.rm = TRUE)
residuals[[i]][["max"]] <- max( valuesPerType[[type]], na.rm = TRUE)
residuals[[i]][["mean"]] <- mean(valuesPerType[[type]], na.rm = TRUE)
residuals[[i]][["SD"]] <- sd( valuesPerType[[type]], na.rm = TRUE)
residuals[[i]][["N"]] <- N
}
}
return(residuals)
}
.linregGetDescriptives <- function(variables, dataset) {
descriptives <- vector("list", length(variables))
for (i in seq_along(variables)) {
descriptives[[i]] <- list()
variable <- variables[i]
data <- na.omit(dataset[[.v(variable)]])
descriptives[[i]][["var"]] <- variable
descriptives[[i]][["N"]] <- length(data)
descriptives[[i]][["mean"]] <- mean(data)
descriptives[[i]][["SD"]] <- sd(data)
descriptives[[i]][["SE"]] <- sd(data) / sqrt(length(data))
}
return(descriptives)
}
.linregGetPartialPlotData = function(predictor, predictors, dataset, options) {
predictors <- setdiff(predictors, predictor)
if (length(predictors) == 0)
predictors <- NULL
weights <- dataset[[options$weights]]
# Compute residuals dependent
formulaDep <- .linregGetFormula(options$dependent, predictors = predictors, includeConstant = TRUE)
fitDep <- stats::lm(formula = formulaDep, data = dataset, weights = weights)
residualsDep <- residuals(fitDep)
# Compute residuals predictor as dependent
formulaPred <- .linregGetFormula(predictor, predictors = predictors, includeConstant = TRUE)
fitPred <- stats::lm(formula = formulaPred, data = dataset, weights = weights)
residualsPred <- residuals(fitPred)
return(data.frame(residualsPred = residualsPred, residualsDep = residualsDep))
}
.linregPlotResiduals <- function(xVar = NULL, res = NULL, dfRes = Inf, xlab, ylab = gettext("Residuals"), cexPoints= 1.3, cexXAxis= 1.3, cexYAxis= 1.3, lwd= 2, lwdAxis=1.2,
regressionLine = TRUE, confidenceIntervals = FALSE, confidenceIntervalsInterval = 0.95, predictionIntervals = FALSE, predictionIntervalsInterval = 0.95,
standardizedResiduals = TRUE, intercept = 0, slope = 0) {
# TODO: slope should consist of multiple values for factors with more than 2 levels
d <- data.frame(xx= xVar, yy= res)
d <- na.omit(d)
xVar <- d$xx
res <- d$yy
# construct here the x-axis scale which can be categorical or continuous
if (is.factor(xVar)) {
xScale <- ggplot2::scale_x_discrete(name = xlab)
} else {
xlow <- min(pretty(xVar))
xhigh <- max(pretty(xVar))
xticks <- pretty(c(xlow, xhigh))
xlabs <- jaspGraphs::axesLabeller(xticks, digits = 3)
xScale <- ggplot2::scale_x_continuous(name = xlab, breaks = xticks, labels = xlabs)
}
# y-axis scale is always continuous (since the dependent variable in linear regression should be continuous)
ylow <- min(pretty(res))
yhigh <- max(pretty(res))
yticks <- pretty(c(ylow, yhigh, 0))
ylabs <- jaspGraphs::axesLabeller(yticks, digits = 3)
ylim <- range(yticks)
if (standardizedResiduals) {
stAxisTmp <- pretty( yticks / sd(res) )
stAxisOriginalScaleTmp <- stAxisTmp * sd(res)
stAxisOriginalScale <- stAxisOriginalScaleTmp[stAxisOriginalScaleTmp < max(yticks) & stAxisOriginalScaleTmp > min(yticks)]
stAxis <- stAxisOriginalScale / sd(res)
yScaleSecAxis <- ggplot2::sec_axis(~.+0, breaks = stAxisOriginalScale, name = gettext("Standardized Residuals\n"),labels = stAxis)
} else {
yScaleSecAxis <- ggplot2::waiver()
}
yScale <- ggplot2::scale_y_continuous(name = ylab, breaks = yticks, labels = ylabs, limits = ylim, sec.axis = yScaleSecAxis)
regLine <- confidenceIntervalLines <- predictionIntervalLines <- NULL
if (regressionLine) {
regLine <- if (is.factor(xVar)) {
ggplot2::geom_line(
data = data.frame(x = as.numeric(unique(xVar)), y = intercept + slope),
mapping = ggplot2::aes(x = x, y = y),
col = "darkred", size = .5
)
} else {
ggplot2::geom_line(
data = data.frame(x = c(min(xticks), max(xticks)), y = intercept + slope * c(min(xticks), max(xticks))),
mapping = ggplot2::aes(x = x, y = y),
col = "darkred", size = .5
)
}
if (confidenceIntervals) {
seConf <- sqrt(sum(res^2) / dfRes) *
sqrt(1 / length(res) + (xVar - mean(xVar))^2 / sum((xVar - mean(xVar))^2))
ciConf <- 1 - (1 - confidenceIntervalsInterval)/2
upperConfInt <- (intercept + slope * xVar) + qt(ciConf, dfRes) * seConf
lowerConfInt <- (intercept + slope * xVar) - qt(ciConf, dfRes) * seConf
# ggplot2::geom_errorbar()
confidenceIntervalLines <- if (is.factor(xVar)) {
ggplot2::geom_errorbar(
data = data.frame(x = levels(xVar), ymax = upperConfInt, ymin = lowerConfInt),
mapping = ggplot2::aes(x = x, ymax = ymax, ymin = ymin),
colour = "darkblue", linetype = "dashed"
)
} else {
ggplot2::geom_line(
data = data.frame(x = xVar, y = c(upperConfInt, lowerConfInt), g = rep(1:2, c(length(upperConfInt), length(lowerConfInt)))),
mapping = ggplot2::aes(x = x, y = y, group = g),
colour = "darkblue", linetype = "dashed"
)
}
}
if (predictionIntervals) {
sePred <- sqrt(sum(res^2) / dfRes) *
sqrt(1 + 1 / length(res) + (xVar - mean(xVar))^2 / sum((xVar - mean(xVar))^2))
ciPred <- 1 - (1 - predictionIntervalsInterval)/2
upperPredInt <- (intercept + slope * xVar) + qt(ciPred, dfRes) * sePred
lowerPredInt <- (intercept + slope * xVar) - qt(ciPred, dfRes) * sePred
predictionIntervalLines <- if (is.factor(xVar)) {
ggplot2::geom_errorbar(
data = data.frame(x = levels(xVar), ymax = upperPredInt, ymin = lowerPredInt),
mapping = ggplot2::aes(x = x, ymax = ymax, ymin = ymin),
colour = "darkgreen", linetype = "longdash"
)
} else {
ggplot2::geom_line(data = data.frame(x = xVar, y = c(upperPredInt, lowerPredInt), g = rep(1:2, c(length(upperPredInt), length(lowerPredInt)))),
mapping = ggplot2::aes(x = x, y = y, group = g),
col = "darkblue", linetype = "dashed", size = 1)
}
}
}
residualPoints <- jaspGraphs::geom_point(data = data.frame(x = xVar, y = res), mapping = ggplot2::aes(x = x, y = y))
p <- ggplot2::ggplot() +
xScale + yScale +
regLine +
confidenceIntervalLines +
predictionIntervalLines +
residualPoints +
jaspGraphs::geom_rangeframe(sides = if (standardizedResiduals) "blr" else "bl") +
jaspGraphs::themeJaspRaw(axis.title.cex = 1.2)
return(p)
}
.linregPlotResidualsHistogram <- function(res = NULL, resName = gettext("Residuals"), cexYlab= 1.3, lwd= 2, rugs= FALSE) {
density <- density(res)
h <- hist(res, plot = FALSE)
dens <- density(res)
yhigh <- max(c(h$density, dens$y))
ylow <- 0
xticks <- base::pretty(c(res, h$breaks), min.n= 3)
p <- ggplot2::ggplot() +
ggplot2::scale_x_continuous(name = resName, breaks = xticks, labels = xticks, limits = range(xticks)) +
ggplot2::scale_y_continuous(name = gettext("Density"), breaks = c(ylow, yhigh), labels = NULL) +
ggplot2::geom_histogram(
data = data.frame(res),
mapping = ggplot2::aes(x = res, y = ..density..),
binwidth = (h$breaks[2] - h$breaks[1]),
fill = "grey", col = "black", size = .3,
center = ((h$breaks[2] - h$breaks[1])/2)
) +
ggplot2::geom_line(data = data.frame(x = density$x, y = density$y), mapping = ggplot2::aes(x = x, y = y), lwd = .7, col = "black") +
jaspGraphs::geom_rangeframe() +
jaspGraphs::themeJaspRaw(axis.title.cex = 1.2) +
ggplot2::theme(axis.ticks.y = ggplot2::element_blank())
return(p)
}
.linregGetPredictors <- function(modelTerms, modelType = "alternative") {
if (!is.character(modelType) || !modelType %in% c("alternative", "null"))
stop(gettext("Unknown value provided for modelType, possible values: `alternative`, `null`"))
predictors <- NULL
for (i in seq_along(modelTerms)) {
components <- modelTerms[[i]]
predictor <- paste0(components, collapse = ":")
predictors <- c(predictors, predictor)
}
return(predictors)
}
.reglinTermsContainNuisance <- function(modelTerms) {
for (i in seq_along(modelTerms)) {
isNuisance <- modelTerms[[i]]$isNuisance
if (isNuisance)
return(TRUE)
}
return(FALSE)
}
.linregGetFormula <- function(dependent, predictors = NULL, includeConstant) {
if (is.null(predictors) && includeConstant == FALSE)
stop(gettext("We need at least one predictor, or an intercept to make a formula"))
if (length(predictors) == 0)
formula <- paste(dependent, "~", "1")
else if (includeConstant)
formula <- paste(dependent, "~", paste(predictors, collapse = "+"))
else
formula <- paste(dependent, "~", paste(predictors, collapse = "+"), "-1")
return(as.formula(formula, env = parent.frame(1)))
}
.linregIsInteraction <- function(predictor) {
grepl(":", predictor)
}
.linregMakeCombinedVariableFromInteraction <- function(interaction, dataset) {
terms <- unlist(strsplit(interaction, split = ":"))
newVar <- rep(1, nrow(dataset))
for (i in seq_along(terms))
newVar <- newVar * dataset[[terms[i]]]
return(newVar)
}
.linregGetPredictorColumnNames <- function(model, modelTerms) {
usedPredictors <- unique(unlist(lapply(model, function(x) x$predictors)))
allpredictors <- .linregGetPredictors(modelTerms)
return(intersect(allpredictors, usedPredictors)) # ensures that the terms appear in the covariance matrix like they appear in the model terms box
}
.linregGetParameterNames <- function(model) {
UseMethod(".linregGetParameterNames", model)
}
.linregGetParameterNames.lm <- function(model) {
return(colnames(model.matrix(model)))
}
.linregGetParameterNames.list <- function(model) {
usedParameterNames <- unique(unlist(lapply(model, function(x) .linregGetParameterNames(x[["fit"]])), use.names = FALSE))
return(usedParameterNames)
}
.linregAddPredictorsAsColumns <- function(jaspTable, model, includeIntercept = TRUE, type = "number", format = NULL, overtitle = NULL) {
titles <- .linregMakePrettyNames(model)
if (!includeIntercept)
titles <- titles[-1L]
for (title in titles)
jaspTable$addColumnInfo(name = title, title = title, type = type, format = format, overtitle = overtitle)
}
.linregAddPredictorsInNullFootnote <- function(jaspTable, modelTerms) {
if (length(modelTerms) > 0) {
predictorsInNull <- .linregGetPredictors(modelTerms, modelType = "null")
jaspTable$addFootnote(message = gettextf("Null model includes %s", paste(predictorsInNull, collapse = ", "), sep = ""))
}
}
.linregAddVovkSellke <- function(jaspTable, wantsVovkSellkeMPR) {
if (wantsVovkSellkeMPR) {
jaspTable$addColumnInfo(name = "vovksellke", title = gettext("VS-MPR"), type = "number")
#Haven't I seen the following footnote before?
jaspTable$addFootnote(symbol = "\u002A", colNames = "vovksellke", message = gettextf("Vovk-Sellke Maximum <em>p</em>-Ratio: Based on the <em>p</em>-value, the maximum
possible odds in favor of H%1$s over H%2$s equals 1/(-e <em>p</em> log(<em>p</em>)) for <em>p</em> %3$s .37 (Sellke, Bayarri, & Berger, 2001).", "\u2081", "\u2080", "\u2264"))
}
}
.linregAddInterceptNotShownFootnote <- function(jaspTable, model, options) {
indicesOfModelsWithPredictors <- .linregGetIndicesOfModelsWithPredictors(model, options)
if (options$interceptTerm && length(indicesOfModelsWithPredictors) != length(model)) {
if (length(indicesOfModelsWithPredictors) > 0)
jaspTable$addFootnote(gettext("The intercept model is omitted, as no meaningful information can be shown."))
else
jaspTable$addFootnote(gettext("There is only an intercept model, no meaningful information can be shown."))
}
}
.linregGetIndicesOfModelsWithPredictors <- function(model, options) {
predictorsInNull <- model[[1]]$predictors
indices <- seq_along(model)
if (options$method == "enter") {
if (length(model) >= 1 && options$interceptTerm && length(predictorsInNull) == 0)
indices <- indices[-1]
} else {
for (i in seq_along(model))
if (length(model[[i]]$predictors) == 0)
indices <- indices[-i]
}
return(indices)
}
.linregInsertPlot <- function(jaspPlot, func, ...) {
p <- try(func(...))
if (inherits(p, "try-error")) {
errorMessage <- .extractErrorMessage(p)
jaspPlot$setError(gettextf("Plotting is not possible: %s", errorMessage))
} else {
jaspPlot$plotObject <- p
jaspPlot$status <- "complete"
}
}
.linregCreatePlotPlaceholder <- function(container, index, title, width = 530, height = 400) {
jaspPlot <- createJaspPlot(title = title, width = width, height = height)
jaspPlot$status <- "running"
container[[index]] <- jaspPlot
}
.linregGetParametersAndLevels <- function(model, ...) {
UseMethod(".linregGetParametersAndLevels", model)
}
.linregGetParametersAndLevels.lm <- function(model) {
predictors <- all.vars(formula(model))[-1L]
.linregGetParametersAndLevels(.linregGetParameterNames(model), predictors)
}
.linregGetParametersAndLevels.default <- function(model, predictors) {
# exception for intercept only model
if (length(predictors) == 0L && identical(model, "(Intercept)")) {
return(list(
paramsClean = model,
levelsClean = "",
paramsRaw = model,
levelsRaw = ""
))
}
orderedPredictors <- predictors[order(nchar(predictors))]
# escape the parentheses
orderedPredictors[which(orderedPredictors == "(Intercept)")] <- "\\(Intercept\\)"
regexAnyPredictor <- paste(orderedPredictors, collapse = "|")
lvls <- gsub(regexAnyPredictor, "", model)
levelsRaw <- strsplit(lvls, ":")
levelsRaw[lengths(levelsRaw) == 0] <- list("")
# the above works except for n-way interactions between continuous predictors there needs to be one more empty level
# so that the number of levels matches the number of predictors. The regex below finds these interactions and adds an
# extra "" to the levels.
for (i in grep("^(:+)$", lvls))
levelsRaw[[i]] <- c(levelsRaw[[i]], "")
if (length(levelsRaw[[1L]]) == 1L && levelsRaw[[1L]] == "(Intercept)")
levelsRaw[[1L]] <- ""
levelsClean <- jaspBase::gsubInteractionSymbol(lvls)
splitRnms <- strsplit(model, ":")
allPredsRegex <- paste0("^(", regexAnyPredictor, ").*")
paramsRaw <- lapply(splitRnms, gsub, pattern = allPredsRegex, replacement = "\\1")
paramsClean <- unlist(lapply(paramsRaw, paste, collapse = jaspBase::interactionSymbol), use.names = FALSE)
return(list(
paramsClean = paramsClean,
levelsClean = levelsClean,
paramsRaw = paramsRaw,
levelsRaw = levelsRaw
))
}
#' Change R names into pretty names, e.g., PredictorLevel ->
#'
#' @param info either an lm object, a list where each sub element $fit contains an lm object, or the result of .linregGetParametersAndLevels.
#'
#' @details For example "PredictorLevel" becomes "Predictor" Level
.linregMakePrettyNames <- function(info) {
UseMethod(".linregMakePrettyNames", info)
}
.linregMakePrettyNames.lm <- function(info) {
.linregMakePrettyNames(.linregGetParametersAndLevels(info))
}
.linregMakePrettyNames.list <- function(info) {
if (is.null(names(info)) && !is.null(info[[1L]][["fit"]])) {
# we could also distinguish between these cases by giving the return value of .linregGetParametersAndLevels a class.
return(unique(unlist(lapply(info, function(x) .linregMakePrettyNames.lm(x[["fit"]])), use.names = FALSE)))
}
title <- character(length(info[["paramsRaw"]]))
for (j in seq_along(title)) {
params <- info[["paramsRaw"]][[j]]
levels <- info[["levelsRaw"]][[j]]
ans <- character(length(params))
for (i in seq_along(params)) {
ans[i] <- if (is.na(levels[i]) || levels[i] == "") params[i] else paste0(params[i], " (", levels[i], ")")
}
title[j] <- paste(ans, collapse = " \u2009\u273b\u2009 ")
}
return(title)
}
.linregRemoveFactors <- function(fit, predictors) {
terms <- terms(fit)
dataClasses <- attr(terms, "dataClasses")
factors <- attr(terms, "factors")
result <- logical(length(predictors))
for (i in seq_along(result)) {
consistsOf <- names(which(factors[, predictors[i]] == 1))
result[i] <- !any(dataClasses[consistsOf] == "factor")
}
return(predictors[result])
}
.linregContainsFactor <- function(x, predictors) {
UseMethod(".linregContainsFactor", x)
}
.linregContainsFactor.data.frame <- function(x, predictors) {
idx <- .linregIsInteraction(predictors)
if (any(idx))
predictors <- unique(unlist(strsplit(predictors, ":", fixed = TRUE), use.names = FALSE))
return(any(vapply(x[predictors], is.factor, logical(1L))))
}
.linregContainsFactor.lm <- function(x, predictors) {
# returns TRUE if the predictor is a factor or if it is an interaction that contains a factor
terms <- terms(x)
dataClasses <- attr(terms, "dataClasses")
factors <- attr(terms, "factors")
consistsOf <- names(which(factors[, predictors] == 1))
return(any(dataClasses[consistsOf] == "factor"))
}
.linregCreateMarginalPlots <- function(modelContainer, finalModel, dataset, options, position = 17) {
marginalPlotsContainer <- createJaspContainer(gettext("Marginal Effects Plots"))
marginalPlotsContainer$dependOn(c("marginalPlot", "marginalPlotCi", "marginalPlotCiLevel",
"marginalPlotPredictionInterval", "marginalPlotPredictionIntervalLevel"))
marginalPlotsContainer$position <- position
modelContainer[["marginalPlotsContainer"]] <- marginalPlotsContainer
if (!is.null(finalModel)) {
predictors <- finalModel$predictors
# marginal plots display only the main effects
predictors <- predictors[!grepl(":", predictors)]
for (predictor in predictors)
.linregCreatePlotPlaceholder(marginalPlotsContainer,
index = predictor,
title = gettextf("Marginal effect of %1$s on %2$s", predictor, options$dependent))
for (predictor in predictors) {
.linregFillMarginalPlots(marginalPlotsContainer[[predictor]], predictor, finalModel$fit, dataset, options)
}
}
}
.linregFillMarginalPlots <- function(marginalPlot, predictor, fit, dataset, options) {
xVar <- dataset[[predictor]]
xVar <- stats::na.omit(xVar)
means_ls = list()
if (length(options[["factors"]]) > 0) {
for (var in options[["factors"]]) {
column_value = dataset[[var]]
column_levels = levels(column_value)
means_ls[[var]] = column_levels[1]
}
}
if (length(options[['covariates']]) > 0) {
for (var in options[['covariates']]) {
column_value = dataset[[var]]
column_mean = mean(column_value, na.rm = TRUE)
means_ls[[var]] = column_mean
}
}
means_ls[[predictor]] = NULL
dd_sim = data.frame(predictor = xVar)
colnames(dd_sim) = predictor
if (length(means_ls) > 0) {
dd_sim = cbind(dd_sim, means_ls)
}
fitted = predict(fit, newdata = dd_sim, interval = "none")
if (options$marginalPlotCi == TRUE) {
matrix_conf = predict(fit,
newdata = dd_sim,
interval = "confidence",
level = options[["marginalPlotCiLevel"]])
conf_min = matrix_conf[, 'lwr']
conf_max = matrix_conf[, 'upr']
}
else {
conf_min = NULL
conf_max = NULL
}
if (options$marginalPlotPredictionInterval == TRUE) {
matrix_pred = predict(fit,
newdata = dd_sim,
interval = "prediction",
level = options[["marginalPlotPredictionIntervalLevel"]])
pred_min = matrix_pred[, 'lwr']
pred_max = matrix_pred[, 'upr']
}
else {
pred_min = NULL
pred_max = NULL
}
.linregInsertPlot(marginalPlot,
.linregMarginalPlot,
xVar = xVar,
xlab = predictor,
yVar = fitted,
ylab = options$dependent,
conf_min = conf_min,
conf_max = conf_max,
pred_min = pred_min,
pred_max = pred_max)
}
.linregMarginalPlot <- function(xVar, xlab, yVar, ylab,
conf_min, conf_max, pred_min, pred_max, options) {
d <- data.frame(x = xVar,
y = yVar)
if (is.factor(xVar)) {
d_factor <- unique(d)
d_factor <- cbind(d_factor, group = 1)
basicMarginalPlot <- ggplot2::ggplot() +
ggplot2::geom_line(data = d_factor,
mapping = ggplot2::aes(x = x, y = y, group = group),
size = 1) +
ggplot2::xlab(xlab) +
ggplot2::ylab(ylab)
factorPoints <- jaspGraphs::geom_point(data = d_factor,
mapping = ggplot2::aes(x = x, y = y))
} else {
basicMarginalPlot <- ggplot2::ggplot() +
ggplot2::geom_line(data = d,
mapping = ggplot2::aes(x = x, y = y),
size = 1) +
ggplot2::xlab(xlab) +
ggplot2::ylab(ylab) +
ggplot2::geom_rug(data = d,
mapping = ggplot2::aes(x = x, y = y),
sides = "b",
alpha = 0.5)
factorPoints <- NULL
}
if (!is.null(conf_min)) {
d <- cbind(d, conf_lower = conf_min, conf_upper = conf_max)
if (is.factor(xVar)) {
d_factor <- unique(d)
confidenceBounds <- ggplot2::geom_errorbar(data = d_factor,
ggplot2::aes(x = x, y = y, ymin = conf_lower, ymax = conf_upper),
linetype = "solid",
width = 0.1,
size = 1)
} else {
confidenceBounds <- ggplot2::geom_ribbon(mapping = ggplot2::aes(x = x, ymin = conf_lower, ymax = conf_upper),
alpha = .1,
data = d,
size = 1)
}
} else {
confidenceBounds = NULL
}
if (!is.null(pred_min)) {
d <- cbind(d, pred_lower = pred_min, pred_upper = pred_max)
if (is.factor(xVar)) {
d_factor <- unique(d)
predictionBound1 <- ggplot2::geom_errorbar(data = d_factor,
ggplot2::aes(x = x, y = y, ymin = pred_lower, ymax = pred_upper),
linetype = "dashed",
width = 0.1,
size = 1)
predictionBound2 <- NULL
} else {
predictionBound1 <- ggplot2::geom_line(
mapping = ggplot2::aes(x = x, y = pred_lower),
#color = "red",
linetype = "dashed",
data = d,
size = 1)
predictionBound2 <-
ggplot2::geom_line(
mapping = ggplot2::aes(x = x, y = pred_upper),
#color = "red",
linetype = "dashed",
data = d,
size = 1)
}
} else {
predictionBounds <- predictionBound1 <- predictionBound2 <- NULL
}
finalMarginalPlot <- basicMarginalPlot +
confidenceBounds +
predictionBound1 +
predictionBound2 +
factorPoints +
jaspGraphs::geom_rangeframe() +
jaspGraphs::themeJaspRaw(axis.title.cex = 1.2)
return(finalMarginalPlot)
}
# function taken from {boot.pval} version 0.4
# adjusted precision bug (alpha close to 1 causes some boot.ci type to crash)
.boot.pval <- function(boot_res,
type = "perc",
theta_null = 0,
pval_precision = NULL,
...)
{
if(is.null(pval_precision)) { pval_precision = 1/boot_res$R }
# Create a sequence of alphas:
# EDITED:
alpha_seq <- seq(1e-10, 1-1e-10, pval_precision)
# END EDITED
# Compute the 1-alpha confidence intervals, and extract
# their bounds:
ci <- suppressWarnings(boot::boot.ci(boot_res,
conf = 1- alpha_seq,
type = type,
...))
bounds <- switch(type,
norm = ci$normal[,2:3],
basic = ci$basic[,4:5],
stud = ci$student[,4:5],
perc = ci$percent[,4:5],
bca = ci$bca[,4:5])
# Find the smallest alpha such that theta_null is not contained in the 1-alpha
# confidence interval:
alpha <- alpha_seq[which.min(theta_null >= bounds[,1] & theta_null <= bounds[,2])]
# Return the p-value:
return(alpha)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.