#' First check of assumptions to find suitable transformations
#'
#' Gives a first overview if a transformation is useful and which transformation
#' is promising to fulfill the model assumptions normality, homoscedasticity and
#' linearity.
#'
#' @param object an object of type \code{lm}.
#' @param method a character string. Different estimation methods can be used
#' for the estimation of the optimal transformation parameter:
#' (i) Maximum likelihood approach ("ml"), (ii) Skewness minimization ("skew"),
#' (iii) Kurtosis optimization ("kurt"), (iv) Divergence minimization by
#' Kolmogorov-Smirnov ("div.ks"), by Cramer-von-Mises ("div.cvm") or by
#' Kullback-Leibler ("div.kl"). Defaults to "ml".
#' @param std logical. If \code{TRUE}, the transformed model is returned based
#' on the standardized/scaled transformation. Defaults to \code{FALSE}.
#' @param ... other parameters that can be passed to the function, e.g. other
#' lambdaranges. Self-defined lambdaranges are given to the function as an
#' argument that is the combination of the name of the transformation and lr and
#' the range needs to be a numeric vector of length 2. For instance, changing the
#' lambdarange for the Manly transformation would mean to add an argument
#' \code{manly_lr = manly_lr = c(0.000005,0.00005)}. For the default values that
#' are used for the lambdaranges see the documentation for the provided
#' transformations.
#' @return A table with tests for normality and homoscedasticity. Furthermore,
#' scatterplots are returned to check the linearity assumption.
#' @seealso \code{\link{bickeldoksum}}, \code{\link{boxcox}}, \code{\link{dual}},
#' \code{\link{glog}}, \code{\link{gpower}}, \code{\link{log}},
#' \code{\link{logshiftopt}}, \code{\link{manly}}, \code{\link{modulus}},
#' \code{\link{neglog}}, \code{\link{sqrtshift}}, \code{\link{yeojohnson}}
#' @examples
#' # Load data
#' data("cars", package = "datasets")
#'
#' # Fit linear model
#' lm_cars <- lm(dist ~ speed, data = cars)
#'
#' assumptions(lm_cars)
#' assumptions(lm_cars, method = "skew", manly_lr = c(0.000005,0.00005))
#' @importFrom moments skewness kurtosis
#' @importFrom lmtest bptest
#' @export
assumptions <- function(object, method = "ml", std = FALSE, ...){
resid_One <- NULL
resid_Two <- NULL
check_assumptions(object = object, method = method, std = std)
lambdaranges <- list(...)
trafos <- list()
if (any(object$model[, paste0(formula(object)[2])] <= 0)) {
# Box-Cox shift
if ("boxcox_lr" %in% names(lambdaranges)) {
try(trafos[["boxcoxshift"]] <- boxcoxshift(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["boxcox_lr"]]),
silent = TRUE)
} else if (!("boxcox_lr" %in% names(lambdaranges))) {
try(trafos[["boxcoxshift"]] <- boxcoxshift(object = object, method = method,
plotit = FALSE),
silent = TRUE)
}
if (is.null(trafos[["boxcoxshift"]])) {
warning("The default lambarange is not suitable for the shifted Box-Cox
transformation and this data. If the Box-Cox transformation is
supposed to be considered in the fast check, please add an argument
boxcox_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. boxcox_lr = c(-2,2).")
} else {
trafos[["boxcoxshift"]] <- trafos[["boxcoxshift"]]
}
# Log shift
trafos[["logshift"]] <- logshift(object = object)
# Calculate shift
shift <- with_shift(y = object$model[, paste0(formula(object)[2])],
shift = 0)
cat(paste0(formula(object)[2]), " contains zero or negative values. Thus,
a shift equal to ",shift," is included to the Log and the Box-Cox
transformation such that y + shift > 0. The Dual transformation is
only suitable for positive response values.")
} else if (all(object$model[, paste0(formula(object)[2])] > 0)) {
# Box-Cox
if ("boxcox_lr" %in% names(lambdaranges)) {
try(trafos[["boxcox"]] <- boxcox(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["boxcox_lr"]]),
silent = TRUE)
} else if (!("boxcox_lr" %in% names(lambdaranges))) {
try(trafos[["boxcox"]] <- boxcox(object = object, method = method,
plotit = FALSE),
silent = TRUE)
}
if (is.null(trafos[["boxcox"]])) {
warning("The lambarange is not suitable for the Box-Cox
transformation and this data. If the Box-Cox transformation is
supposed to be considered in the fast check, please add an argument
boxcox_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. boxcox_lr = c(-2,2).")
} else {
trafos[["boxcox"]] <- trafos[["boxcox"]]
}
# Dual
if ("dual_lr" %in% names(lambdaranges)) {
try(trafos[["dual"]] <- dual(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["dual_lr"]]),
silent = TRUE)
} else if (!("dual_lr" %in% names(lambdaranges))) {
try(trafos[["dual"]] <- dual(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["dual"]])) {
warning("The lambarange is not suitable for the Dual
transformation and this data. If the Dual transformation is
supposed to be considered in the fast check, please add an argument
dual_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. dual_lr = c(0,2).")
} else {
trafos[["dual"]] <- trafos[["dual"]]
}
trafos[["log"]] <- logtrafo(object = object)
}
# Bickel-Doksum
if ("bickeldoksum_lr" %in% names(lambdaranges)) {
try(trafos[["bickeldoksum"]] <- bickeldoksum(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["bickeldoksum_lr"]]),
silent = TRUE)
} else if (!("bickeldoksum_lr" %in% names(lambdaranges))) {
try(trafos[["bickeldoksum"]] <- bickeldoksum(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["bickeldoksum"]])) {
warning("The lambarange is not suitable for the Bickel-Doksum
transformation and this data. If the Bickel-Doksum transformation is
supposed to be considered in the fast check, please add an argument
bickeldoksum_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. bickeldoksum_lr = c(-1e-11,2).")
} else {
trafos[["bickeldoksum"]] <- trafos[["bickeldoksum"]]
}
# Gpower
if ("gpower_lr" %in% names(lambdaranges)) {
try(trafos[["gpower"]] <- gpower(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["gpower_lr"]]),
silent = TRUE)
} else if (!("gpower_lr" %in% names(lambdaranges))) {
try(trafos[["gpower"]] <- gpower(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["gpower"]])) {
warning("The lambarange is not suitable for the Gpower
transformation and this data. If the Gpower transformation is
supposed to be considered in the fast check, please add an argument
gpower_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. gpower_lr = c(-2,2).")
} else {
trafos[["gpower"]] <- trafos[["gpower"]]
}
# Manly
if ("manly_lr" %in% names(lambdaranges)) {
try(trafos[["manly"]] <- manly(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["manly_lr"]]),
silent = TRUE)
} else if (!("manly_lr" %in% names(lambdaranges))) {
try(trafos[["manly"]] <- manly(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["manly"]])) {
warning("The lambarange is not suitable for the Manly transformation
and this data. If the Manly transformation is
supposed to be considered in the fast check, please add an argument
manly_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. manly_lr = c(-2,2).")
} else {
trafos[["manly"]] <- trafos[["manly"]]
}
# Modulus
if ("modulus_lr" %in% names(lambdaranges)) {
try(trafos[["modulus"]] <- modulus(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["modulus_lr"]]),
silent = TRUE)
} else if (!("modulus_lr" %in% names(lambdaranges))) {
try(trafos[["modulus"]] <- modulus(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["modulus"]])) {
warning("The lambarange is not suitable for the Modulus
transformation and this data. If the Modulus transformation is
supposed to be considered in the fast check, please add an argument
modulus_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. modulus_lr = c(-2,2).")
} else {
trafos[["modulus"]] <- trafos[["modulus"]]
}
# Log shift opt
if ("logshiftopt_lr" %in% names(lambdaranges)) {
try(trafos[["logshiftopt"]] <- logshiftopt(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["logshiftopt_lr"]]),
silent = TRUE)
} else if (!("logshiftopt_lr" %in% names(lambdaranges))) {
try(trafos[["logshiftopt"]] <- suppressWarnings(logshiftopt(object = object, method = method,
plotit = FALSE)), silent = TRUE)
}
if (is.null(trafos[["logshiftopt"]])) {
warning("The lambarange is not suitable for the Log shift opt
transformation and this data. If the Log shift opt transformation is
supposed to be considered in the fast check, please add an argument
logshiftopt_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. logshiftopt_lr = c(-10,10).")
} else {
trafos[["logshiftopt"]] <- trafos[["logshiftopt"]]
}
# Square-root shift
if ("sqrtshift_lr" %in% names(lambdaranges)) {
try(trafos[["sqrtshift"]] <- sqrtshift(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["sqrtshift_lr"]]),
silent = TRUE)
} else if (!("sqrtshift_lr" %in% names(lambdaranges))) {
try(trafos[["sqrtshift"]] <- sqrtshift(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["sqrtshift"]])) {
warning("The lambarange is not suitable for the Square-root shift
transformation and this data. If the Square-root shift transformation is
supposed to be considered in the fast check, please add an argument
sqrtshift_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. sqrtshift_lr = c(-10,10).")
} else {
trafos[["sqrtshift"]] <- trafos[["sqrtshift"]]
}
# Yeo-Johnson
if ("yeojohnson_lr" %in% names(lambdaranges)) {
try(trafos[["yeojohnson"]] <- yeojohnson(object = object, method = method,
plotit = FALSE,
lambdarange = lambdaranges[["yeojohnson_lr"]]),
silent = TRUE)
} else if (!("yeojohnson_lr" %in% names(lambdaranges))) {
try(trafos[["yeojohnson"]] <- yeojohnson(object = object, method = method,
plotit = FALSE), silent = TRUE)
}
if (is.null(trafos[["yeojohnson"]])) {
warning("The lambarange is not suitable for the Yeo-Johnson
transformation and this data. If the Yeo-Johnson transformation is
supposed to be considered in the fast check, please add an argument
yeojohnson_lr which is a vector of two elements that define the lower and
the upper bound for the lambdarange, e.g. yeojohnson_lr = c(-2,2).")
} else {
trafos[["yeojohnson"]] <- trafos[["yeojohnson"]]
}
trafos[["log"]] <- logtrafo(object = object)
trafos[["reciprocal"]] <- reciprocal(object = object)
trafos[["neglog"]] <- neglog(object = object)
trafos[["glog"]] <- glog(object = object)
# Get residuals
resid_orig <- residuals(object, level = 0, type = "pearson")
trafo_mod <- list()
resid <- list()
shapiroEst <- list()
shapiroP <- list()
skewness_trafo <- list()
kurtosis_trafo <- list()
breusch_pagan <- list()
scatter_trafos <- list()
for (transform in names(trafos)) {
trafo_mod[[transform]] <- get_modelt(object = object,
trans_mod = trafos[[transform]],
std = std)
resid[[transform]] <- residuals(trafo_mod[[transform]], level = 0,
type = "pearson")
if (length(resid[[transform]]) > 3 & length(resid[[transform]]) < 5000) {
# Original model or first transformed
shapiroEst_orig <- shapiro.test(resid_orig)$statistic[[1]]
shapiroP_orig <- shapiro.test(resid_orig)$p.value[[1]]
shapiroEst[[transform]] <- shapiro.test(resid[[transform]])$statistic[[1]]
shapiroP[[transform]] <- shapiro.test(resid[[transform]])$p.value[[1]]
namesNorm <- c("Skewness", "Kurtosis", "Shapiro_W", "Shapiro_p")
} else {
warning_AD <- "Number of domains exceeds 5000 or is lower then 3 and thus the
Shapiro-Wilk test is not applicable for residuals. Instead,
the results of the Anderson-Darling test is used."
shapiroEst_orig <- ad.test(resid_orig)$statistic[[1]]
shapiroP_orig <- ad.test(resid_orig)$p.value[[1]]
shapiroEst[[transform]] <- ad.test(resid[[transform]])$statistic[[1]]
shapiroP[[transform]] <- ad.test(resid[[transform]])$p.value[[1]]
}
skewness_orig <- skewness(resid_orig)
kurtosis_orig <- kurtosis(resid_orig)
skewness_trafo[[transform]] <- skewness(resid[[transform]])
kurtosis_trafo[[transform]] <- kurtosis(resid[[transform]])
breusch_pagan_orig <- bptest(formula(object$terms),
data = object$model)
breusch_pagan[[transform]] <- bptest(formula(trafo_mod[[transform]]$terms),
data = trafo_mod[[transform]]$model)
}
norm_resid <- data.frame(Skewness = round(skewness_orig, 4),
Kurtosis = round(kurtosis_orig, 4),
Shapiro_W = round(shapiroEst_orig,4),
Shapiro_p = round(shapiroP_orig,4))
hetero <- data.frame(BreuschPagan_V = round(breusch_pagan_orig$statistic, 4),
BreuschPagan_p = round(breusch_pagan_orig$p.value, 4))
for (transform in names(trafos)) {
norm_resid <- rbind(norm_resid,
data.frame(Skewness = round(skewness_trafo[[transform]], 4),
Kurtosis = round(kurtosis_trafo[[transform]], 4),
Shapiro_W = round(shapiroEst[[transform]],4),
Shapiro_p = round(shapiroP[[transform]], 4)))
hetero <- rbind(hetero,
data.frame(BreuschPagan_V = round(breusch_pagan[[transform]]$statistic, 4),
BreuschPagan_p = round(breusch_pagan[[transform]]$p.value, 4)))
}
rownames(norm_resid) <- c("untransformed", names(trafos))
rownames(hetero) <- c("untransformed", names(trafos))
norm_resid <- norm_resid[order(norm_resid$Shapiro_p, decreasing = TRUE),]
hetero <- hetero[order(hetero$BreuschPagan_p, decreasing = TRUE),]
cat("Test normality assumption \n")
print(norm_resid)
cat("\n")
cat("Test homoscedasticity assumption \n")
print(hetero)
cat("\n")
cat("Test linearity assumption \n")
cat("Press [enter] to continue")
line <- readline()
pairs(formula(object$terms), data = object$model, main = "Untransformed model",
lower.panel = panel.smooth, upper.panel = panel.cor)
for (transform in names(trafos)) {
cat("Press [enter] to continue")
line <- readline()
pairs(formula(trafo_mod[[transform]]$terms),
data = trafo_mod[[transform]]$model,
main = transform,
lower.panel = panel.smooth, upper.panel = panel.cor)
}
if (!(length(resid[[transform]]) > 3 & length(resid[[transform]]) < 5000)) {
warning(warning_AD)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.