Nothing
#' Perform Multiple Comparison Tests on a statistical model
#'
#' A function for comparing and ranking predicted means with Tukey's Honest Significant Difference (HSD) Test.
#'
#' @param model.obj An ASReml-R or aov model object. Will likely also work with `lme` ([nlme::lme()]), `lmerMod` ([lme4::lmer()]) models as well.
#' @param classify Name of predictor variable as string.
#' @param sig The significance level, numeric between 0 and 1. Default is 0.05.
#' @param int.type The type of confidence interval to calculate. One of `ci`, `1se` or `2se`. Default is `ci`.
#' @param trans Transformation that was applied to the response variable. One of `log`, `sqrt`, `logit`, `power` or `inverse`. Default is `NA`.
#' @param offset Numeric offset applied to response variable prior to transformation. Default is `NA`. Use 0 if no offset was applied to the transformed data. See Details for more information.
#' @param power Numeric power applied to response variable with power transformation. Default is `NA`. See Details for more information.
#' @param decimals Controls rounding of decimal places in output. Default is 2 decimal places.
#' @param descending Logical (default `FALSE`). Order of the output sorted by the predicted value. If `TRUE`, largest will be first, through to smallest last.
#' @param plot Automatically produce a plot of the output of the multiple comparison test? Default is `FALSE`. This is maintained for backwards compatibility, but the preferred method now is to use `autoplot(<multiple_comparisons output>)`. See [biometryassist::autoplot.mct()] for more details.
#' @param label_height Height of the text labels above the upper error bar on the plot. Default is 0.1 (10%) of the difference between upper and lower error bars above the top error bar.
#' @param rotation Rotate the text output as Treatments within the plot. Allows for easier reading of long treatment labels. Number between 0 and 360 (inclusive) - default 0
#' @param save Logical (default `FALSE`). Save the predicted values to a csv file?
#' @param savename A file name for the predicted values to be saved to. Default is `predicted_values`.
#' @param order Deprecated. Use `descending` instead.
#' @param pred Deprecated. Use `classify` instead.
#' @param pred.obj Deprecated. Predicted values are calculated within the function from version 1.0.1 onwards.
#' @param ... Other arguments passed through to `predict.asreml()`.
#'
#' @importFrom multcompView multcompLetters
#' @importFrom emmeans emmeans
#' @importFrom stats model.frame predict qtukey qt terms var
#' @importFrom utils packageVersion
#'
#' @details Some transformations require that data has a small offset applied, otherwise it will cause errors (for example taking a log of 0, or square root of negative values). In order to correctly reverse this offset, if the `trans` argument is supplied, an offset value must also be supplied. If there was no offset required for a transformation, then use a value of 0 for the `offset` argument.
#'
#' @return A list containing a data frame with predicted means, standard errors, confidence interval upper and lower bounds, and significant group allocations (named `predicted_values`), as well as a plot visually displaying the predicted values (named `predicted_plot`). If some of the predicted values are aliased, a warning is printed, and the aliased treatment levels are returned in the output (named `aliased`).
#'
#' @references Jørgensen, E. & Pedersen, A. R. (1997). How to Obtain Those Nasty Standard Errors From Transformed Data - and Why They Should Not Be Used. [<https://pure.au.dk/portal/en/publications/how-to-obtain-those-nasty-standard-errors-from-transformed-data--and-why-they-should-not-be-used(d649ca20-d15f-11db-8e26-000ea68e967b).html>](https://pure.au.dk/portal/en/publications/how-to-obtain-those-nasty-standard-errors-from-transformed-data--and-why-they-should-not-be-used(d649ca20-d15f-11db-8e26-000ea68e967b).html)
#'
#' @examples
#' # Fit aov model
#' model <- aov(Petal.Length ~ Species, data = iris)
#'
#' # Display the ANOVA table for the model
#' anova(model)
#'
#' # Determine ranking and groups according to Tukey's Test
#' pred.out <- multiple_comparisons(model, classify = "Species")
#'
#' # Display the predicted values table
#' pred.out
#'
#' # Show the predicted values plot
#' autoplot(pred.out, label_height = 0.5)
#'
#'
#'
#' \dontrun{
#' # ASReml-R Example
#' library(asreml)
#'
#' #Fit ASReml Model
#' model.asr <- asreml(yield ~ Nitrogen + Variety + Nitrogen:Variety,
#' random = ~ Blocks + Blocks:Wplots,
#' residual = ~ units,
#' data = asreml::oats)
#'
#' wald(model.asr) #Nitrogen main effect significant
#'
#' #Determine ranking and groups according to Tukey's Test
#' pred.out <- multiple_comparisons(model.obj = model.asr, classify = "Nitrogen",
#' descending = TRUE, decimals = 5)
#'
#' pred.out
#'
#' # Example using a box-cox transformation
#' set.seed(42) # See the seed for reproducibility
#' resp <- rnorm(n = 50, 5, 1)^3
#' trt <- as.factor(sample(rep(LETTERS[1:10], 5), 50))
#' block <- as.factor(rep(1:5, each = 10))
#' ex_data <- data.frame(resp, trt, block)
#'
#' # Change one treatment random values to get significant difference
#' ex_data$resp[ex_data$trt=="A"] <- rnorm(n = 5, 7, 1)^3
#'
#' model.asr <- asreml(resp ~ trt,
#' random = ~ block,
#' residual = ~ units,
#' data = ex_data)
#'
#' resplot(model.asr)
#'
#' # Perform Box-Cox transformation and get maximum value
#' out <- MASS::boxcox(ex_data$resp~ex_data$trt)
#' out$x[which.max(out$y)] # 0.3838
#'
#' # Fit cube root to the data
#' model.asr <- asreml(resp^(1/3) ~ trt,
#' random = ~ block,
#' residual = ~ units,
#' data = ex_data)
#' resplot(model.asr) # residual plots look much better
#'
#' #Determine ranking and groups according to Tukey's Test
#' pred.out <- multiple_comparisons(model.obj = model.asr,
#' classify = "trt",
#' trans = "power", power = (1/3))
#'
#' pred.out
#' autoplot(pred.out)
#' }
#'
#' @export
#'
multiple_comparisons <- function(model.obj,
classify,
sig = 0.05,
int.type = "ci",
trans = NA,
offset = NA,
power = NA,
decimals = 2,
descending = FALSE,
plot = FALSE,
label_height = 0.1,
rotation = 0,
save = FALSE,
savename = "predicted_values",
order,
pred.obj,
pred,
...) {
rlang::check_dots_used()
if(!missing(pred)) {
warning("Argument `pred` has been deprecated and will be removed in a future version. Please use `classify` instead.")
classify <- pred
}
if(!missing(order)) {
warning("Argument `order` has been deprecated and will be removed in a future version. Please use `descending` instead.")
}
if(sig > 0.5) {
warning("Significance level given by `sig` is high. Perhaps you meant ", 1-sig, "?", call. = FALSE)
}
# Get the individual names provided in classify
vars <- unlist(strsplit(classify, "\\:"))
reserved_col_names <- c("predicted.value", "std.error", "Df",
"groups", "PredictedValue", "ApproxSE", "ci", "low", "up")
if(any(vars %in% reserved_col_names)) {
stop("Invalid column name. Please change the name of column(s): ", vars[vars %in% reserved_col_names])
}
if(inherits(model.obj, "asreml")){
if(classify %!in% c(attr(stats::terms(model.obj$formulae$fixed), 'term.labels'),
attr(stats::terms(model.obj$formulae$random), 'term.labels'))) {
stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE)
}
if(!missing(pred.obj)) {
warning("Argument `pred.obj` has been deprecated and will be removed in a future version. Predictions are now performed internally in the function.")
}
pred.obj <- quiet(asreml::predict.asreml(model.obj, classify = classify, sed = TRUE, trace = FALSE, ...))
# Check if all the predicted values are NA. If so, suggests the need of the `present` argument
if(all(is.na(pred.obj$pvals$predicted.value)) & all(is.na(pred.obj$pvals$std.error))) {
stop("All predicted values are aliased. Perhaps you need the `present` argument?")
}
# Check if any treatments are aliased, and remove them and print a warning
if(anyNA(pred.obj$pvals$predicted.value)) {
aliased <- which(is.na(pred.obj$pvals$predicted.value))
# Get the level values of the aliased treatments
# If only one treatment (classify does not contain :) all levels printed separated with ,
# If multiple treatments, first need to concatenate columns, then collapse rows
aliased_names <- pred.obj$pvals[aliased, !names(pred.obj$pvals) %in% c("predicted.value", "std.error", "status")]
# This pastes rows of the dataframe together across the columns, and turns into a vector
if(is.data.frame(aliased_names)) {
aliased_names <- apply(aliased_names, 1, paste, collapse = ":")
}
if(length(aliased_names) > 1) {
warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.")
}
else {
warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.")
}
pred.obj$pvals <- pred.obj$pvals[!is.na(pred.obj$pvals$predicted.value),]
pred.obj$pvals <- droplevels(pred.obj$pvals)
pred.obj$sed <- pred.obj$sed[-aliased, -aliased]
warning(warn_string, call. = FALSE)
}
#For use with asreml 4+
if(utils::packageVersion("asreml") > "4") {
pp <- pred.obj$pvals
sed <- pred.obj$sed
}
pp <- pp[!is.na(pp$predicted.value),]
pp$status <- NULL
dat.ww <- quiet(asreml::wald(model.obj, ssType = "conditional", denDF = "default", trace = FALSE)$Wald)
dendf <- data.frame(Source = row.names(dat.ww), denDF = dat.ww$denDF)
ifelse(grepl(":", classify),
pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"),
pp$Names <- pp[[classify]])
ndf <- dendf$denDF[grepl(classify, dendf$Source) & nchar(classify) == nchar(as.character(dendf$Source))]
if(rlang::is_empty(ndf)) {
ndf <- model.obj$nedf
rand_terms <- vars[vars %in% attr(stats::terms(model.obj$formulae$random), 'term.labels')]
warning(rand_terms, " is not a fixed term in the model. The denominator degrees of freedom are estimated using the residual degrees of freedom. This may be inaccurate.", call. = FALSE)
}
crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed
# Grab the response from the formula to create plot Y label
ylab <- model.obj$formulae$fixed[[2]]
}
else if(inherits(model.obj, c("aov", "lm", "lmerMod", "lmerModLmerTest"))) {
if(classify %!in% attr(stats::terms(model.obj), 'term.labels')) {
stop(classify, " is not a term in the model. Please check model specification.", call. = FALSE)
}
on.exit(options(emmeans = emmeans::emm_defaults))
emmeans::emm_options("msg.interaction" = FALSE, "msg.nesting" = FALSE)
pred.out <- emmeans::emmeans(model.obj, as.formula(paste("~", classify)))
sed <- pred.out@misc$sigma*sqrt(outer(1/pred.out@grid$.wgt., 1/pred.out@grid$.wgt., "+"))
pred.out <- as.data.frame(pred.out)
pred.out <- pred.out[,!grepl("CL", names(pred.out))]
pp <- pred.out
names(pp)[names(pp) == "emmean"] <- "predicted.value"
names(pp)[names(pp) == "SE"] <- "std.error"
# SED <- matrix(data = sed, nrow = nrow(pp), ncol = nrow(pp))
diag(sed) <- NA
ifelse(grepl(":", classify),
pp$Names <- apply(pp[,vars], 1, paste, collapse = "_"),
pp$Names <- pp[[classify]])
if(anyNA(pp$predicted.value)) {
aliased <- which(is.na(pp$predicted.value))
# Get the level values of the aliased treatments
# If only one treatment (classify does not contain :) all levels printed separated with ,
# If multiple treatments, first need to concatenate columns, then collapse rows
aliased_names <- pp[aliased, !names(pp) %in% c("predicted.value", "std.error", "df", "Names")]
# This pastes rows of the dataframe together across the columns, and turns into a vector
if(is.data.frame(aliased_names)) {
aliased_names <- apply(aliased_names, 1, paste, collapse = ":")
}
if(length(aliased_names) > 1) {
warn_string <- paste0("Some levels of ", classify, " are aliased. They have been removed from predicted output.\n Aliased levels are: ", paste(aliased_names, collapse = ", "), ".\n These levels are saved in the output object.")
}
else {
warn_string <- paste0("A level of ", classify, " is aliased. It has been removed from predicted output.\n Aliased level is: ", aliased_names, ".\n This level is saved as an attribute of the output object.")
}
pp <- pp[!is.na(pp$predicted.value),]
pp <- droplevels(pp)
sed <- sed[-aliased, -aliased]
warning(warn_string, call. = FALSE)
}
ndf <- pp$df[1]
crit.val <- 1/sqrt(2)*stats::qtukey((1-sig), nrow(pp), ndf)*sed
# Grab the response from the formula to create plot Y label
if(inherits(model.obj, c("lmerMod", "lmerModLmerTest"))) {
ylab <- model.obj@call[[2]][[2]]
}
else {
ylab <- model.obj$terms[[2]]
}
}
else {
stop("Models of type ", class(model.obj), " are not supported.")
}
# Check that the predicted levels don't contain a dash -, if they do replace and display warning
if(any(any(grepl("-", pp$Names) | grepl("-", pp[,1])))) {
levs <- unique(c(grep("-", pp[,1], value = TRUE), grep("-", pp$Names, value = TRUE)))
if(length(levs)>1) {
warning("The treatment levels ", paste(levs, collapse = ", "), " contained '-', which has been replaced in the final output with '_'")
}
else {
warning("The treatment level ", levs, " contained '-', which has been replaced in the final output with '_'")
}
pp[,1] <- gsub(pattern = "-", replacement = "_", pp[,1])
pp$Names <- gsub(pattern = "-", replacement = "_", pp$Names)
}
Names <- as.character(pp$Names)
# Determine pairs that are significantly different
diffs <- abs(outer(pp$predicted.value, pp$predicted.value, "-")) > crit.val
# (diffs*sqrt(2))/sed # check against ptukey (use top/bottom triangle)
diffs <- diffs[lower.tri(diffs)]
# Create a vector of treatment comparison names
m <- outer(pp$Names, pp$Names, paste, sep="-")
m <- m[lower.tri(m)]
names(diffs) <- m
ll <- multcompView::multcompLetters3("Names", "predicted.value", diffs, pp, reversed = !descending)
rr <- data.frame(groups = ll$Letters)
rr$Names <- row.names(rr)
pp.tab <- merge(pp,rr)
if(!is.na(trans)){
if(is.na(offset)) {
warning("Offset value assumed to be 0. Change with `offset` argument.")
offset <- 0
}
if(trans == "sqrt"){
pp.tab$PredictedValue <- (pp.tab$predicted.value)^2 - ifelse(!is.na(offset), offset, 0)
pp.tab$ApproxSE <- 2*abs(pp.tab$std.error)*sqrt(pp.tab$PredictedValue)
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0)
pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^2 - ifelse(!is.na(offset), offset, 0)
}
if(trans == "log"){
pp.tab$PredictedValue <- exp(pp.tab$predicted.value) - ifelse(!is.na(offset), offset, 0)
pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$low <- exp(pp.tab$predicted.value - pp.tab$ci) - ifelse(!is.na(offset), offset, 0)
pp.tab$up <- exp(pp.tab$predicted.value + pp.tab$ci) - ifelse(!is.na(offset), offset, 0)
}
if(trans == "logit"){
pp.tab$PredictedValue <- exp(pp.tab$predicted.value)/(1 + exp(pp.tab$predicted.value))
pp.tab$ApproxSE <- pp.tab$PredictedValue * (1 - pp.tab$PredictedValue)* abs(pp.tab$std.error)
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$ll <- pp.tab$predicted.value - pp.tab$ci
pp.tab$low <- exp(pp.tab$ll)/(1 + exp(pp.tab$ll))
pp.tab$uu <- pp.tab$predicted.value + pp.tab$ci
pp.tab$up <- exp(pp.tab$uu)/(1 + exp(pp.tab$uu))
pp.tab$ll <- NULL
pp.tab$uu <- NULL
}
if(trans == "power"){
pp.tab$PredictedValue <- (pp.tab$predicted.value)^(1/power) - ifelse(!is.na(offset), offset, 0)
pp.tab$ApproxSE <- pp.tab$std.error*(1/(power*pp.tab$PredictedValue^(power-1)))
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$low <- (pp.tab$predicted.value - pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0)
pp.tab$up <- (pp.tab$predicted.value + pp.tab$ci)^(1/power) - ifelse(!is.na(offset), offset, 0)
}
if(trans == "inverse"){
pp.tab$PredictedValue <- 1/pp.tab$predicted.value
pp.tab$ApproxSE <- abs(pp.tab$std.error)*pp.tab$PredictedValue^2
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$low <- 1/(pp.tab$predicted.value - pp.tab$ci)
pp.tab$up <- 1/(pp.tab$predicted.value + pp.tab$ci)
}
}
else {
if(int.type == "ci"){
pp.tab$ci <- stats::qt(p = sig, ndf, lower.tail = FALSE) * pp.tab$std.error
}
if(int.type == "1se"){
pp.tab$ci <- pp.tab$std.error
}
if(int.type == "2se"){
pp.tab$ci <- 2*pp.tab$std.error
}
pp.tab$low <- pp.tab$predicted.value - pp.tab$ci
pp.tab$up <- pp.tab$predicted.value + pp.tab$ci
}
pp.tab <- pp.tab[base::order(pp.tab$predicted.value, decreasing = descending),]
pp.tab$Names <- NULL
trtindex <- max(unlist(lapply(paste0("^", vars, "$"), grep, x = names(pp.tab))))
trtnam <- names(pp.tab)[1:trtindex]
# Exclude reserved column names
trtnam <- trtnam[trtnam %!in% c("predicted.value", "std.error", "Df",
"groups", "PredictedValue", "ApproxSE", "ci", "low", "up")]
for(i in seq_along(trtnam)){
pp.tab[[trtnam[i]]] <- factor(pp.tab[[trtnam[i]]], levels = unique(pp.tab[[trtnam[i]]]))
}
# rounding to the correct number of decimal places
pp.tab <- rapply(object = pp.tab, f = round, classes = "numeric", how = "replace", digits = decimals)
if(save) {
write.csv(pp.tab, file = paste0(savename, ".csv"), row.names = FALSE)
}
# If there are brackets in the label, grab the text from inside
if(is.call(ylab)) {
ylab <- as.character(ylab)[2]
}
attr(pp.tab, "ylab") <- ylab
if(length(vars)>2) {
classify3 <- vars[3]
}
else if(length(vars) > 1) {
classify2 <- vars[2]
classify <- vars[1]
}
class(pp.tab) <- c("mct", class(pp.tab))
if(plot) {
print(autoplot(pp.tab))
}
if(exists("aliased_names")) {
attr(pp.tab, 'aliased') <- as.character(aliased_names)
}
# Add the critical value as an attribute
if(stats::var(as.vector(crit.val), na.rm = TRUE) < 1e-10) {
attr(pp.tab, 'HSD') <- crit.val[1,2]
}
else {
attr(pp.tab, 'HSD') <- crit.val
}
rownames(pp.tab) <- NULL
return(pp.tab)
}
#' Print output of multiple_comparisons
#'
#' @param x An mct object to print to the console.
#' @param ... Other arguments
#'
#' @return The original object invisibly.
#' @seealso [multiple_comparisons()]
#' @method print mct
#' @export
#' @examples
#' dat.aov <- aov(Petal.Width ~ Species, data = iris)
#' output <- multiple_comparisons(dat.aov, classify = "Species")
#' print(output)
print.mct <- function(x, ...) {
stopifnot(inherits(x, "mct"))
print.data.frame(x, ...)
if(!is.null(attr(x, "aliased"))) {
aliased <- attr(x, "aliased")
if(length(aliased) > 1) {
cat("\nAliased levels are:", paste(aliased[1:(length(aliased)-1)], collapse = ", "), "and", aliased[length(aliased)], "\n")
}
else {
cat("\nAliased level is:", aliased, "\n")
}
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.