Nothing
#' @title Summary of mean values by group
#' @name means_by_group
#'
#' @description Computes mean, sd and se for each sub-group (indicated by \code{grp})
#' of \code{dv}.
#'
#' @param x A (grouped) data frame.
#' @param dv Name of the dependent variable, for which the mean value, grouped
#' by \code{grp}, is computed.
#' @param grp Factor with the cross-classifying variable, where \code{dv} is
#' grouped into the categories represented by \code{grp}. Numeric vectors
#' are coerced to factors.
#' @param weights Name of variable in \code{x} that indicated the vector of
#' weights that will be applied to weight all observations. Default is
#' \code{NULL}, so no weights are used.
#' @param digits Numeric, amount of digits after decimal point when rounding
#' estimates and values.
#' @param file Destination file, if the output should be saved as file.
#' Only used when \code{out} is not \code{"txt"}.
#' @param encoding Character vector, indicating the charset encoding used
#' for variable and value labels. Default is \code{"UTF-8"}. Only used
#' when \code{out} is not \code{"txt"}.
#' @param out Character vector, indicating whether the results should be printed
#' to console (\code{out = "txt"}) or as HTML-table in the viewer-pane
#' (\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the
#' results should be plotted (\code{out = "plot"}, only applies to certain
#' functions). May be abbreviated.
#'
#' @return For non-grouped data frames, \code{means_by_group()} returns a data frame with
#' following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev},
#' \code{std.error} and \code{p.value}. For grouped data frames, returns
#' a list of such data frames.
#'
#' @details This function performs a One-Way-Anova with \code{dv} as dependent
#' and \code{grp} as independent variable, by calling
#' \code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}}
#' is called to get p-values for each sub-group. P-values indicate whether
#' each group-mean is significantly different from the total mean.
#'
#' @examples
#' data(efc)
#' means_by_group(efc, c12hour, e42dep)
#'
#' data(iris)
#' means_by_group(iris, Sepal.Width, Species)
#'
#' # also works for grouped data frames
#' if (require("dplyr")) {
#' efc %>%
#' group_by(c172code) %>%
#' means_by_group(c12hour, e42dep)
#' }
#'
#' # weighting
#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
#' means_by_group(efc, c12hour, e42dep, weights = weight)
#' @importFrom sjlabelled get_label drop_labels get_labels
#' @importFrom stats lm na.omit sd weighted.mean
#' @importFrom purrr map_chr map_df
#' @importFrom sjmisc to_value is_empty
#' @importFrom rlang enquo .data quo_name
#' @export
means_by_group <- function(x,
dv,
grp,
weights = NULL,
digits = 2,
out = c("txt", "viewer", "browser"),
encoding = "UTF-8",
file = NULL) {
out <- match.arg(out)
if (out != "txt" && !requireNamespace("sjPlot", quietly = TRUE)) {
message("Package `sjPlot` needs to be loaded to print HTML tables.")
out <- "txt"
}
# create quosures
grp.name <- rlang::quo_name(rlang::enquo(grp))
dv.name <- rlang::quo_name(rlang::enquo(dv))
# weights need extra checking, might be NULL
if (!missing(weights)) {
.weights <- try(rlang::quo_name(rlang::enquo(weights)), silent = TRUE)
if (inherits(.weights, "try-error")) .weights <- NULL
w.string <- try(eval(weights), silent = TRUE)
if (!inherits(w.string, "try-error") && !is.null(w.string) && is.character(w.string)) .weights <- w.string
if (sjmisc::is_empty(.weights) || .weights == "NULL") .weights <- NULL
} else
.weights <- NULL
# create string with variable names
vars <- c(grp.name, dv.name, .weights)
# get data
x <- suppressMessages(dplyr::select(x, !! vars))
# set value and row labels
varGrpLabel <- sjlabelled::get_label(x[[grp.name]], def.value = grp.name)
varCountLabel <- sjlabelled::get_label(x[[dv.name]], def.value = dv.name)
# first, drop unused labels
x[[grp.name]] <- sjlabelled::drop_labels(x[[grp.name]], drop.na = TRUE)
# now get valid value labels
value.labels <- sjlabelled::get_labels(
x[[grp.name]], attr.only = F, values = "n", non.labelled = TRUE
)
# return values
dataframes <- list()
# do we have a grouped data frame?
if (inherits(x, "grouped_df")) {
# get grouped data
grps <- get_grouped_data(x)
# now plot everything
for (i in seq_len(nrow(grps))) {
# copy back labels to grouped data frame
tmp <- sjlabelled::copy_labels(grps$data[[i]], x)
# get grouped means table
dummy <- means_by_group_helper(
x = tmp,
dv = dv.name,
grp = grp.name,
weight.by = .weights,
value.labels = value.labels,
varCountLabel = varCountLabel,
varGrpLabel = varGrpLabel
)
attr(dummy, "group") <- get_grouped_title(x, grps, i, sep = "\n")
# save data frame for return value
dataframes[[length(dataframes) + 1]] <- dummy
}
# add class-attr for print-method()
if (out == "txt")
class(dataframes) <- c("sj_grpmeans", "list")
else
class(dataframes) <- c("sjt_grpmeans", "list")
} else {
dataframes <- means_by_group_helper(
x = x,
dv = dv.name,
grp = grp.name,
weight.by = .weights,
value.labels = value.labels,
varCountLabel = varCountLabel,
varGrpLabel = varGrpLabel
)
# add class-attr for print-method()
if (out == "txt")
class(dataframes) <- c("sj_grpmean", class(dataframes))
else
class(dataframes) <- c("sjt_grpmean", class(dataframes))
}
# save how to print output
attr(dataframes, "print") <- out
attr(dataframes, "encoding") <- encoding
attr(dataframes, "file") <- file
attr(dataframes, "digits") <- digits
dataframes
}
#' @importFrom stats pf lm weighted.mean na.omit sd
#' @importFrom sjmisc to_value add_variables
#' @importFrom emmeans emmeans contrast
#' @importFrom dplyr pull select n_distinct
#' @importFrom purrr map_chr
#' @importFrom rlang .data
means_by_group_helper <- function(x, dv, grp, weight.by, value.labels, varCountLabel, varGrpLabel) {
# copy vectors from data frame
dv <- x[[dv]]
grp <- x[[grp]]
if (!is.null(weight.by))
weight.by <- x[[weight.by]]
else
weight.by <- 1
# convert values to numeric
dv <- sjmisc::to_value(dv)
# create data frame, for emmeans
mydf <- stats::na.omit(data.frame(
dv = dv,
grp = as.factor(grp),
weight.by = weight.by
))
# compute anova statistics for mean table
fit <- stats::lm(dv ~ grp, weights = weight.by, data = mydf)
# p-values of contrast-means
means.p <- fit %>%
emmeans::emmeans(specs = "grp") %>%
emmeans::contrast(method = "eff") %>%
summary() %>%
dplyr::pull("p.value")
## TODO
# efc %>%
# group_by(c172code, c161sex) %>%
# means_by_group(c12hour, e42dep)
# check if value labels length matches group count
if (dplyr::n_distinct(mydf$grp) != length(value.labels)) {
# get unique factor levels and check if these are numeric.
# if so, we match the values from value labels and the remaining
# factor levels, so we get the correct value labels for printing
nl <- unique(mydf$grp)
if (sjmisc::is_num_fac(nl))
value.labels <- value.labels[names(value.labels) %in% levels(nl)]
else
value.labels <- nl
}
# create summary
dat <- mydf %>%
dplyr::group_by(.data$grp) %>%
summarise(
mean = stats::weighted.mean(.data$dv, w = .data$weight.by, na.rm = TRUE),
N = round(sum(.data$weight.by)),
std.dev = weighted_sd(.data$dv, .data$weight.by),
std.error = weighted_se(.data$dv, .data$weight.by)
) %>%
mutate(p.value = means.p) %>%
dplyr::select(-.data$grp)
# finally, add total-row
dat <- dplyr::bind_rows(
dat,
data_frame(
mean = stats::weighted.mean(mydf$dv, w = mydf$weight.by, na.rm = TRUE),
N = nrow(mydf),
std.dev = weighted_sd(mydf$dv, mydf$weight.by),
std.error = weighted_se(mydf$dv, mydf$weight.by),
p.value = NA
)
)
# add row labels
dat <- sjmisc::add_variables(
dat,
term = c(unname(value.labels), "Total"),
.after = -1
)
# get anova statistics for mean table
sum.fit <- summary(fit)
# r-squared values
r2 <- sum.fit$r.squared
r2.adj <- sum.fit$adj.r.squared
# F-statistics
fstat <- sum.fit$fstatistic
pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = F)
# copy as attributes
attr(dat, "r2") <- r2
attr(dat, "adj.r2") <- r2.adj
attr(dat, "fstat") <- fstat[1]
attr(dat, "p.value") <- pval
attr(dat, "dv.label") <- varCountLabel
attr(dat, "grp.label") <- varGrpLabel
dat
}
get_grouped_title <- function(x, grps, i, sep = "\n") {
# create title for first grouping level
tp <- get_title_part(x, grps, 1, i)
title <- sprintf("%s: %s", tp[1], tp[2])
# do we have another groupng variable?
if (length(dplyr::group_vars(x)) > 1) {
tp <- get_title_part(x, grps, 2, i)
title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2])
}
# return title
title
}
get_title_part <- function(x, grps, level, i) {
# prepare title for group
var.name <- colnames(grps)[level]
# get values from value labels
vals <- sjlabelled::get_values(x[[var.name]])
# if we have no value labels, get values directly
if (is.null(vals)) {
vals <- unique(x[[var.name]])
lab.pos <- i
} else {
# find position of value labels for current group
lab.pos <- which(vals == grps[[var.name]][i])
}
# get variable and value labels
t1 <- sjlabelled::get_label(x[[var.name]], def.value = var.name)
t2 <- sjlabelled::get_labels(x[[var.name]])[lab.pos]
# if we have no value label, use value instead
if (is.null(t2)) t2 <- vals[lab.pos]
# generate title
c(t1, t2)
}
#' @rdname means_by_group
#' @export
grpmean <- means_by_group
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.