R/grpmean.R
In sjstats: Collection of Convenient Functions for Common Statistical Computations

Documented in means_by_group

#' @title Summary of mean values by group
#' @name means_by_group
#'
#' @description Computes mean, sd and se for each sub-group (indicated by \code{grp})
#'                of \code{dv}.
#'
#' @param x A (grouped) data frame.
#' @param dv Name of the dependent variable, for which the mean value, grouped
#'   by \code{grp}, is computed.
#' @param grp Factor with the cross-classifying variable, where \code{dv} is
#'   grouped into the categories represented by \code{grp}. Numeric vectors
#'   are coerced to factors.
#' @param weights Name of variable in \code{x} that indicated the vector of
#'   weights that will be applied to weight all observations. Default is
#'   \code{NULL}, so no weights are used.
#' @param digits Numeric, amount of digits after decimal point when rounding
#'   estimates and values.
#' @param file Destination file, if the output should be saved as file.
#'   Only used when \code{out} is not \code{"txt"}.
#' @param encoding Character vector, indicating the charset encoding used
#'   for variable and value labels. Default is \code{"UTF-8"}. Only used
#'   when \code{out} is not \code{"txt"}.
#' @param out Character vector, indicating whether the results should be printed
#'    to console (\code{out = "txt"}) or as HTML-table in the viewer-pane
#'    (\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the
#'    results should be plotted (\code{out = "plot"}, only applies to certain
#'    functions). May be abbreviated.
#'
#' @return For non-grouped data frames, \code{means_by_group()} returns a data frame with
#'   following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev},
#'   \code{std.error} and \code{p.value}. For grouped data frames, returns
#'   a list of such data frames.
#'
#' @details This function performs a One-Way-Anova with \code{dv} as dependent
#'   and \code{grp} as independent variable, by calling
#'   \code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}}
#'   is called to get p-values for each sub-group. P-values indicate whether
#'   each group-mean is significantly different from the total mean.
#'
#' @examples
#' data(efc)
#' means_by_group(efc, c12hour, e42dep)
#'
#' data(iris)
#' means_by_group(iris, Sepal.Width, Species)
#'
#' # also works for grouped data frames
#' if (require("dplyr")) {
#'   efc %>%
#'     group_by(c172code) %>%
#'     means_by_group(c12hour, e42dep)
#' }
#'
#' # weighting
#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
#' means_by_group(efc, c12hour, e42dep, weights = weight)
#' @importFrom sjlabelled get_label drop_labels get_labels
#' @importFrom stats lm na.omit sd weighted.mean
#' @importFrom purrr map_chr map_df
#' @importFrom sjmisc to_value is_empty
#' @importFrom rlang enquo .data quo_name
#' @export
means_by_group <- function(x,
                          dv,
                          grp,
                          weights = NULL,
                          digits = 2,
                          out = c("txt", "viewer", "browser"),
                          encoding = "UTF-8",
                          file = NULL) {

  out <- match.arg(out)

  if (out != "txt" && !requireNamespace("sjPlot", quietly = TRUE)) {
    message("Package `sjPlot` needs to be loaded to print HTML tables.")
    out <- "txt"
  }

  # create quosures
  grp.name <- rlang::quo_name(rlang::enquo(grp))
  dv.name <- rlang::quo_name(rlang::enquo(dv))

  # weights need extra checking, might be NULL
  if (!missing(weights)) {
    .weights <- try(rlang::quo_name(rlang::enquo(weights)), silent = TRUE)
    if (inherits(.weights, "try-error")) .weights <- NULL

    w.string <- try(eval(weights), silent = TRUE)
    if (!inherits(w.string, "try-error") && !is.null(w.string) && is.character(w.string)) .weights <- w.string

    if (sjmisc::is_empty(.weights) || .weights == "NULL") .weights <- NULL
  } else
    .weights <- NULL


  # create string with variable names
  vars <- c(grp.name, dv.name, .weights)

  # get data
  x <- suppressMessages(dplyr::select(x, !! vars))

  # set value and row labels
  varGrpLabel <- sjlabelled::get_label(x[[grp.name]], def.value = grp.name)
  varCountLabel <- sjlabelled::get_label(x[[dv.name]], def.value = dv.name)

  # first, drop unused labels
  x[[grp.name]] <- sjlabelled::drop_labels(x[[grp.name]], drop.na = TRUE)

  # now get valid value labels
  value.labels <- sjlabelled::get_labels(
    x[[grp.name]], attr.only = F, values = "n", non.labelled = TRUE
  )

  # return values
  dataframes <- list()

  # do we have a grouped data frame?
  if (inherits(x, "grouped_df")) {
    # get grouped data
    grps <- get_grouped_data(x)

    # now plot everything
    for (i in seq_len(nrow(grps))) {
      # copy back labels to grouped data frame
      tmp <- sjlabelled::copy_labels(grps$data[[i]], x)

      # get grouped means table
      dummy <- means_by_group_helper(
        x = tmp,
        dv = dv.name,
        grp = grp.name,
        weight.by = .weights,
        value.labels = value.labels,
        varCountLabel = varCountLabel,
        varGrpLabel = varGrpLabel
      )

      attr(dummy, "group") <- get_grouped_title(x, grps, i, sep = "\n")

      # save data frame for return value
      dataframes[[length(dataframes) + 1]] <- dummy
    }

    # add class-attr for print-method()
    if (out == "txt")
      class(dataframes) <- c("sj_grpmeans", "list")
    else
      class(dataframes) <- c("sjt_grpmeans", "list")

  } else {
    dataframes <- means_by_group_helper(
      x = x,
      dv = dv.name,
      grp = grp.name,
      weight.by = .weights,
      value.labels = value.labels,
      varCountLabel = varCountLabel,
      varGrpLabel = varGrpLabel
    )

    # add class-attr for print-method()
    if (out == "txt")
      class(dataframes) <- c("sj_grpmean", class(dataframes))
    else
      class(dataframes) <- c("sjt_grpmean", class(dataframes))
  }

  # save how to print output
  attr(dataframes, "print") <- out
  attr(dataframes, "encoding") <- encoding
  attr(dataframes, "file") <- file
  attr(dataframes, "digits") <- digits

  dataframes
}


#' @importFrom stats pf lm weighted.mean na.omit sd
#' @importFrom sjmisc to_value add_variables
#' @importFrom emmeans emmeans contrast
#' @importFrom dplyr pull select n_distinct
#' @importFrom purrr map_chr
#' @importFrom rlang .data
means_by_group_helper <- function(x, dv, grp, weight.by, value.labels, varCountLabel, varGrpLabel) {
  # copy vectors from data frame
  dv <- x[[dv]]
  grp <- x[[grp]]

  if (!is.null(weight.by))
    weight.by <- x[[weight.by]]
  else
    weight.by <- 1

  # convert values to numeric
  dv <- sjmisc::to_value(dv)

  # create data frame, for emmeans
  mydf <- stats::na.omit(data.frame(
    dv = dv,
    grp = as.factor(grp),
    weight.by = weight.by
  ))

  # compute anova statistics for mean table
  fit <- stats::lm(dv ~ grp, weights = weight.by, data = mydf)

  # p-values of contrast-means
  means.p <- fit %>%
    emmeans::emmeans(specs = "grp") %>%
    emmeans::contrast(method = "eff") %>%
    summary() %>%
    dplyr::pull("p.value")

  ## TODO
  # efc %>%
  #   group_by(c172code, c161sex) %>%
  #   means_by_group(c12hour, e42dep)


  # check if value labels length matches group count
  if (dplyr::n_distinct(mydf$grp) != length(value.labels)) {
    # get unique factor levels and check if these are numeric.
    # if so, we match the values from value labels and the remaining
    # factor levels, so we get the correct value labels for printing
    nl <- unique(mydf$grp)
    if (sjmisc::is_num_fac(nl))
      value.labels <- value.labels[names(value.labels) %in% levels(nl)]
    else
      value.labels <- nl
  }


  # create summary
  dat <- mydf %>%
    dplyr::group_by(.data$grp) %>%
    summarise(
      mean = stats::weighted.mean(.data$dv, w = .data$weight.by, na.rm = TRUE),
      N = round(sum(.data$weight.by)),
      std.dev = weighted_sd(.data$dv, .data$weight.by),
      std.error = weighted_se(.data$dv, .data$weight.by)
    ) %>%
    mutate(p.value = means.p) %>%
    dplyr::select(-.data$grp)

  # finally, add total-row
  dat <- dplyr::bind_rows(
    dat,
    data_frame(
      mean = stats::weighted.mean(mydf$dv, w = mydf$weight.by, na.rm = TRUE),
      N = nrow(mydf),
      std.dev = weighted_sd(mydf$dv, mydf$weight.by),
      std.error = weighted_se(mydf$dv, mydf$weight.by),
      p.value = NA
    )
  )


  # add row labels
  dat <- sjmisc::add_variables(
    dat,
    term = c(unname(value.labels), "Total"),
    .after = -1
  )


  # get anova statistics for mean table
  sum.fit <- summary(fit)

  # r-squared values
  r2 <- sum.fit$r.squared
  r2.adj <- sum.fit$adj.r.squared

  # F-statistics
  fstat <- sum.fit$fstatistic
  pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = F)


  # copy as attributes
  attr(dat, "r2") <- r2
  attr(dat, "adj.r2") <- r2.adj
  attr(dat, "fstat") <- fstat[1]
  attr(dat, "p.value") <- pval
  attr(dat, "dv.label") <- varCountLabel
  attr(dat, "grp.label") <- varGrpLabel

  dat
}


get_grouped_title <- function(x, grps, i, sep = "\n") {
  # create title for first grouping level
  tp <- get_title_part(x, grps, 1, i)
  title <- sprintf("%s: %s", tp[1], tp[2])

  # do we have another groupng variable?
  if (length(dplyr::group_vars(x)) > 1) {
    tp <- get_title_part(x, grps, 2, i)
    title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2])
  }

  # return title
  title
}


get_title_part <- function(x, grps, level, i) {
  # prepare title for group
  var.name <- colnames(grps)[level]

  # get values from value labels
  vals <- sjlabelled::get_values(x[[var.name]])
  # if we have no value labels, get values directly
  if (is.null(vals)) {
    vals <- unique(x[[var.name]])
    lab.pos <- i
  } else {
    # find position of value labels for current group
    lab.pos <- which(vals == grps[[var.name]][i])
  }

  # get variable and value labels
  t1 <- sjlabelled::get_label(x[[var.name]], def.value = var.name)
  t2 <- sjlabelled::get_labels(x[[var.name]])[lab.pos]

  # if we have no value label, use value instead
  if (is.null(t2)) t2 <- vals[lab.pos]

  # generate title
  c(t1, t2)
}


#' @rdname means_by_group
#' @export
grpmean <- means_by_group

Any scripts or data that you put into this service are public.

sjstats documentation built on Nov. 20, 2022, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sjstats
Collection of Convenient Functions for Common Statistical Computations

R/grpmean.R
In sjstats: Collection of Convenient Functions for Common Statistical Computations

Defines functions get_title_part get_grouped_title means_by_group_helper means_by_group

Documented in means_by_group

Try the sjstats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sjstats Collection of Convenient Functions for Common Statistical Computations

R/grpmean.R In sjstats: Collection of Convenient Functions for Common Statistical Computations

Defines functions get_title_part get_grouped_title means_by_group_helper means_by_group

Documented in means_by_group

Try the sjstats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sjstats
Collection of Convenient Functions for Common Statistical Computations

R/grpmean.R
In sjstats: Collection of Convenient Functions for Common Statistical Computations