R/plot_functions.R

Defines functions bar_error strip_error hist_norm qq_plot box_plot bland_altman multiple gf_star axis_labs

Documented in axis_labs bar_error bland_altman box_plot gf_star hist_norm multiple qq_plot strip_error

# Functions for Plotting

#' Apply labels from variables to axis-labels in plots.
#'
#' \code{axis_labs} takes labels from labelled data to use them as axis-labels for plots generated by \code{gformula} or \code{ggplot2}.
#'
#' @param object ggplot2 object (see examples).
#' @details This functions is helpful when data has been already labelled by \code{sjlabelled}. It retrives variable labels and use them for plotting.
#' @return A ggplot2 object.
#' @examples
#' data(kfm, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' kfm <- kfm |>
#'   var_labels(
#'     dl.milk = "Breast-milk intake (dl/day)",
#'     sex = "Sex",
#'     weight = "Child weight (kg)",
#'     ml.suppl = "Milk substitute (ml/day)",
#'     mat.weight = "Maternal weight (kg)",
#'     mat.height = "Maternal height (cm)"
#'   )
#'
#' kfm |>
#'   gf_point(weight ~ dl.milk) |>
#'   gf_lm(col = 2, interval = "confidence", col = 2) |>
#'   axis_labs()
#'
#' kfm |>
#'   box_plot(dl.milk ~ sex, fill = "thistle", alpha = 0.8) |>
#'   axis_labs() |>
#'   gf_star(x1 = 1, y1 = 10.9, x2 = 2, y2 = 11, y3 = 11.2)
#' @export
axis_labs <- function(object) {
  dt <- object$data
  y <- ifelse(is.null(object$labels$sample), object$labels$y, object$labels$sample)
  x <- object$labels$x
  outcome <- dt[[y]]
  exposure <- dt[[x]]
  yl <- ifelse(is.null(sjlabelled::get_label(outcome)), y,
    sjlabelled::get_label(outcome)
  )
  xl <- ifelse(is.null(sjlabelled::get_label(exposure)), x,
    sjlabelled::get_label(exposure)
  )
  object |>
    ggformula::gf_labs(x = xl, y = yl)
}

#' Annotating a plot to display differences between groups.
#'
#' \code{gf_star} Is a function used to display differences between groups (see details).
#'
#' @param fig A gformula object.
#' @param x1 Position in \code{x} for the start of the horizontal line.
#' @param y1 Position in \code{y} for the start of the vertical line, below to the horizontal line.
#' @param x2 Position in \code{x} for the end of the horizontal line.
#' @param y2 Position in \code{y} where the horizontal line is drawn.
#' @param y3 Position in \code{y} where the text is added.
#' @param legend Character text used for annotating the plot.
#' @param ... Additional information passed to \code{\link[ggformula]{gf_text}}.
#' @details This function draws an horizontal line from coordinate (\code{x1}, \code{y2})
#' to coordinate (\code{x2}, \code{y2}). Draws vertical lines below the horizontal line,
#' towards data, from (\code{x1}, \code{y1}) to (\code{x1}, \code{y2}) and from
#' (\code{x2}, \code{y1}) to (\code{x2}, \code{y2}). Finally, adds
#' text above the horizontal line, at the mid point between \code{x1} and \code{x2}.
#' See examples.
#' @examples
#' data(kfm, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' kfm <- kfm |>
#'   var_labels(
#'     dl.milk = "Breast-milk intake (dl/day)",
#'     sex = "Sex",
#'     weight = "Child weight (kg)",
#'     ml.suppl = "Milk substitute (ml/day)",
#'     mat.weight = "Maternal weight (kg)",
#'     mat.height = "Maternal height (cm)"
#'   )
#'
#' kfm |>
#'   box_plot(dl.milk ~ sex, fill = "thistle", alpha = 0.8) |>
#'   gf_star(x1 = 1, y1 = 10.9, x2 = 2, y2 = 11, y3 = 11.2)
#'
#' kfm |>
#'   box_plot(dl.milk ~ sex, fill = "thistle", alpha = 0.8) |>
#'   gf_star(1, 10.9, 2, 11, 11.4, legend = "p = 0.035", size = 2.5)
#'
#' data(energy, package = "ISwR")
#' energy <- energy |>
#'   var_labels(
#'     expend = "Energy expenditure (MJ/day)",
#'     stature = "Stature"
#'   )
#'
#' energy |>
#'   strip_error(expend ~ stature, col = "red") |>
#'   gf_star(1, 13, 2, 13.2, 13.4, "**")
#' @export
gf_star <- function(fig, x1, y1, x2, y2, y3, legend = "*", ...) {
  dt <- data.frame(x1, y1, x2, y2, mid = mean(c(x1, x2)), y3)
  fig |>
    ggformula::gf_segment(y2 + y2 ~ x1 + x2, data = dt, inherit = FALSE) |>
    ggformula::gf_segment(y1 + y2 ~ x1 + x1, data = dt, inherit = FALSE) |>
    ggformula::gf_segment(y1 + y2 ~ x2 + x2, data = dt, inherit = FALSE) |>
    ggformula::gf_text(y3 ~ mid, data = dt, label = legend, inherit = FALSE, ...)
}

#' Multiple comparisons with plot.
#'
#' \code{multiple} displays results from post-doc analysis and constructs corresponding plot.
#'
#' @param model A fitted model supported by \code{emmeans}, such as the result of a call to \code{aov}, \code{lm}, \code{glm}, etc.
#' @param formula A formula with shape: \code{~ y} or \code{~ y|x} (for interactions). Where \code{y} is the term of the model on which comparisons are made and \code{x} is a term interacting with \code{y}.
#' @param adjust Method to adjust CIs and p-values (see details).
#' @param type Type of prediction  (matching "linear.predictor", "link", or "response").
#' @param reverse Logical argument. Determines the direction of comparisons.
#' @param level Confidence interval significance level.
#' @param digits Number of digits for rounding (default = 2).
#' @param ... Further arguments passed to \code{\link[emmeans]{emmeans}}.
#' @details
#' The default adjusting method is "mvt" which uses the multivariate t distribution.
#' Other options are: "bonferroni", "holm", "hochberg", "tukey" and "none". The default option for argument \code{reverse} is to make reverse comparisons, i.e., against the reference level matching comparisons from \code{lm} and \code{glm}.
#' @return A list with objects: \code{df} A data frame with adjusted p-values, \code{fig_ci} a plot with estimates and adjusted confidence intervals and \code{fig_pval} a plot comparing adjusted p-values.
#' @seealso \code{\link[emmeans]{emmeans}}, \code{\link[emmeans]{pwpp}}.
#' @examples
#' data(birthwt, package = "MASS")
#' birthwt$race <- factor(birthwt$race, labels = c("White", "African American", "Other"))
#'
#' model_1 <- aov(bwt ~ race, data = birthwt)
#' multiple(model_1, ~race)$df
#'
#' multiple(model_1, ~race)$fig_ci |>
#'   gf_labs(y = "Race", x = "Difference in birth weights (g)")
#'
#' multiple(model_1, ~race)$fig_pval |>
#'   gf_labs(y = "Race")
#' @export
multiple <- function(model, formula, adjust = "mvt", type = "response",
                     reverse = TRUE, level = 0.95, digits = 2, ...) {
  term_emm <- emmeans::emmeans(model, formula, type = type, ...)
  emm_df <- as.data.frame(pairs(term_emm, adjust = adjust, reverse = reverse))
  emm_ci <- as.data.frame(confint(pairs(term_emm, adjust = adjust, reverse = reverse), level = level))
  log10_pval <- log10(emm_df[["p.value"]])
  emm_df$p.value <- round_pval(emm_df[["p.value"]])
  emm_df <- emm_df |>
    dplyr::select(-df)
  emm_ci <- emm_ci |>
    dplyr::select(-df)
  vars <- all.vars(formula)
  n <- length(vars)
  if (n == 1) {
    names(emm_ci)[4:5] <- c("lower.CL", "upper.CL")
  } else {
    names(emm_ci)[5:6] <- c("lower.CL", "upper.CL")
  }
  emm_df <- cbind(emm_df, lower.CL = emm_ci[["lower.CL"]], upper.CL = emm_ci[["upper.CL"]])
  emm_df <- sjmisc::round_num(emm_df, digits = digits)
  emm_plot <- emm_df
  emm_plot$p_val <- log10_pval
  if (n == 1) {
    names(emm_plot)[2] <- "Effect"
    if (names(emm_df)[2] == "estimate") {
      fig_ci <- emm_plot |>
        ggformula::gf_pointrange(contrast ~ Effect + lower.CL + upper.CL, col = ~log10_pval, ylab = " ") |>
        ggformula::gf_vline(xintercept = ~0, lty = 2, col = "indianred")
    } else {
      fig_ci <- emm_plot |>
        ggformula::gf_pointrange(contrast ~ Effect + lower.CL + upper.CL, col = ~log10_pval, ylab = " ") |>
        ggformula::gf_vline(xintercept = ~1, lty = 2, col = "indianred")
    }
  } else {
    names(emm_plot)[3] <- "Effect"
    names(emm_plot)[2] <- "confounder"
    if (names(emm_df)[3] == "estimate") {
      fig_ci <- emm_plot |>
        ggformula::gf_pointrange(contrast ~ Effect + lower.CL + upper.CL | confounder, col = ~log10_pval, ylab = " ") |>
        ggformula::gf_vline(xintercept = ~0, lty = 2, col = "indianred")
    } else {
      fig_ci <- emm_plot |>
        ggformula::gf_pointrange(contrast ~ Effect + lower.CL + upper.CL | confounder, col = ~log10_pval, ylab = " ") |>
        ggformula::gf_vline(xintercept = ~1, lty = 2, col = "indianred")
    }
  }
  fig_pval <- emmeans::pwpp(term_emm, adjust = adjust)
  res <- list(df = emm_df, fig_ci = fig_ci, fig_pval = fig_pval)
}

#' Bland-Altman agreement plots.
#'
#' @details \code{bland_altman} constructs Bland-Altman agreement plots.
#' @details Variables in \code{formula} are continuous paired observations. When the distribution of the outcome
#' is not normal, but becomes normal with a log-transformation, \code{bland_altman} can plot the ratio between
#' outcomes (difference in the log scale) by using option \code{transform = TRUE}.
#'
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: y ~ x (see details).
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param pch Symbol for plotting data.
#' @param size Size of the symbol using to plot data.
#' @param col Colour used for the symbol to plot data.
#' @param transform Logical, should ratios instead of difference be used to construct the plot?
#' @param ... Further arguments passed to \code{\link[ggformula]{gf_point}}.
#' @examples
#' data(wright, package = "ISwR")
#'
#' wright |>
#'   bland_altman(mini.wright ~ std.wright,
#'     pch = 16,
#'     ylab = "Large-mini expiratory flow rate (l/min)",
#'     xlab = "Mean expiratory flow rate (l/min)"
#'   ) |>
#'   gf_labs(
#'     y = "Large-mini expiratory flow rate (l/min)",
#'     x = "Mean expiratory flow rate (l/min)"
#'   )
#'
#' data(Sharples)
#'
#' Sharples |>
#'   bland_altman(srweight ~ weight, transform = TRUE) |>
#'   gf_labs(x = "Mean of weights (kg)", y = "Measured weight / Self-reported weight")
#' @export
bland_altman <- function(object = NULL, formula = NULL, data = NULL,
                         pch = 20, size = 1, col = "black", transform = FALSE, ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  vars <- all.vars(formula)
  resp <- vars[1]
  expl <- vars[2]
  diff <- data[[expl]] - data[[resp]]
  ratio <- data[[expl]] / data[[resp]]
  avg <- apply(cbind(data[[expl]], data[[resp]]), 1, mean, na.rm = TRUE)
  df <- data.frame(Difference = diff, Ratio = ratio, Average = avg)
  lw1 <- as.numeric(reference_range(
    mean(diff, na.rm = TRUE),
    sd(diff, na.rm = TRUE)
  ))[1]
  up1 <- as.numeric(reference_range(
    mean(diff, na.rm = TRUE),
    sd(diff, na.rm = TRUE)
  ))[2]
  lw2 <- as.numeric(reference_range(
    mean(ratio, na.rm = TRUE),
    sd(ratio, na.rm = TRUE)
  ))[1]
  up2 <- as.numeric(reference_range(
    mean(ratio, na.rm = TRUE),
    sd(ratio, na.rm = TRUE)
  ))[2]
  if (transform == TRUE) {
    ggformula::gf_point(Ratio ~ Average,
      data = df, pch = pch,
      size = size, col = col, ...
    ) |>
      ggformula::gf_hline(yintercept = ~ mean(Ratio), col = "indianred3") |>
      ggformula::gf_hline(yintercept = ~lw2, col = "indianred3", lty = 2) |>
      ggformula::gf_hline(yintercept = ~up2, col = "indianred3", lty = 2)
  } else {
    ggformula::gf_point(Difference ~ Average,
      data = df, pch = pch,
      size = size, col = col, ...
    ) |>
      ggformula::gf_hline(yintercept = ~ mean(Difference), col = "indianred3") |>
      ggformula::gf_hline(yintercept = ~lw1, col = "indianred3", lty = 2) |>
      ggformula::gf_hline(yintercept = ~up1, col = "indianred3", lty = 2)
  }
}


#' Construct box plots.
#'
#' \code{box_plot} is a wrap function that calls \code{gf_boxplot} to construct more aesthetic box plots.
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{y ~ x} where \code{y} is a numerical variable and \code{x} is a factor.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param fill Colour used for the box passed to \code{\link[ggformula]{gf_boxplot}}.
#' @param alpha Opacity (0 = invisible, 1 = opaque).
#' @param outlier.shape Shape (\code{pch}) used as symbol for the outliers.
#' @param outlier.size Size of the outlier symbol.
#' @param ... Further arguments passed to \code{\link[ggformula]{gf_boxplot}}.
#' @examples
#' data(kfm, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' kfm <- kfm |>
#'   var_labels(
#'     dl.milk = "Breast-milk intake (dl/day)",
#'     sex = "Sex",
#'     weight = "Child weight (kg)",
#'     ml.suppl = "Milk substitute (ml/day)",
#'     mat.weight = "Maternal weight (kg)",
#'     mat.height = "Maternal height (cm)"
#'   )
#'
#' kfm |>
#'   box_plot(dl.milk ~ sex, fill = "thistle", alpha = 0.8)
#'
#' t.test(dl.milk ~ sex, data = kfm)
#'
#' kfm |>
#'   box_plot(dl.milk ~ sex, fill = "thistle", alpha = 0.8) |>
#'   gf_star(1, 10.9, 2, 11, 11.4, legend = "p = 0.035", size = 2.5)
#' @export
box_plot <- function(object = NULL, formula = NULL, data = NULL,
                     fill = "indianred3", alpha = 0.7,
                     outlier.shape = 20, outlier.size = 1, ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  vars <- all.vars(formula)
  y <- vars[1]
  x <- vars[2]
  outcome <- data[[y]]
  exposure <- data[[x]]
  ggformula::gf_boxplot(formula,
    data = data, fill = fill, alpha = alpha,
    outlier.shape = outlier.shape,
    outlier.size = outlier.size, ...
  )
}

#' Quantile-quantile plots against the standard Normal distribution.
#'
#' \code{qq_plot} constructs quantile-quantile plots against the standard normal distribution
#' (also known as quantile-normal plots).
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{ ~ x} or \code{ ~ x|z} where \code{x} is a numerical variable and \code{z} is a factor.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param pch Point character passed to \code{\link[ggformula]{gf_qq}}.
#' @param col Colour of the reference line, passed to \code{\link[ggformula]{gf_line}}.
#' @param ylab Optional character passed as label for the y-axis.
#' @param ... Further arguments passed to \code{\link[ggformula]{gf_qq}}.
#' @examples
#' data(kfm, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' kfm <- kfm |>
#'   var_labels(
#'     dl.milk = "Breast-milk intake (dl/day)",
#'     sex = "Sex",
#'     weight = "Child weight (kg)",
#'     ml.suppl = "Milk substitute (ml/day)",
#'     mat.weight = "Maternal weight (kg)",
#'     mat.height = "Maternal height (cm)"
#'   )
#'
#' kfm |>
#'   qq_plot(~dl.milk)
#'
#' qq_plot(~ dl.milk | sex, data = kfm)
#' @export
qq_plot <- function(object = NULL, formula = NULL, data = NULL, pch = 20,
                    col = "indianred3", ylab = NULL, ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  vars <- all.vars(formula)
  y <- vars[1]
  yl <- ifelse(is.null(ylab), y, ylab)
  ggformula::gf_qq(formula,
    data = data, pch = pch, xlab = "Theoretical quantiles",
    ylab = yl, ...
  ) |>
    ggformula::gf_qqline(col = col, linetype = 1, lwd = 1)
}

#' Histogram with Normal density curve.
#'
#' \code{hist_norm} constructs histograms and adds corresponding Normal density curve.
#'
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{ ~ y} or \code{~ y|x} where \code{y} is a numerical variable and \code{x} is a factor.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param bins Number of bins of the histogram.
#' @param fill Colour to fill the bars of the histogram.
#' @param color Colour used for the border of the bars.
#' @param alpha Opacity (0 = invisible, 1 = opaque).
#' @param ... Further arguments passed to \code{\link[ggformula]{gf_dhistogram}}.
#' @examples
#' require(dplyr, quietly = TRUE)
#' require(sjlabelled, quietly = TRUE)
#' data(birthwt, package = "MASS")
#' birthwt <- birthwt |>
#'   mutate(
#'     smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
#'     Race = factor(race > 1, labels = c("White", "Non-white"))
#'   ) |>
#'   var_labels(
#'     bwt = "Birth weight (g)",
#'     smoke = "Smoking status"
#'   )
#'
#' birthwt |>
#'   hist_norm(~bwt, alpha = 0.7, bins = 20, fill = "cadetblue")
#'
#' birthwt |>
#'   hist_norm(~ bwt | smoke, alpha = 0.7, bins = 20, fill = "cadetblue")
#' @export
hist_norm <- function(object = NULL, formula = NULL, data = NULL,
                      bins = 20, fill = "indianred3",
                      color = "black", alpha = 0.4, ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  vars <- all.vars(formula)
  y <- vars[1]
  outcome <- data[[y]]
  ggformula::gf_dhistogram(formula,
    data = data, fill = fill, bins = bins,
    alpha = alpha, ylab = "Density", color = color, ...
  ) |>
    ggformula::gf_fitdistr(lwd = 0.7)
}
#' Strip plots with error bars.
#'
#' \code{strip_error} constructs strip plots with error bars showing 95% bootstrapped
#' confidence intervals around mean values.
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{y ~ x} or \code{y ~ x|z} where \code{y} is a
#' numerical variable and both \code{x} and \code{z} are factors.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param pch Point character passed to \code{\link[ggformula]{gf_point}} or \code{\link[ggformula]{gf_jitter}}.
#' @param size Size of the symbol (\code{pch}) for representing data values.
#' @param alpha Opacity of the symbol (0 = invisible, 1 = opaque).
#' @param col A colour or formula used for mapping colour.
#' @param ... Additional information passed to \code{\link[ggformula]{gf_jitter}} or \code{\link[ggformula]{gf_point}}.
#' @examples
#' data(energy, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' energy <- energy |>
#'   var_labels(
#'     expend = "Energy expenditure (MJ/day)",
#'     stature = "Stature"
#'   )
#'
#' energy |>
#'   strip_error(expend ~ stature, col = "red")
#'
#' t.test(expend ~ stature, data = energy)
#'
#' ## Adding an horizontal line to show significant difference:
#' energy |>
#'   strip_error(expend ~ stature, col = "red") |>
#'   gf_star(1, 13, 2, 13.2, 13.4, "**")
#'
#' data(birthwt, package = "MASS")
#' require(dplyr, quietly = TRUE)
#' birthwt <- birthwt |>
#'   mutate(
#'     smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
#'     Race = factor(race > 1, labels = c("White", "Non-white"))
#'   ) |>
#'   var_labels(
#'     bwt = "Birth weight (g)",
#'     smoke = "Smoking status"
#'   )
#'
#' birthwt |>
#'   strip_error(bwt ~ smoke | Race, col = "darksalmon")
#'
#' birthwt |>
#'   strip_error(bwt ~ smoke, col = ~Race)
#'
#' birthwt |>
#'   strip_error(bwt ~ smoke, pch = ~Race, col = ~Race)
#'
#' birthwt |>
#'   strip_error(bwt ~ smoke | Race)
#' @export
strip_error <- function(object = NULL, formula = NULL, data = NULL,
                        pch = 20, size = 1, alpha = 0.7, col = "indianred3", ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  if (is.character(col) & is.numeric(pch)) {
    ggformula::gf_jitter(formula,
      data = data, width = 0.1, height = 0, alpha = alpha,
      pch = pch, size = size, col = col, ...
    ) |>
      ggformula::gf_summary(fun.data = "mean_cl_boot", geom = "pointrange", fatten = 1)
  } else {
    ggformula::gf_point(formula,
      data = data, alpha = alpha,
      pch = pch, size = size, col = col,
      position = ggplot2::position_jitterdodge(
        dodge.width = 0.7,
        jitter.width = 0.1
      ), ...
    ) |>
      ggformula::gf_summary(
        fun.data = "mean_cl_boot", geom = "pointrange", fatten = 1,
        position = ggplot2::position_dodge(width = 0.7)
      )
  }
}

#' Bar charts with error bars.
#'
#' \code{bar_error} constructs bar charts in with error bars showing 95% bootstrapped
#' confidence intervals around mean values. High of bars represent mean values.
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{y ~ x} or \code{y ~ x|z} where \code{y} is a
#' numerical variable and both \code{x} and \code{z} are factors.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param fill Colour used to fill the bars.
#' @param col Colour used for the borders of the bars.
#' @param alpha Opacity of the colour fill (0 = invisible, 1 = opaque).
#' @param ... Additional information passed to \code{\link[ggformula]{gf_summary}}.
#' @examples
#' require(dplyr, quietly = TRUE)
#' require(sjlabelled, quietly = TRUE)
#' data(birthwt, package = "MASS")
#'
#' birthwt <- birthwt |>
#'   mutate(
#'     smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
#'     Race = factor(race > 1, labels = c("White", "Non-white"))
#'   ) |>
#'   var_labels(
#'     bwt = "Birth weight (g)",
#'     smoke = "Smoking status"
#'   )
#'
#' birthwt |>
#'   bar_error(bwt ~ smoke, fill = "plum3")
#'
#' birthwt |>
#'   bar_error(bwt ~ smoke | Race, fill = "plum3")
#'
#' birthwt |>
#'   bar_error(bwt ~ smoke, fill = ~Race)
#' @export
bar_error <- function(object = NULL, formula = NULL, data = NULL,
                      fill = "indianred3", col = "black", alpha = 0.7, ...) {
  if (inherits(object, "formula")) {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  if (is.character(fill) & is.character(col)) {
    ggformula::gf_summary(formula,
      fun = "mean", data = data, geom = "bar",
      fill = fill, alpha = alpha, col = col, ...
    ) |>
      ggformula::gf_summary(fun.data = "mean_cl_boot", geom = "errorbar", width = 0.3)
  } else {
    ggformula::gf_summary(formula,
      data = data, fill = fill, fun = "mean", geom = "bar",
      alpha = alpha, col = col, position = ggplot2::position_dodge(), ...
    ) |>
      ggformula::gf_summary(
        fun.data = "mean_cl_boot", geom = "errorbar", width = 0.3,
        position = ggplot2::position_dodge(width = 1)
      )
  }
}

#' Generate a data frame with estimate and bootstrap CIs.
#'
#' \code{gen_bst_df} is a function called that generates a data frame with
#' confidence intervals of a continuous variable by levels of one or two categorical ones (factors).
#' @param object When chaining, this holds an object produced in the earlier portions of the chain. Most users can safely ignore this argument. See details and examples.
#' @param formula A formula with shape: \code{y ~ x} or \code{y ~ x|z} where \code{y} is a
#' numerical variable and both \code{x} and \code{z} are factors.
#' @param data A data frame where the variables in the \code{formula} can be found.
#' @param stat Statistic used for \link{bst}.
#' @param ... Passes optional arguments to \link{bst}.
#' @return A data frame with the confidence intervals by level.
#' @examples
#' data(kfm, package = "ISwR")
#' require(sjlabelled, quietly = TRUE)
#' kfm <- kfm |>
#'   var_labels(
#'     dl.milk = "Breast-milk intake (dl/day)",
#'     sex = "Sex",
#'     weight = "Child weight (kg)",
#'     ml.suppl = "Milk substitute (ml/day)",
#'     mat.weight = "Maternal weight (kg)",
#'     mat.height = "Maternal height (cm)"
#'   )
#'
#' kfm |>
#'   gen_bst_df(dl.milk ~ sex)
#'
#' data(birthwt, package = "MASS")
#' require(dplyr, quietly = TRUE)
#' birthwt <- mutate(birthwt,
#'   smoke = factor(smoke, labels = c("Non-smoker", "Smoker")),
#'   Race = factor(race > 1, labels = c("White", "Non-white"))
#' )
#'
#' birthwt <- birthwt |>
#'   var_labels(
#'     bwt = "Birth weight (g)",
#'     smoke = "Smoking status"
#'   )
#'
#' gen_bst_df(bwt ~ smoke | Race, data = birthwt)
#' @export
gen_bst_df = function (object = NULL, formula = NULL, data = NULL, stat = "mean", ...)
{
  if (inherits(object, "formula"))
  {
    formula <- object
    object <- NULL
  }
  if (inherits(object, "data.frame")) {
    data <- object
    object <- NULL
  }
  vars <- all.vars(formula)
  resp <- vars[1]
  expl <- vars[2]
  outcome <- data[[resp]]
  exposure <- data[[expl]]
  nv <- length(vars)
  if (nv == 2) {
    n <- length(levels(exposure))
    first <- tapply(outcome, exposure, bst, stat, ...)
    second <- numeric(n * 3)
    dim(second) <- c(n, 3)
    pos <- c(2, 4, 5)
    for (i in 1:n) second[i, ] <- as.numeric(first[[i]][pos])
    df <- as.data.frame(second)
    names(df) <- c(resp, "LowerCI", "UpperCI")
    final <- data.frame(df, factor(levels(exposure), levels = levels(exposure)))
    resp <- ifelse(is.null(sjlabelled::get_label(outcome)),
                   resp, sjlabelled::get_label(outcome))
    expl <- ifelse(is.null(sjlabelled::get_label(exposure)),
                   expl, sjlabelled::get_label(exposure))
    names(final)[1] <- resp
    names(final)[4] <- expl
    final
  }
  else {
    condit <- vars[3]
    strat <- data[[condit]]
    n1 <- length(levels(exposure))
    n2 <- length(levels(strat))
    n <- n1 * n2
    first <- tapply(outcome, survival::strata(exposure, strat),
                    bst, stat, ...)
    second <- numeric(n * 3)
    dim(second) <- c(n, 3)
    pos <- c(2, 4, 5)
    for (i in 1:n) second[i, ] <- as.numeric(first[[i]][pos])
    df <- as.data.frame(second)
    names(df) <- c(resp, "LowerCI", "UpperCI")
    gp1 <- factor(rep(levels(exposure), each = n2), levels = levels(exposure))
    gp2 <- factor(rep(levels(strat), n1), levels = levels(strat))
    final <- data.frame(df, gp1, gp2)
    resp <- ifelse(is.null(sjlabelled::get_label(outcome)),
                   resp, sjlabelled::get_label(outcome))
    expl <- ifelse(is.null(sjlabelled::get_label(exposure)),
                   expl, sjlabelled::get_label(exposure))
    strat <- ifelse(is.null(sjlabelled::get_label(strat)),
                    expl, sjlabelled::get_label(strat))
    names(final)[1] <- resp
    names(final)[4] <- expl
    names(final)[5] <- condit
    final
  }
}

Try the pubh package in your browser

Any scripts or data that you put into this service are public.

pubh documentation built on Oct. 8, 2024, 9:08 a.m.