epicoda: Supports epidemiological analyses using compositional exposure variables

Documented in forest_plot_comp

#' Produce a forest plot of model predictions
#'
#' This function takes a named list of compositions, and plots model predictions for each composition. Please note that with the default \code{terms = TRUE} predictions are relative to at the (compositional) mean composition in the data used to create the model. Please note that if there is missing data in the original data (e.g. in covariates or outcomes) this may not be the same as the compositional mean in the original data.
#'
#' @param composition_list Named list of compositions. Note each composition should be stored as a data frame. For example, use the output of \code{change_composition}.
#' @param models_list If \code{model} is \code{NULL} (or not set), a named list of models for which to plot model predictions in the forest plot. All models should have the same type.
#' @param x_label x-axis label.
#' @param xllimit Minimum value for x-axis.
#' @param xulimit Maximum value for x-axis.
#' @param text_settings An optional argument to specify text formatting. It should be an \code{fpTxtGp} object (\code{forestplot} package).
#' @param plot_log If this is \code{TRUE}, the x-axis will be log-transformed.
#' @param boxsize Sets the size of boxes for estimates on the forest plot.
#' @param pred_name Name for column of predictions in plot.
#' @inheritParams predict_fit_and_ci
#' @inheritDotParams forestplot::forestplot
#' @return Forest plot showing model predictions.
#' @examples
#' # Example using a list of models
#'   # First we set up composition list
#' df <- as.data.frame(comp_mean(data = simdata,
#'                              comp_labels = c("vigorous", "moderate",
#'                              "light", "sedentary", "sleep"),
#'                              units = "hr/day", rounded_zeroes = FALSE))
#' new_comp <- change_composition(composition = df, main_part = "moderate",
#'                              main_change = +0.5,
#'                              comp_labels = c("vigorous", "moderate",
#'                              "light", "sedentary", "sleep"))
#' new_comp2 <- change_composition(composition = df, main_part = "sedentary",
#'                              main_change = -3.5,
#'                              comp_labels = c("vigorous", "moderate",
#'                              "light", "sedentary", "sleep"))
#' list_for_plot <- list("Extra 0.5 hr/day moderate" = new_comp,
#'                       "3.5 hr/day less sedentary" = new_comp2)
#'
#'  # Then calculate models
#' lm_BMI_unadjusted <- comp_model(type = "linear", outcome = "BMI",
#'                                 data = simdata,
#'                                 comp_labels = c("vigorous", "moderate",
#'                                  "light", "sedentary", "sleep"), rounded_zeroes = FALSE)
#' lm_BMI_age_group_only <- comp_model(type = "linear", outcome = "BMI",
#'                                     covariates = c("agegroup"), data = simdata,
#'                                     comp_labels = c("vigorous", "moderate",
#'                                      "light", "sedentary", "sleep"), rounded_zeroes = FALSE)
#'
#'   # Finally, plot
#' forest_plot_comp(composition_list = list_for_plot,
#'                  models_list = list("Unadjusted" = lm_BMI_unadjusted,
#'                  "Age-adjusted" = lm_BMI_age_group_only),
#'                  comp_labels = c("vigorous", "moderate",
#'                  "light", "sedentary", "sleep"))
#' @export
forest_plot_comp <-
  function(composition_list,
           model = NULL ,
           models_list = NULL,
           comp_labels,
           x_label = NULL,
           xllimit = NULL,
           xulimit = NULL,
           plot_log = FALSE,
           text_settings = NULL,
           pred_name = NULL,
           boxsize = 0.05,
           terms = TRUE,
           units = "unitless",
           specified_units = NULL,
           part_1 = NULL,
           fixed_values = NULL,
           ...) {
    # Check list
    if (!is.list(composition_list)) {
      stop('`composition_list` should be a list.')
    }

    # Process column label
    if (is.null(pred_name)) {
      pred_name <- "Model prediction (95% CI)"
    }

    # Process formatting
    if (is.null(text_settings)) {
      text_settings <- forestplot::fpTxtGp(
        label = grid::gpar(
          fontfamily = "sans",
          cex = 1,
          fontface = 2
        ),
        xlab = grid::gpar(
          fontfamily = "sans",
          cex = 1,
          fontface = 2
        ),
        ticks = grid::gpar(cex = 0.75, fontface = 2)
      )
    }
    # Process model
    if (!is.null(model)) {
      type <- process_model_type(model)
      # We prompt consideration of a log scale for ORs/HRs
      prompt_scale(model = model,
                   terms = terms,
                   plot_log = plot_log)
    }
    if (is.null(model)) {
      type <- process_model_type(models_list[[1]])

      n_model1 <- nrow(stats::model.frame(models_list[[1]]))
      # Check all models have same type and roughly that on same data frame [not exact, just check of number of rows]
      for (i in 2:length(models_list)) {
        if (process_model_type(models_list[[i]]) != type) {
          stop("Not all models are of the same type.")
        }

        if (nrow(stats::model.frame(models_list[[i]])) != n_model1) {
          stop(
            "Models appear to be calculated on different datasets. This is currently not supported: the current defaults mean the comparator for different models would be different in different datasets, leading to confusing or uninterpretable results. See the vignette for more details."
          )
        }
      }

      # We prompt consideration of a log scale for ORs/HRs
      prompt_scale(model = models_list[[1]],
                   terms = terms,
                   plot_log = plot_log)

    }




    # Process label
    x_label <-
      process_axis_label(label = x_label,
                         type = type,
                         terms = terms)


    if (terms) {
      if (type == "cox" | type == "logistic") {
        vline_loc <- 1
      }
      if (type == "linear") {
        vline_loc <- 0
      }
    }
    if (!terms) {
      vline_loc <- NA
    }

    col_of_names <- names(composition_list)
    df <- data.table::rbindlist(composition_list, use.names = TRUE)

    if (!is.null(model)) {
      dNew <- predict_fit_and_ci(
        model = model,
        new_data = df,
        comp_labels = comp_labels,
        terms = terms,
        part_1 = part_1,
        units = units,
        specified_units = specified_units,
        fixed_values = fixed_values
      )



      if (is.null(xllimit)) {
        xllimit <- min(dNew$lower_CI)
      }
      if (is.null(xulimit)) {
        xulimit <- max(dNew$upper_CI)
      }

      if (terms) {
        xllimit <- min(xllimit, vline_loc)
        xulimit <- max(xulimit, vline_loc)
      }

      if (((xulimit - xllimit) / 0.05) <= 10) {
        req_seq <-
          seq(floor((xllimit - 0.05) / 0.05) * 0.05, ceiling((xulimit + 0.05) / 0.05) *
                0.05, by = 0.05)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 2)
      }
      if ((((xulimit - xllimit) / 0.05) > 10) &
          (((xulimit - xllimit) / 0.05) <= 20)) {
        req_seq <-
          seq(floor((xllimit - 0.1) / 0.1) * 0.1, ceiling((xulimit + 0.1) / 0.1) *
                0.1, by = 0.1)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 1)
      }
      if ((((xulimit - xllimit) / 0.05) > 20)) {
        req_seq <-
          seq(floor((xllimit - 0.5) / 0.5) * 0.5, ceiling((xulimit + 0.5) / 0.5) *
                0.5, by = 0.5)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 1)
      }

      attr(req_seq, "labels") <- req_seq_labs


      data_frame_for_forest_plot <-
        dNew[, c("fit", "lower_CI", "upper_CI")]
      colnames(data_frame_for_forest_plot) <-
        c("coef", "low", "high")

      data_frame_for_forest_plot <-
        rbind(data.frame(
          "coef" = c(NA, vline_loc),
          "low" = c(NA, vline_loc),
          "high" = c(NA, vline_loc)
        ),
        data_frame_for_forest_plot)




      text_col <-
        paste(
          format(
            round(data_frame_for_forest_plot$coef , digits = 2),
            nsmall = 2
          ),
          " (",
          format(round(
            data_frame_for_forest_plot$low, digits = 2
          ), nsmall = 2),
          ", ",
          format(
            round(data_frame_for_forest_plot$high, digits = 2),
            nsmall = 2
          ),
          ")",
          sep = ""
        )
      if (terms) {
        tabletext <-
          cbind(
            c(NA, "REFERENCE: At compositional mean", col_of_names),
            c(pred_name, vline_loc, text_col[3:nrow(data_frame_for_forest_plot)])
          )
      }
      if (!(terms)) {
        tabletext <-
          cbind(c(NA, NA, col_of_names),
                c(pred_name, vline_loc, text_col[3:nrow(data_frame_for_forest_plot)]))
      }

      fp <- forestplot::forestplot(
        tabletext,
        graph.pos = 2,
        data_frame_for_forest_plot,
        xlog = plot_log,
        clr.line = "black",
        clip = c(xllimit, xulimit),
        xticks = req_seq,
        xlab = x_label,
        zero = vline_loc,
        txt_gp = text_settings,
        boxsize = boxsize,
        ...
      )
    }

    if (is.null(model)) {
      if (is.null(models_list)) {
        return(" Either model or models_list must be set.")
      }
      data_frame_for_forest_plot <-
        data.frame("coef" = c(),
                   "low" = c(),
                   "high" = c())
      for (i in 1:nrow(df)) {
        coef <- c()
        low <- c()
        high <- c()
        for (j in 1:length(models_list)) {
          dPred <- predict_fit_and_ci(
            model = models_list[[j]],
            new_data = df[i, ],
            fixed_values = fixed_values,
            part_1 = part_1,
            comp_labels = comp_labels,
            units = units,
            specified_units = specified_units,
            terms = terms
          )
          coef <- cbind(coef, dPred$fit)
          low <- cbind(low, dPred$lower_CI)
          high <- cbind(high, dPred$upper_CI)

        }
        data_frame_for_forest_plot <-
          rbind(data_frame_for_forest_plot,
                data.frame(
                  "coef" = coef,
                  "low" = low,
                  "high" = high
                ))

      }

      if (is.null(xllimit)) {
        xllimit <- min(data_frame_for_forest_plot)
      }
      if (is.null(xulimit)) {
        xulimit <- max(data_frame_for_forest_plot)
      }


      if (terms) {
        xllimit <- min(xllimit, vline_loc)
        xulimit <- max(xulimit, vline_loc)
      }

      if (((xulimit - xllimit) / 0.05) <= 10) {
        req_seq <-
          seq(floor((xllimit - 0.05) / 0.05) * 0.05, ceiling((xulimit + 0.05) / 0.05) *
                0.05, by = 0.05)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 2)
      }
      if ((((xulimit - xllimit) / 0.05) > 10) &
          (((xulimit - xllimit) / 0.05) <= 20)) {
        req_seq <-
          seq(floor((xllimit - 0.1) / 0.1) * 0.1, ceiling((xulimit + 0.1) / 0.1) *
                0.1, by = 0.1)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 1)
      }
      if ((((xulimit - xllimit) / 0.05) > 20)) {
        req_seq <-
          seq(floor((xllimit - 0.5) / 0.5) * 0.5, ceiling((xulimit + 0.5) / 0.5) *
                0.5, by = 0.5)
        req_seq_labs <- formatC(req_seq, format = "f", digits = 1)
      }

      attr(req_seq, "labels") <- req_seq_labs

      data_frame_for_forest_plot <-
        rbind(vline_loc, data_frame_for_forest_plot)

      if (terms) {
        tabletext <-
          cbind(c("REFERENCE: At compositional mean", col_of_names))
      }
      if (!(terms)) {
        tabletext <- cbind(c(NA, col_of_names))
      }

      col_vec <-
        grDevices::hcl.colors(n = length(models_list), palette = "dark2")
      fp <- forestplot::forestplot(
        tabletext,
        graph.pos = 2,
        mean = data_frame_for_forest_plot[, colnames(data_frame_for_forest_plot)[grepl("coef", colnames(data_frame_for_forest_plot))]],
        lower = data_frame_for_forest_plot[, colnames(data_frame_for_forest_plot)[grepl("low", colnames(data_frame_for_forest_plot))]],
        upper = data_frame_for_forest_plot[, colnames(data_frame_for_forest_plot)[grepl("high", colnames(data_frame_for_forest_plot))]],
        xlog = plot_log,
        clip = c(xllimit - 0.05, xulimit + 0.05),
        xticks = req_seq,
        xlab = x_label,
        zero = vline_loc,
        txt_gp = text_settings,
        legend = names(models_list),
        col = forestplot::fpColors(
          box = col_vec,
          line = col_vec,
          zero = "black"
        ),
        boxsize = boxsize,
        ...
      )
    }

    return(fp)
  }
OxWearables/epicoda documentation built on Dec. 7, 2022, 9:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OxWearables/epicoda
Supports epidemiological analyses using compositional exposure variables

R/forest_plot_examples.R
In OxWearables/epicoda: Supports epidemiological analyses using compositional exposure variables

Defines functions forest_plot_comp

Documented in forest_plot_comp

R Package Documentation

Browse R Packages

We want your feedback!

OxWearables/epicoda Supports epidemiological analyses using compositional exposure variables

R/forest_plot_examples.R In OxWearables/epicoda: Supports epidemiological analyses using compositional exposure variables

Defines functions forest_plot_comp

Documented in forest_plot_comp

R Package Documentation

Browse R Packages

We want your feedback!

OxWearables/epicoda
Supports epidemiological analyses using compositional exposure variables

R/forest_plot_examples.R
In OxWearables/epicoda: Supports epidemiological analyses using compositional exposure variables