# R/quantify_numeric.R

#' @title Quantify Numeric Variable
#'
#' @description Summarizes a numeric variable as a formatted mean and standard deviation
#'   within each level of a grouping variable, optionally appending the p-value of a
#'   one-way ANOVA (equivalent to an equal-variance t-test when there are two groups).
#' @details Rows where \code{covariate} is \code{NA} are dropped before any calculation
#'   and a warning reports how many rows were removed. The p-value is only computed when
#'   \code{show_pval} is \code{TRUE}; fitting the model requires at least two levels of
#'   \code{grouping_var}.
#' @param covariate Numeric variable of interest (unquoted column name in \code{df})
#' @param df Dataset containing covariate
#' @param grouping_var Variable to group by (will be columns of table)
#' @param num_display How should results be displayed? ('PM' for mean +- sd, 'PRS' for mean (sd)).
#'   Matching is case-insensitive and \code{NULL} is treated as 'PM'.
#' @param show_pval Logical.  Should the p-value results be displayed?
#' @param digits Number of digits to round decimals to
#' @export
#' @import dplyr
#' @importFrom rlang abort eval_tidy expr warn
#' @importFrom stats anova lm sd
#' @importFrom tidyr spread
#' @return A one-row data frame (returned invisibly) with a \code{var} column holding the
#'   covariate name and one column per level of the grouping variable holding the formatted
#'   mean/sd. When \code{show_pval} is \code{TRUE} the columns \code{p_value},
#'   \code{significance} ('*' when p < 0.05) and \code{test} are appended.
#' @examples \dontrun{
#' quantify_numeric(covariate = age, df = obpv_baseline, grouping_var = obpv_quintile, digits = 1)
#' }
#'
quantify_numeric <- function(covariate, df, grouping_var, num_display = 'PM', show_pval = TRUE, digits = 1){

  grouping_var <- dplyr::enquo(grouping_var)
  covariate <- dplyr::enquo(covariate)
  cov_name <- dplyr::quo_name(covariate)

  # Validate the display style before doing any work. The NULL check must come
  # first: toupper(NULL) is character(0), which cannot be used as an `if` condition.
  if (is.null(num_display)) {
    num_display <- 'PM'
  }
  num_display <- toupper(num_display)
  if (length(num_display) != 1 || !num_display %in% c('PM', 'PRS')) {
    rlang::abort('Incorrect specification for numeric display.')
  }

  # Text placed between and after the mean and sd for the chosen style
  if (num_display == 'PM') {
    sep_open <- " \u00B1 "
    sep_close <- ""
  } else {
    sep_open <- " ("
    sep_close <- ")"
  }

  # Filter out NA's, produce warning

  fil_df <- dplyr::select(df, !!grouping_var, !!covariate)
  fil_df <- dplyr::filter(fil_df, !is.na(!!covariate))

  num_na <- nrow(df) - nrow(fil_df)

  if (num_na > 0) {
    rlang::warn(paste0("There were ", num_na, " NA's removed for ", cov_name))
  }

  # Calculate mean and sd per group, then spread the groups into columns.
  # `digits` and the separators are injected with `!!` so that a data column
  # with the same name cannot mask them inside summarize().

  grouped <- dplyr::group_by(fil_df, !!grouping_var)
  summarized <- dplyr::summarize(
    grouped,
    !!cov_name := paste0(
      format(round(mean(!!covariate), !!digits), nsmall = !!digits),
      !!sep_open,
      format(round(stats::sd(!!covariate), !!digits), nsmall = !!digits),
      !!sep_close
    )
  )
  res <- tidyr::spread(summarized, !!grouping_var, !!cov_name)

  # Combine results

  if (isTRUE(as.logical(show_pval))) {

    # Get p-value from ANOVA test (this is t-test if only two groups).
    # Only fitted when requested, so a single-level grouping variable does not
    # raise a model error when no p-value is wanted.
    response <- dplyr::pull(fil_df, !!covariate)
    group <- as.factor(dplyr::pull(fil_df, !!grouping_var))
    pval <- stats::anova(stats::lm(response ~ group))$'Pr(>F)'[1]

    # Create * flag for p-values < 0.05 and say name of test
    significance <- dplyr::if_else(pval < 0.05, '*', '')
    test <- 'T-test/ANOVA'
    p_value <- format.pval(pval, digits = 2, eps = 0.001, nsmall = 3)
    pv <- cbind(p_value, significance, test)

    res <- cbind(var = cov_name, res, pv, stringsAsFactors = FALSE)
  } else {
    res <- cbind(var = cov_name, res, stringsAsFactors = FALSE)
  }

  invisible(res)
}
# jjwillard/wfbmcphsr documentation built on May 14, 2019, 5:01 a.m.