R/extract_stats.R

Defines functions pool_mean_sd err_msg summarise_glm summarise_lm is.monomorphic is.binary extract_sum_stats getmode

Documented in err_msg extract_sum_stats getmode is.binary is.monomorphic pool_mean_sd summarise_glm summarise_lm

#' Extract mode from a numeric vector
#' 
#' @param v numeric vector
#' @export
#' @return mode of numeric vector
getmode <- function(v) {
    uniqv <- unique(v)
    tabv <- tabulate(match(v, uniqv))
    modev <- uniqv[which(tabv == max(tabv))]
    if (length(modev) > 1) {
        warning("There are multiple modes")
    }
    return(modev)
}

#' Extract summary statistics from numeric vector
#' 
#' @param dat numeric vector
#' @param min extract minimum value
#' @param q1 extract first quartile
#' @param med extract median
#' @param mean extract mean
#' @param q3 extract third quartile
#' @param max extract maximum value
#' @param na extract number of NA values
#' @export
#' @return data.frame of summary stats selected
extract_sum_stats <- function(dat, min = T, q1 = T, med = T, mean = T, q3 = T, max = T, na = T) {
  sum_tab <- data.frame(min = NA, q1 = NA, med = NA, mean = NA, q3 = NA, max = NA, na = NA)

  y <- c(sum(min), sum(q1), sum(med), sum(mean), sum(q3), sum(max), sum(na))
  names(y) <- 1:7
  y <- y[y == 1]

  for (i in names(y)) {
    j <- as.numeric(i)
    sum_tab[1, j] <- summary(dat)[j]
  }
  
  sum_tab <- sum_tab[, !is.na(sum_tab)]

  return(sum_tab)
}

#' Check if a vector is binary
#' 
#' @param v vector
#' @export
#' @return TRUE or FALSE depending on whether the vector is binary
is.binary <- function(v) {
  x <- unique(v)
  length(x) - sum(is.na(x)) == 2L
}

#' Check if a vector is monomorphic
#' 
#' @param v vector
#' @export
#' @return TRUE or FALSE depending on whether the vector is monomorphic
is.monomorphic <- function(v) {
  x <- unique(v)
  length(x) - sum(is.na(x)) == 1L
}


#' Extract summary stats from lm()
#'  
#' @param fit regression output from lm() function
#' @param outcome the outcome variable
#' @param exposure the exposure variable 
#' @export 
#' @return data.frame containing outcomes, estimate, se, p and CIs, residuals and the input
summarise_lm <- function(fit, outcome, exposure) {
  stopifnot(class(fit) == "lm")
  summ <- as.matrix(c(summary(fit)$coef[exposure, ], confint(fit)[exposure, ], summary(fit)$adj.r.squared))
  summ <- as.data.frame(t(summ))
  sum_tab <- dplyr::mutate(summ, outcome = outcome)
  sum_tab <-  dplyr::select(sum_tab, outcome, everything(), -`t value`)

  colnames(sum_tab) <- c("outcome", "estimate", "se", "p", "CI_low", "CI_up", "adj_r2") 
  
  res <- resid(fit)
  covars <- names(fit$coef)[!(names(fit$coef) %in% c("(Intercept)", exposure))]

  out <- list(sum_tab, res, covars, fit)
  names(out) <- c("summary_data", "residuals", "covars", "fit")
  return(out)
}

#' Extract summary stats from glm() when doing logistic regression
#'  
#' @param fit regression output from glm() function
#' @param outcome the outcome variable
#' @param exposure the exposure variable 
#' 
#' @export 
#' @return data.frame containing outcomes, estimate, se, p and CIs, residuals and the input
summarise_glm <- function(fit, outcome, exposure) {
  stopifnot(class(fit)[1] == "glm")
  summ <- as.matrix(c(summary(fit)$coef[exposure, ], confint(fit)[exposure, ], summary(fit)$adj.r.squared))
  summ <- as.data.frame(t(summ))
  sum_tab <- dplyr::mutate(summ, outcome = outcome)
  sum_tab <- dplyr::select(sum_tab, outcome, everything(), -`t value`)

  colnames(sum_tab) <- c("outcome", "estimate", "se", "p", "CI_low", "CI_up") 
  
  res <- resid(fit)
  covars <- names(fit$coef)[!(names(fit$coef) %in% c("(Intercept)", exposure))]

  out <- list(sum_tab, res, covars, fit)
  names(out) <- c("summary_data", "residuals", "covars", "fit")
  return(out)
}

#' Function for dealing with error messages when using tryCatch
#' 
#' @param e error message from function 
#' @param r_msg print the error message given by the function
#' @param user_msg a message given by the user
#' @param to_return what should be returned if there is an error 
#' 
#' @export
#' @return what is chosen by the user to be returned, default = NA 
err_msg <- function(e, r_msg = TRUE, user_msg = NULL, to_return = NA) {
  if (r_msg) print(e)
  if (!is.null(user_msg)) print(user_msg)
  return(to_return)
}


#' Pool mean and SD estimates from two groups or cohorts
#' 
#' @param n1 sample size of group 1
#' @param n2 sample size of group 2
#' @param m1 mean of group 1
#' @param m2 mean of group 2
#' @param s1 SD of group 1
#' @param s2 SD of group 2
#' 
#' @export
#' @return list with pooled mean and SD estimates
pool_mean_sd <- function(n1, n2, m1, m2, s1, s2)
{
  sd_out <- sqrt(((n1-1)*s1*s1 + (n2-1)*s2*s2 + n1 * n2 / (n1 + n2) * (m1*m1 + m2*m2 - 2 * m1 * m2)) / (n1 + n2 -1))
  mean_out <- ((mean(m1) * n1) + (mean(m2) * n2)) / (n1 + n2)
  return(list(sd = sd_out, mean = mean_out))
}
thomasbattram/usefunc documentation built on April 24, 2023, 1:46 p.m.