In avallecam/epitidy: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%",
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)
knitr::opts_chunk$set(fig.width=10, fig.height=4)
options(tidyverse.quiet = TRUE)

epitidy

The goal of epitidy is to provide a workflow to summarise, tidy up model outputs and generate clean tables in a Epidemiologist-way.

Installation

You can install the development version from GitHub with:

if(!require("remotes")) install.packages("remotes")
remotes::install_github("avallecam/epitidy")

Example

This is a basic example which shows you how to solve a common problem:

core functions

epi_tidymodel_*: summmarize core estimates for OR, RR, PR regression models and linear regression coefficients.
epi_tidymodel_up: update raw models to generate various simple models or adjusted by one parsimous model
all of these are based on broom

reproducible workflow

Here we show how to:

import a dataset from the mosaicData R package
clean it to create variables using dplyr::mutate
create a classic table 1 and table 2 with compareGroups::create*
create a null regression model using epitidy::epi_tidymodel_rr
create a simple model by scratch or updating the null model with one variable using epitidy::epi_tidymodel_up
create more than one simple models using purrr::map
create more than one multiple models with one common set of confunders using all tools above
create a final table mixing the simple and multiple regression models because tidyverse
perform a nested selection model procedure with epitidy::epi_tidynested

library(epitidy)
## basic example code

# paquetes ----------------------------------------------------------------

set.seed(33)

library(tidyverse)
library(mosaicData)
# library(avallecam)

# imporat base ------------------------------------------------------------

data("Whickham")
smoke <- Whickham %>% as_tibble()

# limpieza ----------------------------------------------------------------

smoke_clean <- smoke %>%
  mutate(
    #desenlace
    outcome_1=as.numeric(outcome),
    outcome_1=outcome_1-1,
    outcome_2=fct_rev(outcome),
    #exposiciÃ³n
    smoker_2=fct_rev(smoker),
    #confusor
    #agegrp=cut(age,breaks = c(18,44,64,Inf),include.lowest = T))
    agegrp=case_when(
      age %in% 18:44 ~ "18-44",
      age %in% 45:64 ~ "45-64",
      age > 64 ~ "65+"),
    agegrp=as.factor(agegrp),
    random_cov1=rnorm(n = n()),
    random_cov2=rnorm(n = n(),mean = 5,sd = 10),
  )

# table 1 -----------------------------------------------------------------

# outcome_1: 1 is dead
smoke_clean %>%
  mutate(outcome_1=as.factor(outcome_1)) %>%
  compareGroups::compareGroups(~.,data = .) %>%
  compareGroups::createTable()

# table 2 -----------------------------------------------------------------

smoke_clean %>%
  mutate(outcome_1=as.factor(outcome_1)) %>%
  compareGroups::compareGroups(outcome~.,
                               data = .) %>%
  compareGroups::createTable()

# null model --------------------------------------------------------------

smoke_clean %>% pull(outcome_1) %>% mean()

glm_null <- glm(outcome_1 ~ 1,
                data = smoke_clean,
                family = poisson(link = "log"),
                na.action = na.exclude)

glm_null %>% epi_tidymodel_rr()

# one simple model ------------------------------------------------------------

# write all
glm(outcome_1 ~ smoker,
    data = smoke_clean,
    family = poisson(link = "log"),
    na.action = na.exclude) %>%
  epi_tidymodel_rr()

# or just an update
epi_tidymodel_up(reference_model = glm_null,
                 variable = dplyr::sym("smoker")) %>%
  epi_tidymodel_rr()

# more than one simple model ------------------------------------------------------------

simple_models <- smoke_clean %>%
  #transform columnames to tibble
  colnames() %>%
  enframe(name = NULL) %>%
  #remove non required variables
  filter(!magrittr::is_in(value,c("outcome","outcome_1",
                                  "outcome_2","smoker_2"))) %>%
  #purrr::map
  #create symbol, update null model, tidy up the results
  mutate(variable=map(value,dplyr::sym),
         simple_rawm=map(.x = variable, .f = epi_tidymodel_up, reference_model=glm_null),
         simple_tidy=map(.x = simple_rawm, .f = epi_tidymodel_rr)
  ) %>%
  #unnest coefficients
  unnest(cols = c(simple_tidy)) %>%
  #filter out intercepts
  filter(term!="(Intercept)")

simple_models

# multiple model ----------------------------------------------------------

# _ bivariate selection ---------------------------------------------------

# define confounder set
glm_adjusted <- epi_tidymodel_up(reference_model = glm_null,
                                 variable = dplyr::sym("agegrp"))

multiple_model <- simple_models %>%
  #keep variables over a p value threshold
  filter(p.value<0.05) %>%
  #keep those variables
  select(value) %>%
  distinct(.keep_all = T) %>%
  #remove unwanted covariates: e.g. confounder related
  filter(!magrittr::is_in(value,c("agegrp","age"))) %>%
  #add new themaic covariates to evaluate as exposure
  add_row(value="random_cov1") %>% #add one thematic importat covariate
  #purrr::map
  #create symbol, update simple models, tidy up the results
  mutate(variable=map(value,dplyr::sym),
         multiple_rawm=map(variable,epi_tidymodel_up,reference_model=glm_adjusted),
         multiple_tidy=map(multiple_rawm,epi_tidymodel_rr)
  ) %>%
  unnest(cols = c(multiple_tidy)) %>%
  filter(term!="(Intercept)") %>%
  select(-variable,-multiple_rawm) %>%
  #remove confounders from estimated coefficients
  distinct(term,.keep_all = T) %>%
  #CAREFULL!
  #this only remove confunders, requires manual changes!
  slice(-(1:2))

multiple_model

# _ final table -----------------------------------------------------------

simple_models %>%
  select(-variable,-simple_rawm) %>%
  full_join(multiple_model,by = "term",suffix=c(".s",".m")) %>%
  #filter(!is.na(p.value.m)) %>%
  #add to upper rows to add covariate name and reference category
  group_by(value.s) %>%
  nest() %>%
  mutate(data=map(.x = data,
                  .f = ~add_row(.data = .x,
                                term=".ref",
                                .before = 1)),
         data=map(.x = data,
                  .f = ~add_row(.data = .x,
                                term=".name",
                                .before = 1))) %>%
  unnest(cols = c(data)) %>%
  #retire columns
  select(-contains("log.rr"),-contains("se.")) %>%
  # round numeric values
  mutate_at(.vars = vars(rr.s,conf.low.s,conf.high.s,
                         rr.m,conf.low.m,conf.high.m),
            .funs = round, digits=2) %>%
  mutate_at(.vars = vars(p.value.s,p.value.m),
            .funs = round, digits=3) %>%
  #join confidence intervals
  mutate(ci.s=str_c(conf.low.s," - ",conf.high.s),
         ci.m=str_c(conf.low.m," - ",conf.high.m)) %>%
  #remove and reorder columns
  select(starts_with("value"),term,
         starts_with("rr"),starts_with("ci"),starts_with("p.val"),
         -starts_with("conf")) %>%
  select(starts_with("value"),term,ends_with(".s"),ends_with(".m")) %>%
  select(-value.m) %>%
  #add ref to estimates
  mutate(rr.s=if_else(str_detect(term,".ref"),"Ref.",as.character(rr.s)),
         rr.m=if_else(str_detect(term,".ref"),"Ref.",as.character(rr.m))) %>%
  ungroup()


# _ nested selection ------------------------------------------------------

#source: http://www.cookbook-r.com/Formulas/Creating_a_formula_from_a_string/
measurevar <- "outcome"
groupvars  <- smoke_clean %>%
  select_if(.predicate = !magrittr::is_in(x = colnames(.),
                                          table = c("outcome","outcome_1",
                                                    "outcome_2","smoker_2"))) %>%
  colnames()

# This returns the formula:
myformula <- as.formula(paste(measurevar,
                              paste(groupvars, collapse=" + "),
                              sep=" ~ "))

add1(glm_null,
     scope = myformula,
     test = "LRT") %>%
  epi_tidynested(1) #-> rank_l1

add1(update(glm_null, ~ . + age),
     scope = myformula,
     test = "LRT") %>%
  epi_tidynested(2) #-> rank_l2

add1(update(glm_null, ~ . + age + agegrp),
     scope = myformula,
     test = "LRT") %>%
  epi_tidynested(3) #-> rank_l3

glm_nested <- update(glm_null, ~ . + age + agegrp)
glm_nested %>% epi_tidymodel_or()