knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", out.width = "100%", echo = TRUE, message = FALSE, warning = FALSE ) knitr::opts_chunk$set(fig.width=10, fig.height=4) options(tidyverse.quiet = TRUE)
The goal of epitidy is to provide a workflow to summarise, tidy up model outputs and generate clean tables in a Epidemiologist-way.
You can install the development version from GitHub with:
if(!require("remotes")) install.packages("remotes") remotes::install_github("avallecam/epitidy")
This is a basic example which shows you how to solve a common problem:
epi_tidymodel_*
: summmarize core estimates for OR, RR, PR regression models and linear regression coefficients.epi_tidymodel_up
: update raw models to generate various simple models or adjusted by one parsimous modelbroom
Here we show how to:
mosaicData
R packagedplyr::mutate
compareGroups::create*
epitidy::epi_tidymodel_rr
epitidy::epi_tidymodel_up
purrr::map
tidyverse
epitidy::epi_tidynested
library(epitidy) ## basic example code
# paquetes ---------------------------------------------------------------- set.seed(33) library(tidyverse) library(mosaicData) # library(avallecam) # imporat base ------------------------------------------------------------ data("Whickham") smoke <- Whickham %>% as_tibble() # limpieza ---------------------------------------------------------------- smoke_clean <- smoke %>% mutate( #desenlace outcome_1=as.numeric(outcome), outcome_1=outcome_1-1, outcome_2=fct_rev(outcome), #exposición smoker_2=fct_rev(smoker), #confusor #agegrp=cut(age,breaks = c(18,44,64,Inf),include.lowest = T)) agegrp=case_when( age %in% 18:44 ~ "18-44", age %in% 45:64 ~ "45-64", age > 64 ~ "65+"), agegrp=as.factor(agegrp), random_cov1=rnorm(n = n()), random_cov2=rnorm(n = n(),mean = 5,sd = 10), ) # table 1 ----------------------------------------------------------------- # outcome_1: 1 is dead smoke_clean %>% mutate(outcome_1=as.factor(outcome_1)) %>% compareGroups::compareGroups(~.,data = .) %>% compareGroups::createTable() # table 2 ----------------------------------------------------------------- smoke_clean %>% mutate(outcome_1=as.factor(outcome_1)) %>% compareGroups::compareGroups(outcome~., data = .) %>% compareGroups::createTable() # null model -------------------------------------------------------------- smoke_clean %>% pull(outcome_1) %>% mean() glm_null <- glm(outcome_1 ~ 1, data = smoke_clean, family = poisson(link = "log"), na.action = na.exclude) glm_null %>% epi_tidymodel_rr() # one simple model ------------------------------------------------------------ # write all glm(outcome_1 ~ smoker, data = smoke_clean, family = poisson(link = "log"), na.action = na.exclude) %>% epi_tidymodel_rr() # or just an update epi_tidymodel_up(reference_model = glm_null, variable = dplyr::sym("smoker")) %>% epi_tidymodel_rr() # more than one simple model ------------------------------------------------------------ simple_models <- smoke_clean %>% #transform columnames to tibble colnames() %>% enframe(name = NULL) %>% #remove non required variables filter(!magrittr::is_in(value,c("outcome","outcome_1", "outcome_2","smoker_2"))) %>% #purrr::map #create symbol, update null model, tidy up the results mutate(variable=map(value,dplyr::sym), simple_rawm=map(.x = variable, .f = epi_tidymodel_up, reference_model=glm_null), simple_tidy=map(.x = simple_rawm, .f = epi_tidymodel_rr) ) %>% #unnest coefficients unnest(cols = c(simple_tidy)) %>% #filter out intercepts filter(term!="(Intercept)") simple_models # multiple model ---------------------------------------------------------- # _ bivariate selection --------------------------------------------------- # define confounder set glm_adjusted <- epi_tidymodel_up(reference_model = glm_null, variable = dplyr::sym("agegrp")) multiple_model <- simple_models %>% #keep variables over a p value threshold filter(p.value<0.05) %>% #keep those variables select(value) %>% distinct(.keep_all = T) %>% #remove unwanted covariates: e.g. confounder related filter(!magrittr::is_in(value,c("agegrp","age"))) %>% #add new themaic covariates to evaluate as exposure add_row(value="random_cov1") %>% #add one thematic importat covariate #purrr::map #create symbol, update simple models, tidy up the results mutate(variable=map(value,dplyr::sym), multiple_rawm=map(variable,epi_tidymodel_up,reference_model=glm_adjusted), multiple_tidy=map(multiple_rawm,epi_tidymodel_rr) ) %>% unnest(cols = c(multiple_tidy)) %>% filter(term!="(Intercept)") %>% select(-variable,-multiple_rawm) %>% #remove confounders from estimated coefficients distinct(term,.keep_all = T) %>% #CAREFULL! #this only remove confunders, requires manual changes! slice(-(1:2)) multiple_model # _ final table ----------------------------------------------------------- simple_models %>% select(-variable,-simple_rawm) %>% full_join(multiple_model,by = "term",suffix=c(".s",".m")) %>% #filter(!is.na(p.value.m)) %>% #add to upper rows to add covariate name and reference category group_by(value.s) %>% nest() %>% mutate(data=map(.x = data, .f = ~add_row(.data = .x, term=".ref", .before = 1)), data=map(.x = data, .f = ~add_row(.data = .x, term=".name", .before = 1))) %>% unnest(cols = c(data)) %>% #retire columns select(-contains("log.rr"),-contains("se.")) %>% # round numeric values mutate_at(.vars = vars(rr.s,conf.low.s,conf.high.s, rr.m,conf.low.m,conf.high.m), .funs = round, digits=2) %>% mutate_at(.vars = vars(p.value.s,p.value.m), .funs = round, digits=3) %>% #join confidence intervals mutate(ci.s=str_c(conf.low.s," - ",conf.high.s), ci.m=str_c(conf.low.m," - ",conf.high.m)) %>% #remove and reorder columns select(starts_with("value"),term, starts_with("rr"),starts_with("ci"),starts_with("p.val"), -starts_with("conf")) %>% select(starts_with("value"),term,ends_with(".s"),ends_with(".m")) %>% select(-value.m) %>% #add ref to estimates mutate(rr.s=if_else(str_detect(term,".ref"),"Ref.",as.character(rr.s)), rr.m=if_else(str_detect(term,".ref"),"Ref.",as.character(rr.m))) %>% ungroup() # _ nested selection ------------------------------------------------------ #source: http://www.cookbook-r.com/Formulas/Creating_a_formula_from_a_string/ measurevar <- "outcome" groupvars <- smoke_clean %>% select_if(.predicate = !magrittr::is_in(x = colnames(.), table = c("outcome","outcome_1", "outcome_2","smoker_2"))) %>% colnames() # This returns the formula: myformula <- as.formula(paste(measurevar, paste(groupvars, collapse=" + "), sep=" ~ ")) add1(glm_null, scope = myformula, test = "LRT") %>% epi_tidynested(1) #-> rank_l1 add1(update(glm_null, ~ . + age), scope = myformula, test = "LRT") %>% epi_tidynested(2) #-> rank_l2 add1(update(glm_null, ~ . + age + agegrp), scope = myformula, test = "LRT") %>% epi_tidynested(3) #-> rank_l3 glm_nested <- update(glm_null, ~ . + age + agegrp) glm_nested %>% epi_tidymodel_or()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.