inst/doc/missing.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  # Make sure finalfit is up-to-date
#  install.packages("finalfit")

## -----------------------------------------------------------------------------
library(finalfit) 

# Create some extra missing data
## Smoking missing completely at random
set.seed(1)
colon_s$smoking_mcar = 
  sample(c("Smoker", "Non-smoker", NA), 
    dim(colon_s)[1], replace=TRUE, 
    prob = c(0.2, 0.7, 0.1)) %>% 
  factor() %>% 
	ff_label("Smoking (MCAR)")

## Smoking missing conditional on patient sex
colon_s$smoking_mar[colon_s$sex.factor == "Female"] = 
  sample(c("Smoker", "Non-smoker", NA), 
    sum(colon_s$sex.factor == "Female"), 
    replace = TRUE,
    prob = c(0.1, 0.5, 0.4))

colon_s$smoking_mar[colon_s$sex.factor == "Male"] = 
  sample(c("Smoker", "Non-smoker", NA), 
    sum(colon_s$sex.factor == "Male"), 
    replace=TRUE, prob = c(0.15, 0.75, 0.1))
colon_s$smoking_mar = factor(colon_s$smoking_mar) %>% 
	ff_label("Smoking (MAR)")

# Examine with ff_glimpse
explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor",  
  "smoking_mcar", "smoking_mar")
dependent = "mort_5yr"

colon_s %>% 
  ff_glimpse(dependent, explanatory)

## ----eval=FALSE---------------------------------------------------------------
#  colon_s %>%
#    ff_glimpse()

## ----eval=FALSE---------------------------------------------------------------
#  library(dplyr)
#  colon_s %>%
#    select(-hospital) %>%
#    ff_glimpse()

## ----eval=FALSE---------------------------------------------------------------
#  colon_s %>%
#    missing_plot()

## -----------------------------------------------------------------------------
explanatory = c("age", "sex.factor", 
  "obstruct.factor",  
  "smoking_mcar", "smoking_mar")
dependent = "mort_5yr"

colon_s %>% 
  missing_pattern(dependent, explanatory)

## -----------------------------------------------------------------------------
# Explanatory or confounding variables
explanatory = c("age", "sex.factor", 
  "nodes",  
  "smoking_mcar", "smoking_mar")

# Explanatory variable of interest
dependent = "obstruct.factor" # Bowel obstruction

colon_s %>% 
  summary_factorlist(dependent, explanatory, 
  na_include=TRUE, p=TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  explanatory = c("age", "sex.factor",
#    "nodes", "obstruct.factor",
#    "smoking_mcar", "smoking_mar")
#  dependent = "mort_5yr"
#  colon_s %>%
#    missing_pairs(dependent, explanatory)

## ----eval=FALSE---------------------------------------------------------------
#  colon_s %>%
#    missing_pairs(dependent, explanatory, position = "fill", )

## -----------------------------------------------------------------------------
explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor")
dependent = "smoking_mcar"
colon_s %>% 
  missing_compare(dependent, explanatory) %>% 
	knitr::kable(row.names=FALSE, align = c("l", "l", "r", "r", "r")) # Omit when you run

## -----------------------------------------------------------------------------
dependent = "smoking_mar"
colon_s %>% 
  missing_compare(dependent, explanatory) %>% 
	knitr::kable(row.names=FALSE, align = c("l", "l", "r", "r", "r")) # Omit when you run

## ----eval=FALSE---------------------------------------------------------------
#  library(finalfit)
#  library(dplyr)
#  library(MissMech)
#  explanatory = c("age", "nodes")
#  dependent = "mort_5yr"
#  
#  colon_s %>%
#    select(explanatory) %>%
#    MissMech::TestMCARNormality()

## -----------------------------------------------------------------------------
explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor",  
  "smoking_mcar")
dependent = "mort_5yr"
colon_s %>% 
	finalfit(dependent, explanatory) %>% 
	knitr::kable(row.names=FALSE, align = c("l", "l", "r", "r", "r", "r")) # Omit when you run

## ----fig.height=3, fig.width=10, message=FALSE, warning=FALSE-----------------
# Multivariate Imputation by Chained Equations (mice)
library(finalfit)
library(dplyr)
library(mice)
explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor", "smoking_mar")
dependent = "mort_5yr"

# Choose whether to impute missing values
# from particular variables, e.g. explanatory variable of interest
# or outcome variable. 
# Outcome often imputed as best practice, dropped here for demonstration. 
colon_s %>% 
	select(dependent, explanatory) %>% 
	missing_predictorMatrix(
		drop_from_imputed = c("mort_5yr"),
		drop_from_imputer = ""
		) -> predM

# This is needed to drop from imputed
m0 = mice(colon_s %>% 
            select(dependent, explanatory), maxit=0)
m0$method[c("mort_5yr")] = ""

fits = colon_s %>% 
  select(dependent, explanatory) %>% 
  # Usually run imputation with 10 imputed sets, 4 here for demonstration
  mice(m = 4, predictorMatrix = predM, method = m0$method) %>% 
  # Run logistic regression on each imputed set
  with(glm(formula(ff_formula(dependent, explanatory)), 
    family="binomial"))

# Examples of extracting metrics from fits
## AICs
fits %>% 
	getfit() %>% 
	purrr::map(AIC)

# C-statistic
fits %>% 
	getfit() %>% 
	purrr::map(~ pROC::roc(.x$y, .x$fitted)$auc)

  
# Pool  results
fits_pool = fits %>% 
  pool()

## Can be passed to or_plot
colon_s %>% 
	or_plot(dependent, explanatory, glmfit = fits_pool, table_text_size=4)

# Summarise and put in table
fit_imputed = fits_pool %>%                                  
	fit2df(estimate_name = "OR (multiple imputation)", exp = TRUE)

# Use finalfit merge methods to create and compare results
colon_s %>% 
  summary_factorlist(dependent, explanatory, fit_id = TRUE) -> summary1

colon_s %>% 
  glmuni(dependent, explanatory) %>% 
  fit2df(estimate_suffix = " (univariable)") -> fit_uni

colon_s %>% 
  glmmulti(dependent, explanatory) %>% 
  fit2df(estimate_suffix = " (multivariable inc. smoking)") -> fit_multi

explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor")
colon_s %>% 
  glmmulti(dependent, explanatory) %>% 
  fit2df(estimate_suffix = " (multivariable)") -> fit_multi_r

# Combine to final table
summary1 %>% 
  ff_merge(fit_uni) %>% 
  ff_merge(fit_multi_r) %>% 
  ff_merge(fit_multi) %>% 
  ff_merge(fit_imputed) %>% 
  select(-fit_id, -index) %>% 
	knitr::kable(row.names=FALSE, align = c("l", "l", "r", "r", "r","r", "r", "r"))

## ----warning=FALSE, message=FALSE---------------------------------------------
library(dplyr)
explanatory = c("age", "sex.factor", 
  "nodes", "obstruct.factor", "smoking_mar")
colon_s %>% 
  mutate(
    smoking_mar = forcats::fct_na_value_to_level(smoking_mar, level = "(Missing)")
  ) %>% 
  finalfit(dependent, explanatory) %>% 
	knitr::kable(row.names=FALSE, align = c("l", "l", "r", "r", "r", "r"))

Try the finalfit package in your browser

Any scripts or data that you put into this service are public.

finalfit documentation built on Nov. 17, 2023, 1:09 a.m.