knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(tibble)
library(knitr)
library(dplyr)
library(forcats)
library(ggplot2)
library(modelr)
library(tidyr)
library(registryr)
library(scales)

Overview

registryr provides tools for the analysis of the clinical registry datasets. These tools will be described in this vignette.

Load data

Let's firstly load a fake dataset which comes with the package.

data(fake_data)

head(fake_data)

Population pyramid plot

We will firstly create a population dataset and then use the population_pyramid function to make a population pyramid plot

population_df <-
  fake_data %>% 
  select(-sex) %>% 
  rename(sex = sex_cat) %>%
  mutate(age_group = cut(age, c(-Inf, seq(20, 70, 10), Inf))) %>%
  count(sex, age_group) 

population_df
population_df %>%
  population_pyramid()

Funnel plots

Funnel plot is a visual representation of how individual medical units (e.g. hospitals) perform compared to their peers and the overall average. It also helps to identify the outliers (i.e the ones who are performing better or worse than the average). The stat_funnelcount and stat_funnelcontinuous functions provide easy options to add funnels to an existing ggplot. We will use the average mortality rate and length of dtay as the benchmarks and will then make funnel plots using the stat_funnelcount and stat_funnelcontinuous functions.

n_sites <- fake_data %>% pull(site_id) %>% n_distinct()

benchmark_mortality <-
  fake_data %>%
  summarise(benchmark = sum(dead) / n()) %>%
  pull(benchmark)

fake_data_grouped <-
  fake_data %>%
  group_by(site_id) %>%
  summarise(n_deaths = sum(dead),
            mean_los = mean(los),
            n = n())

fake_data_grouped <-
  fake_data_grouped %>%
  mutate(death_rate = n_deaths / n,
         site_id = as_factor(site_id)) %>%
  # we need to manually change the denominator for the funnel plots, to make it look nicer :)
  mutate(n = seq(100, 2000, length.out = n_sites),
         sd_los = rpois(dplyr::n(), 5))

fake_data_grouped %>%
  ggplot(aes(x = n, y = death_rate)) +
  geom_hline(yintercept = benchmark_mortality,
             col = "dark blue") +
  geom_point() +
  stat_funnelcount(fill = "dark blue",
              alpha = 0.5,
              ci_limits = 0.95)+
  stat_funnelcount(fill = "light blue",
              alpha = 0.5,
              ci_limits = 0.998) +
  geom_point() +
  scale_y_continuous(expand = c(0, 0),
                     limits = c(0, 1),
                     labels = percent_format()
                     ) +
  coord_cartesian(ylim = c(0, NA),
                  xlim = c(0, NA)) +
  theme_bw() +
  labs(
    x = "Number of surgeries",
    y = "Mortality rate"
  )

benchmark_los <-
  fake_data %>%
  summarise(benchmark = mean(los)) %>%
  pull(benchmark)

fake_data_grouped %>%
  ggplot(aes(x = n, y = mean_los, sd = sd_los)) +
  geom_hline(yintercept = benchmark_los,
             col = "dark blue") +
  stat_funnelcontinuous(fill = "dark blue",
              alpha = 0.5,
              ci_limits = 0.95) +
  stat_funnelcontinuous(fill = "light blue",
              alpha = 0.5,
              ci_limits = 0.998) +
  scale_y_continuous(expand = c(0, 0),
                     limits = c(35, 48),
                     ) +
  geom_point() +
  labs(
    x = "Number of surgeries",
    y = "Mean length of stay (days)"
  ) +
  theme_bw()

Stepwise model selection

Let's find out what model we should use to adjust the outcome, we can do that by using the stepwise function

model_mortality <-
  stepwise(out_var = "dead",
         ind_var = c("sex_cat", "age"),
         data = fake_data,
         model = "logistic")

mortality_adj_vars <- labels(terms(formula(model_mortality)))
model_los <-
  stepwise(out_var = "los",
         ind_var = c("sex_cat", "age"),
         data = fake_data,
         model = "linear")

los_adj_vars <- labels(terms(formula(model_mortality)))

Now, let's risk adjust the data using the models found using the stepwise function. We can apply risk adjustment by risk_adj function.

fake_data <- 
  fake_data %>%
  ungroup() %>%
  drop_na(sex, age)

risk_adjust(data = fake_data, 
            site_id = "site_id",
            outcome = "dead",
            outcome_type = "binary",
            adj_vars = mortality_adj_vars)

risk_adjust(data = fake_data, 
            site_id = "site_id",
            outcome = "los",
            outcome_type = "continuous",
            adj_vars = los_adj_vars)

Outlier detection

iris %>% outlier(n = 2)
iris %>% outlier(n = 3)


farhadsalimi/registryr documentation built on June 24, 2022, 12:23 a.m.