Setup

# Load environment file
source(here::here("setup_environment.R"))

# Tidylog messages make notebook output messy
detach("package:tidylog", unload = TRUE)

library(glue)      # for glue()
library(DT)        # for tables

source("checking_functions.R")

# What is the minimum % difference to show in tables?
table_pc_cutoff = 5

Get Data

get_t2_data = function(date_str, sheet_name = "Raw Data"){

  path = glue("{data_folder}{date_str}/output/{date_str}-Table2-Crude-Mortality-subgroups.xlsx")

  data = 
    read.xlsx(path, sheet = sheet_name) %>%
    select(location_name, quarter_short, agg_label, sub_grp, label, 
           deaths, pats, crd_rate)

  return(data)

}

t2_old_full = get_t2_data(previous_pub) %>% 
  # this chunk shouldn't be required after July 2022
  mutate(sub_grp = case_when(sub_grp == "All Admissions" ~ "All admissions",
                             sub_grp == "Age Group" ~ "Age group",
                             sub_grp == "Admission Type" ~ "Admission type",
                             sub_grp == "Place of Death" ~ "Place of death",
                             T ~ sub_grp),
         label = case_when(label == "All Admissions" ~ "All admissions",
                           label == "Non-Elective" ~ "Non-elective",
                           label == "1 - Most Deprived" ~ "1 - most deprived",
                           label == "5 - Least Deprived" ~ "5 - least deprived",
                           label == "Died in Community" ~ "Died in community",
                           label == "Died in Hospital" ~ "Died in hospital",
                           T ~ label))
t2_new_full = get_t2_data(pub_day)

We can only compare the quarters that overlap, find those:

overlap_quarters = 
  semi_join(t2_new_full, t2_old_full, by = "quarter_short") %>%
  select(quarter_short) %>%
  distinct()

Now only keep the quarters that overlap:

t2_old = filter(t2_old_full, quarter_short %in% overlap_quarters$quarter_short)
t2_new = filter(t2_new_full, quarter_short %in% overlap_quarters$quarter_short)

# We also don't need all of the columns here
t2_old = select(t2_old, -agg_label)
t2_new = select(t2_new, -agg_label)

Compare to previous publication

Changes for the first overlapping quarter (1st quarter in this release, 2nd quarter in previous release), the last overlapping quarter (last quarter in previous release, 2nd last quarter in this release), and the remaining quarters are given in separate tables.

Get a dataframe with the change from the last publication:

# Columns containing the labels
join_cols = c("location_name", "quarter_short", "sub_grp", "label")

t2_diff = compare_df(t2_new, t2_old, join_by = join_cols, 
                     df1_name = "new", df2_name = "old")

Patients

t2_diff_px = 
  t2_diff %>%
  select(any_of(join_cols), starts_with("pats"))

writeLines(glue("Max % diff: {max(t2_diff_px[, 'pats.diff_pc'], na.rm = T)}
                 Min % diff: {min(t2_diff_px[, 'pats.diff_pc'], na.rm = T)}"))

Compare absolute % difference to previous value:

make_change_plot(t2_diff_px, "pats")

First Quarter

Now look more closely at the larger % differences.

Start with the first quarter of overlapping data only (i.e. first quarter in new publication):

make_change_table(t2_diff_px, measure = "pats",  
                  pc_cutoff = table_pc_cutoff, quarter_filter = "first only")

Last Quarter

Now last quarter of overlapping data (i.e. last quarter in previous publication):

make_change_table(t2_diff_px, measure = "pats",  
                  pc_cutoff = table_pc_cutoff, quarter_filter = "last only")

Middle Quarters

Now the rest of the quarters:

make_change_table(t2_diff_px, measure = "pats",  
                  pc_cutoff = table_pc_cutoff, quarter_filter = "middle only")

Deaths

t2_diff_deaths = 
  t2_diff %>%
  select(any_of(join_cols), starts_with("deaths"))

writeLines(glue("Max % diff: {max(t2_diff_deaths[, 'deaths.diff_pc'], na.rm = T)}
                 Min % diff: {min(t2_diff_deaths[, 'deaths.diff_pc'], na.rm = T)}"))

Compare absolute % difference to previous value:

make_change_plot(t2_diff_deaths, "deaths")

First Quarter

Now look more closely at the larger % differences.

Start with the first quarter of overlapping data only (i.e. first quarter in new publication):

make_change_table(t2_diff_deaths, measure = "deaths",  
                  pc_cutoff = table_pc_cutoff, quarter_filter = "first only")

Last Quarter

Now last quarter of overlapping data (i.e. last quarter in previous publication):

make_change_table(t2_diff_deaths, measure = "deaths", 
                  pc_cutoff = table_pc_cutoff, quarter_filter = "last only")

Middle Quarters

Now the rest of the quarters:

make_change_table(t2_diff_deaths, measure = "deaths", 
                  pc_cutoff = table_pc_cutoff, quarter_filter = "middle only")

Crude Mortality Rate

t2_diff_crd_rate = 
  t2_diff %>%
  select(any_of(join_cols), starts_with("crd_rate"))

writeLines(glue("Max % diff: {max(t2_diff_crd_rate[, 'crd_rate.diff_pc'], na.rm = T)}
                 Min % diff: {min(t2_diff_crd_rate[, 'crd_rate.diff_pc'], na.rm = T)}"))

Compare absolute % difference to previous value:

make_change_plot(t2_diff_crd_rate, "crd_rate")

First Quarter

Now look more closely at the larger % differences.

Start with the first quarter of overlapping data only (i.e. first quarter in new publication):

make_change_table(t2_diff_crd_rate, measure = "crd_rate", 
                  pc_cutoff = table_pc_cutoff, quarter_filter = "first only")

Last Quarter

Now last quarter of overlapping data (i.e. last quarter in previous publication):

make_change_table(t2_diff_crd_rate, measure = "crd_rate", 
                  pc_cutoff = table_pc_cutoff, quarter_filter = "last only")

Middle Quarters

Now the rest of the quarters:

make_change_table(t2_diff_crd_rate, measure = "crd_rate", 
                  pc_cutoff = table_pc_cutoff, quarter_filter = "middle only")

Check splits sum to totals

Get the difference between the sum of each split and the Scotland totals

t2_split_tot = 
  split_tot_df(t2_new_full, hsmr_table = 2) %>%
  # Only some hospitals included, so not expected to sum up to total
  filter(agg_label != "Hospital")

split_id_cols = c("agg_label", "quarter_short", "sub_grp")

t2_split_tot_deaths = 
  select(t2_split_tot, any_of(split_id_cols), starts_with("deaths"))

t2_split_tot_pats = 
  select(t2_split_tot, any_of(split_id_cols), starts_with("pats"))

Patients

make_split_table(t2_split_tot_pats)

Deaths

make_split_table(t2_split_tot_deaths)


NHS-NSS-transforming-publications/hsmr documentation built on June 30, 2024, 11:48 a.m.