#' Report Weekly Admissions
#'
#' Shows the number of admissions to each each in terms of patients and episodes
#' stratified in weeks
#'
#' @param distinct_episodes from reference table
#' @return a summary containing patient and episode numbers
#'
#' @importFrom dplyr group_by summarise n_distinct
#' @importFrom magrittr %>%
#' @importFrom lubridate day month year
weekly_admissions <- function(distinct_episodes = NULL) {
admissions <- distinct_episodes %>%
dplyr::mutate(
year = lubridate::year(start_date),
month = lubridate::month(start_date, label = TRUE),
week_of_month = as.integer(ceiling(lubridate::day(start_date) / 7))
) %>%
dplyr::group_by(site, year, month, week_of_month) %>%
dplyr::summarise(
patients = dplyr::n_distinct(nhs_number),
episodes = dplyr::n_distinct(episode_id)
)
}
#' Reports Case Numbers according to day
#'
#' @param unique_cases_tbl from \code{pull_cases_all}
#'
#' @return breakdown of unique daily cases
#'
#' @importFrom dplyr group_by summarise n_distinct
#' @importFrom magrittr %>%
#' @importFrom lubridate day month year wday
report_cases_daily <- function(unique_cases_tbl = NULL) {
cases <- unique_cases_tbl %>%
mutate(
year = lubridate::year(start_date),
month = lubridate::month(start_date, label = TRUE),
week_of_month = as.integer(ceiling(lubridate::day(start_date) / 7)),
wday = lubridate::wday(start_date, label = TRUE)
) %>%
dplyr::group_by(site, year, month, week_of_month, wday) %>%
dplyr::summarise(
patients = dplyr::n_distinct(nhs_number),
episodes = dplyr::n_distinct(episode_id)
)
}
#' Daily Admissions for each Site
#'
#' Calculates the number of admissions for each calendar day, stratified
#' by site. This is a complete table, i.e. days with 0 admissions are not
#' listed
#'
#' @param reference the reference table generated by \code{\link{make_reference}}
#' @param by_site the named site of interest as a character
#'
#' @return a tibble with the number of unique episodes admitted for a given day
#'
#' @importFrom dplyr filter mutate group_by summarise n_distinct
#' @importFrom lubridate date
daily_admissions <- function(reference = NULL, by_site = NULL) {
admissions <- reference %>%
filter(site == by_site) %>%
mutate(date = lubridate::date(start_date)) %>%
group_by(date) %>%
summarise(episodes = n_distinct(episode_id)) %>%
filter(episodes > 0)
}
#' Event Occurrances for each Site
#'
#' Calculates the number of event occurances for each calendar day, stratified
#' by site. Days with 0 event submissions are not listed
#'
#' @param extracted_event extracted HIC event
#' @param by_site a site code as a character vector
#'
#' @return a tibble with the number of unique episodes admitted for a given day
event_occurrances <- function(extracted_event = NULL, by_site = "UCL") {
occurances <- extracted_event %>%
filter(site == by_site) %>%
mutate(date = lubridate::date(datetime)) %>%
group_by(date) %>%
summarise(events = n_distinct(internal_id)) %>%
filter(events > 0)
return(occurances)
}
#' Report Admission Numbers for each Unit
#'
#' Reports on ICNARC CMP unit codes to describe the numbers of reported cases by
#' site. This is dependent on accurate parsing of the xml schema for hic code
#' 0002 (INCARC CMP unit code). And as such, may dramatically under-report. To
#' see a comprehensive list of acual cases, see the
#' \code{\link{weekly_admissions}} function.
#'
#' @param events_table the events table
#' @param reference_table the reference table
#' @export
#'
#' @importFrom dplyr filter select collect right_join group_by summarise
#' n_distinct rename
#' @importFrom rlang .data
#' @importFrom magrittr %>%
#'
#' @return a tibble with unique episodes and patients reported by ICU
unit_admissions <- function(events_table = NULL, reference_table = NULL) {
unit_numbers <- events_table %>%
filter(.data$code_name == "NIHR_HIC_ICU_0002") %>%
select(.data$episode_id, .data$string) %>%
collect() %>%
right_join(reference_table, by = "episode_id") %>%
select(.data$episode_id, .data$nhs_number,
.data$string, .data$site) %>%
group_by(.data$site, .data$string) %>%
summarise(
patients = n_distinct(.data$nhs_number),
episodes = n_distinct(.data$episode_id)
) %>%
rename(unit = .data$string)
return(unit_numbers)
}
#' Characterise Episodes
#'
#' There are several ways to potentially define an episode. Fundamentally, we
#' need a start and an end datetime. The start is already required to onboard
#' patients into the database and so is 100% complete. This function primary
#' serves to identify the end of an epsiode, which is often much less clearly
#' defined. Potential sources of information, in order of precidence include:
#' \itemize{
#' \item date and time of in unit death: 0042 and 0043
#' \item date and time of brainstem death: 0044 and 0045
#' \item episode end date time: 0412
#' \item date and time of body removed: 0038 and 0039
#' \item the last regularly recorded observation from HR (0108) and SpO2
#' (0129)
#' }
#' The goal is not to create a perfect record, as doing so drops too many cases.
#' Rather, the goal is to characterise the episode in a meaningful way. This
#' means that episodes cannot have negative episode length, and you should not
#' be able to record a vital sign after a death has occurred. If date and time
#' of death preceed the discharge time we overwrite the episode end datetime
#' with the date and time of death. The same is not true for brainstem death and
#' body removal. This would result in a situation whereby a body is kept
#' physiologically active on the ICU, but the episode is now listed as finished.
#'
#' Since a patient can only die once, checks are also performed to ensure that
#' death events, can only be linked to a single episode. This is relevent for
#' the following situations:
#' \itemize{
#' \item More than one episode has become linked with the same death event. In
#' this situation, the final episode is closed with an end datetime of the
#' datetime of death. The death is then uncoupled from any prior episodes
#' \item A patient was discharged alive, but subseqnetly died.
#' }
#' Checks are taken to ensure that episodes are non-overlapping, though they can
#' run immediately from one to the next.
#'
#' @param connection a connection to the CC-HIC database
#'
#' @return a tibble that characterises each episode. The attribute
#' "invalid_records" contains information related to invalid records and the
#' reason for invalidation
#' @export
#'
#' @importFrom rlang abort
#' @importFrom dplyr setdiff tbl
#' @importFrom tidyselect ends_with
characterise_episodes <- function(connection = NULL) {
if (is.null(connection)) {
rlang::abort("connection must be supplied")
}
df_extract <- tibble::tribble(
~codes, ~names,
"NIHR_HIC_ICU_0411", "epi_start_dttm",
"NIHR_HIC_ICU_0412", "src_end_dttm",
"NIHR_HIC_ICU_0042", "death_date",
"NIHR_HIC_ICU_0043", "death_time",
"NIHR_HIC_ICU_0044", "bsd_date",
"NIHR_HIC_ICU_0045", "bsd_time",
"NIHR_HIC_ICU_0038", "body_date",
"NIHR_HIC_ICU_0039", "body_time",
"NIHR_HIC_ICU_0073", "nhs",
"NIHR_HIC_ICU_0097", "outcome",
"NIHR_HIC_ICU_0400", "bsd"
)
df <- extract_demographics(
connection = connection,
code_names = df_extract$codes,
rename = df_extract$names)
## For this particular purpse, we need to add in columns that might
## be all NA.
missing_names <- setdiff(df_extract$names, names(df))
for (i in seq_along(missing_names)) {
df[[missing_names[i]]] <- as.character(NA)
}
df <- df %>% mutate_at(vars(ends_with("time")), hms::as_hms)
df <- df %>% mutate_at(vars(ends_with("date")), lubridate::ymd)
df <- df %>% mutate_at(vars(ends_with("dttm")), lubridate::ymd_hms)
df <- df %>%
mutate(
death_dttm = if_else(
!is.na(.data$death_date) & !is.na(.data$death_time),
paste(
format(.data$death_date),
format(.data$death_time)), as.character(NA)
)
) %>%
mutate(death_dttm = if_else(
!is.na(.data$death_dttm),
lubridate::ymd_hms(.data$death_dttm),
as.POSIXct(NA))) %>%
mutate(
bsd_dttm = if_else(
!is.na(.data$bsd_date) & !is.na(.data$bsd_time),
paste(format(.data$bsd_date),
format(.data$bsd_time)), as.character(NA)
)
) %>%
mutate(
bsd_dttm = if_else(
!is.na(.data$bsd_dttm),
lubridate::ymd_hms(.data$bsd_dttm),
as.POSIXct(NA))) %>%
mutate(
body_dttm = if_else(
!is.na(.data$body_date) & !is.na(.data$body_time),
paste(format(.data$body_date),
format(.data$body_time)), as.character(NA)
)
) %>%
mutate(
body_dttm = if_else(
!is.na(.data$body_dttm),
lubridate::ymd_hms(.data$body_dttm), as.POSIXct(NA)))
df <- df %>%
select(-ends_with("date"), -ends_with("time")) %>%
mutate(nhs_validation = if_else(
verify_nhs(.data$nhs), 1L, 0L
))
invalid_records <- df %>% filter(.data$nhs_validation == 0) %>%
select(.data$episode_id) %>%
mutate(reason = "invalid nhs number")
df <- df %>%
filter(.data$nhs_validation == 1) %>%
select(-.data$nhs_validation)
invalid_records <- df %>%
filter(.data$outcome == "E") %>%
select(.data$episode_id) %>%
mutate(reason = "open episode") %>%
bind_rows(invalid_records)
df <- df %>% filter(.data$outcome != "E")
## Discharged alive and no src end date is invalid
## Disacharge alive, with an end date, automatically VALID
df <- df %>%
mutate(
epi_end_dttm = case_when(
.data$outcome == "A" & is.na(.data$src_end_dttm) ~ as.POSIXct(NA),
.data$outcome == "A" & !is.na(.data$src_end_dttm)
~ .data$src_end_dttm,
.data$outcome == "D" & !is.na(.data$death_dttm) & .data$bsd == 0 | is.na(.data$bsd)
~ .data$death_dttm,
.data$outcome == "D" & bsd == 1 & !is.na(.data$death_dttm)
~ .data$bsd_dttm,
TRUE ~ as.POSIXct(NA)
)
)
broken_timings <- df %>%
filter(is.na(.data$epi_end_dttm) | .data$epi_end_dttm < .data$epi_start_dttm) %>%
select(-.data$epi_end_dttm)
if (nrow(broken_timings) > 0) {
recover_timings <- tbl(connection, "events") %>%
filter(.data$episode_id %in% !!broken_timings$episode_id,
.data$code_name %in% c("NIHR_HIC_ICU_0108", "NIHR_HIC_ICU_0129")) %>%
select(.data$episode_id, .data$datetime) %>%
collect() %>%
group_by(.data$episode_id) %>%
summarise(epi_end_dttm = max(.data$datetime)) %>%
filter(!is.na(.data$epi_end_dttm)) %>%
left_join(broken_timings, ., by = "episode_id")
df <- df %>%
filter(!(.data$episode_id %in% broken_timings$episode_id)) %>%
bind_rows(recover_timings)
invalid_records <- df %>%
filter(is.na(.data$epi_end_dttm)) %>%
select(.data$episode_id) %>%
mutate(reason = "no end datetime") %>%
bind_rows(invalid_records)
invalid_records <- df %>%
filter(!is.na(.data$epi_end_dttm)) %>%
filter(.data$epi_end_dttm < .data$epi_start_dttm) %>%
select(.data$episode_id) %>%
mutate(reason = "end time prior to admission") %>%
bind_rows(invalid_records)
df <- df %>%
filter(!is.na(.data$epi_end_dttm)) %>%
filter(.data$epi_end_dttm >= .data$epi_start_dttm)
}
duplicate_start <- df %>%
ungroup() %>%
distinct(.data$nhs, .data$epi_start_dttm, .keep_all = TRUE) %>%
select(.data$episode_id) %>%
anti_join(df, by = "episode_id") %>%
select(.data$episode_id) %>%
mutate(reason = "duplicate start datetime")
duplicate_end <- df %>%
ungroup() %>%
distinct(.data$nhs, .data$epi_start_dttm, .keep_all = TRUE) %>%
select(.data$episode_id) %>%
anti_join(df, by = "episode_id") %>%
select(.data$episode_id) %>%
mutate(reason = "duplicate end datetime")
overlapping <- df %>%
group_by(.data$nhs) %>%
arrange(.data$nhs, .data$epi_start_dttm) %>%
mutate(time_out = difftime(
.data$epi_start_dttm, lag(.data$epi_end_dttm))) %>%
ungroup() %>%
filter(.data$time_out < 0) %>%
select(.data$episode_id) %>%
mutate(reason = "overlapping episode")
invalid_records <- bind_rows(
invalid_records,
duplicate_start,
duplicate_end,
overlapping
)
df <- df %>%
select(
.data$episode_id, .data$nhs, .data$epi_start_dttm,
.data$epi_end_dttm, .data$outcome) %>%
rename(nhs_number = .data$nhs) %>%
anti_join(invalid_records, by = "episode_id") %>%
arrange(.data$nhs_number, .data$epi_start_dttm) %>%
mutate(los_days = as.numeric(
difftime(
.data$epi_end_dttm, .data$epi_start_dttm, units = "hours"))/24)
df <- left_join(
tbl(connection, "episodes"),
tbl(connection, "provenance"),
by = c("provenance" = "file_id")
) %>%
select(.data$episode_id, .data$site) %>%
collect() %>%
left_join(df, ., by = "episode_id")
attr(df, "invalid_records") <- invalid_records
return(df)
}
#' Characterise Spells
#'
#' Some sites have patients check out of one ICU and into another (for example
#' ICU stepdown to HDU). This checks to see if patients are discharged from one
#' unit and admitted to another wihtin a pre-defined time period, specified in
#' the minutes argument.
#'
#' This only evaluates episodes that have already been flagged as valid by the
#' \code{\link{characterise_episodes}} function.
#'
#' @param df episode length table
#' @param minutes numeric scalar to define transition period
#' @export
#'
#' @return a table with episodes reconciled as spells
characterise_spells <- function(df = NULL, minutes = 30) {
df %>%
arrange(.data$nhs_number, .data$epi_start_dttm) %>%
group_by(.data$nhs_number) %>%
mutate(time_out = .data$epi_start_dttm[-1] %>%
difftime(
.data$epi_end_dttm[-length(.data$epi_end_dttm)], units = "mins") %>%
as.integer() %>%
c(NA)) %>%
mutate(new_spell = if_else(
lag(.data$time_out) > minutes | is.na(lag(.data$time_out)),
TRUE, FALSE)) %>%
ungroup() %>%
mutate(spell_id = cumsum(.data$new_spell)) %>%
select(.data$spell_id, .data$episode_id, .data$nhs_number, .data$site,
.data$epi_start_dttm, .data$epi_end_dttm, .data$los_days)
}
#' Resolve DateTime
#'
#' Many events in CC-HIC are stored in separate date and time columns/objects.
#' This function attempts to reconcile and combine these times when possible. Of
#' note, date and time information is not always stored with consistent rules.
#' For example, death date and time, are often stored for every patient in every
#' episode, even though the patient can only die once. The following are some
#' date and time pairings that denote a singular event:
#' \itemize{
#' \item "NIHR_HIC_ICU_0042", "NIHR_HIC_ICU_0043" - Unit Death
#' \item "NIHR_HIC_ICU_0038", "NIHR_HIC_ICU_0039" - Body Removal
#' \item "NIHR_HIC_ICU_0044", "NIHR_HIC_ICU_0045" - Brain stem death
#' \item "NIHR_HIC_ICU_0048", "NIHR_HIC_ICU_0049" - Treatment Withdrawal
#' \item "NIHR_HIC_ICU_0050", "NIHR_HIC_ICU_0051" - Discharge ready
#' }
#' If a date or time component is missing, nothing is returned as the datetime
#' cannot be accurately formed.
#'
#' @param df a table that contains columns for the date and time of interest
#' @param date_code the column name for the date of interest
#' @param time_code the column name for the time of interest
#'
#' @return a table with the correct datetime pairing for the codes given
#'
#' @importFrom rlang .data sym
resolve_date_time <- function(df = NULL,
date_code = NULL,
time_code = NULL) {
if (any(is.null(c(df, date_code, time_code)))) {
rlang::abort("you must supply a dataframe and two column names")
}
dc <- rlang::enquo(date_code)
tc <- rlang::enquo(time_code)
df <- df %>%
mutate(
dttm = if_else(
!is.na(!!dc) & !is.na(!!tc),
paste0(format(!!dc), " ", format(!!tc)), as.character(NA)
)
) %>%
mutate(
dttm = if_else(!is.na(dttm), lubridate::ymd_hms(dttm), as.POSIXct(NA)))
return(df)
}
#' Summarise Non-Verifiable Episodes
#'
#' Provides an overview of the reasons for episode invalidation
#'
#' @param df the episode table returned from \code{\link{characterise_episodes}}
#' @export
#'
#' @return a tibble containing summary information for validation at episode
#' level
episode_varacity <- function(df) {
attr(df, "invalid_records") %>%
group_by(reason) %>%
tally()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.