#' link_bundles
#'
#' @param bundles the bundle data
#' @param episodes the episode data
#'
#' @return dataframe with new column linked.spell containing spell_number of the spell associated with this bundle
#' @export
#'
link_bundles <- function(bundles = clahrcnwlhf::bundle_data_clean,
episodes = clahrcnwlhf::emergency_adms,
show.working = TRUE,
bundle_date_col = "Admission.Datetime",
bundle_id_col = "PseudoID",
episode_id_col = "PseudoID",
episode_date_col = "CSPAdmissionTime",
episode_date_disch_col = "CSPDischargeTime",
episode_spell_col = "spell_number",
episode_new_spell = "new_spell") {
bundles$linked.spell <- NA
# Run preparatory analysis
bundles <- bundle_in_spell(bundles = bundles, episodes = episodes,
bundle_date_col = bundle_date_col,
bundle_id_col = bundle_id_col,
episode_id_col = episode_id_col,
episode_date_col = episode_date_col,
episode_date_disch_col = episode_date_disch_col,
episode_spell_col = episode_spell_col,
episode_new_spell = episode_new_spell)
# 1. First link all bundles "in" their previous admission to that admission.
type1s <- which(bundles$bundle.in.spell == TRUE)
bundles[type1s,"linked.spell"] <- bundles[type1s,"prev.spell"]
# 2. Next link all bundles not already linked, with lag to next admission <= 3 days,
# to that next admission.
type2s <- which((bundles$bundle.in.spell == FALSE | is.na(bundles$bundle.in.spell)) & bundles$lag.to.next.adm <= 3)
bundles[type2s,"linked.spell"] <- bundles[type2s,"next.spell"]
# 3. The remaining bundles cannot be linked at this time.
if (!show.working) {
wcs <- c("prev.spell", "next.spell", "lag.from.prev.adm", "lag.to.next.adm", "bundle.in.spell")
bundles <- bundles[,!(colnames(bundles) %in% wcs)]
}
bundles
}
#' link_nicor
#'
#' @param nicor the NICOR data
#' @param episodes dataframe of hospital admissions from data warehouse
#' @param show.working include helper columns in output
#' @param bundle_date_col column in NICOR data containing date pertaining to admission
#' @param bundle_id_col column in NICOR data containing patient (pseudo-)ID
#' @param episode_id_col column in episodes containing patient (pseudo-)ID
#' @param episode_date_col column in episodes containing datetime of admission
#' @param episode_date_disch_col column in episodes containing datetime of discharge
#' @param episode_spell_col column in episodes containing spell ID to return
#' @param episode_new_spell column in episodes indicating first episodes in spell
#'
#' @return nicor with linked spells, and helper columns if requested
#' @export
#'
link_nicor <- function(nicor = clahrcnwlhf::nicor_data_clean,
episodes = clahrcnwlhf::emergency_adms,
show.working = TRUE,
bundle_date_col = "Date.of.Visit",
bundle_id_col = "PseudoID",
episode_id_col = "PseudoID",
episode_date_col = "CSPAdmissionTime",
episode_date_disch_col = "CSPDischargeTime",
episode_spell_col = "spell_number",
episode_new_spell = "new_spell") {
nicor$linked.spell <- NA
# Run preparatory analysis
nicor <- bundle_in_spell(bundles = nicor, episodes = episodes,
bundle_date_col = bundle_date_col,
bundle_id_col = bundle_id_col,
episode_id_col = episode_id_col,
episode_date_col = episode_date_col,
episode_date_disch_col = episode_date_disch_col,
episode_spell_col = episode_spell_col,
episode_new_spell = episode_new_spell)
# 1. First link all NICOR records with Date.of.Visit within a spell,
# to that spell.
type1s <- which(nicor$bundle.in.spell == TRUE)
nicor[type1s, "linked.spell"] <- nicor[type1s,"prev.spell"]
# 2. Next link all NICOR records with Date.of.Visit less than 2 days before
# an admission, to that admission.
type2s <- which((nicor$bundle.in.spell == FALSE | is.na(nicor$bundle.in.spell)) & nicor$lag.to.next.adm <= as.difftime(2, units = "days"))
nicor[type2s,"linked.spell"] <- nicor[type2s,"next.spell"]
# Finally, remove any links to spells that do not contain the nicor site
link_output <- apply(nicor, 1, function(x){
nicor_site <- x["Hospital"]
lspell <- as.numeric(trimws(x["linked.spell"],which = "both"))
site.in.spell <- any(episodes[which(episodes$spell_number == lspell),"StartWardSite"] == nicor_site)
if (site.in.spell) {
list('lspell'=lspell)
} else {list('lspell'=NA)}
})
nicor <- cbind(nicor, do.call(rbind.data.frame, as.list(link_output)))
names(nicor)[names(nicor)=="linked.spell"] <- "pre.linked.spell"
names(nicor)[names(nicor)=="lspell"] <- "linked.spell"
nicor
}
#' remove_duplicate_bundle_links
#'
#' @param linked_bundles output of link_bundles
#'
#' @return linked_bundles with duplicate links removed
#' @export
#'
remove_duplicate_bundle_links <- function(linked_bundles) {
unlinked_bundles <- linked_bundles[which(is.na(linked_bundles$linked.spell)),]
linked_bundles <- linked_bundles[which(!is.na(linked_bundles$linked.spell)),]
episode_bundles <- split(linked_bundles, linked_bundles$linked.spell)
deduped_ep_bun <- lapply(episode_bundles, function(x) {
lowest_na_count_rows <- x[which(x$number.nas == min(x$number.nas)),]
maxdate <- max(as.POSIXct(lowest_na_count_rows$Admission.Datetime))
lowest_na_count_rows[which(as.POSIXct(lowest_na_count_rows$Admission.Datetime) == maxdate),][1,]
})
deduped_linked <- do.call(rbind.data.frame, deduped_ep_bun)
rbind(deduped_linked, unlinked_bundles)
}
#' nearest_spells
#'
#' @param bundles dataframe of care bundle audit sheets
#' @param episodes dataframe of hospital admissions from data warehouse
#'
#' @return copy of bundles with additional columns added giving information on adjacent admissions
#' @export
#'
nearest_spells <- function(bundles, episodes, bundle_date_col = "Admission.Datetime",
bundle_id_col = "PseudoID", episode_id_col = "PseudoID",
episode_date_col = "CSPAdmissionTime",
episode_spell_col = "spell_number", episode_new_spell = "new_spell") {
bundles <- bundles[!is.na(bundles[,bundle_date_col]),]
#TODO: consider refactoring these two apply calls into one.
bundles_prv_nxt_spells <- apply(bundles, 1, function(x) {
# Extract the patient id and admission datetime for this bundle
pt_id <- x[bundle_id_col]
bun_dt <- x[bundle_date_col]
# Identify the most recent admission for this patient
# prior to or on the bundle admission datetime
pt_eps <- episodes[episodes[,episode_id_col] == pt_id,]
date_logical <- as.POSIXct(pt_eps[,episode_date_col]) <= as.POSIXct(bun_dt)
date_logical[is.na(date_logical)] <- FALSE
pt_eps <- pt_eps[date_logical,]
if(nrow(pt_eps)==0) {prv_adm <- NA} else {
prv_adm <- pt_eps[which.max(as.POSIXct(pt_eps[,episode_date_col])),episode_spell_col]
}
# Identify the first subsequent admission for this patient
# after the bundle admission datetime
pt_eps <- episodes[episodes[,episode_id_col] == pt_id,]
date_logical <- as.POSIXct(pt_eps[,episode_date_col]) > as.POSIXct(bun_dt)
date_logical[is.na(date_logical)] <- FALSE
pt_eps <- pt_eps[date_logical,]
if(nrow(pt_eps)==0) {nxt_adm <- NA} else {
nxt_adm <- pt_eps[which.min(as.POSIXct(pt_eps[,episode_date_col])),episode_spell_col]
}
list('prev.spell'=prv_adm, 'next.spell'=nxt_adm)
})
bundles <- cbind(bundles, do.call(rbind.data.frame, bundles_prv_nxt_spells))
bundles_lags <- apply(bundles, 1, function(x) {
p_sp_id <- as.numeric(trimws(x["prev.spell"],which = "both"))
n_sp_id <- as.numeric(trimws(x["next.spell"],which = "both"))
bun_dt <- x[bundle_date_col]
# Get lag from previous admission
if (is.na(p_sp_id)) {
lfpa <- NA_integer_
pa.dt <- as.POSIXct(strptime(NA, format = "%Y-%m-%d %H:%M:%S"))
} else {
spell.start <- episodes[episodes[,episode_spell_col] == p_sp_id & episodes[,episode_new_spell] == TRUE,episode_date_col]
lfpa <- difftime(as.POSIXct(bun_dt), as.POSIXct(spell.start), units = "days")
pa.dt <- as.POSIXct(spell.start, origin="1970-01-01")
}
# Get lag to next admission
if (is.na(n_sp_id)) {
ltna <- NA_integer_
na.dt <- as.POSIXct(strptime(NA, format = "%Y-%m-%d %H:%M:%S"))
} else {
spell.start <- episodes[episodes[,episode_spell_col] == n_sp_id & episodes[,episode_new_spell] == TRUE,episode_date_col]
ltna <- difftime(as.POSIXct(spell.start), as.POSIXct(bun_dt), units = "days")
na.dt <- as.POSIXct(spell.start, origin="1970-01-01")
}
list('lag.from.prev.adm'=lfpa, 'lag.to.next.adm'=ltna, 'prev.adm.dt'=pa.dt, 'next.adm.dt'=na.dt)
#lfpa
})
bundles <- cbind(bundles, do.call(rbind.data.frame, bundles_lags))
bundles$lag.from.prev.adm <- as.difftime(bundles$lag.from.prev.adm, units = "days")
bundles$lag.to.next.adm <- as.difftime(bundles$lag.to.next.adm, units = "days")
bundles$prev.adm.dt <- as.POSIXct(bundles$prev.adm.dt, origin="1970-01-01")
bundles$next.adm.dt <- as.POSIXct(bundles$next.adm.dt, origin="1970-01-01")
bundles
}
#' bundle_in_spell
#'
#' @param bundles dataframe of care bundle audit sheets
#' @param episodes dataframe of hospital admissions from data warehouse
#'
#' @return copy of bundles with additional columns added giving information on adjacent admissions
#' @export
#'
bundle_in_spell <- function(bundles, episodes = clahrcnwlhf::emergency_adms,
bundle_date_col = "Admission.Datetime",
bundle_id_col = "PseudoID",
episode_id_col = "PseudoID",
episode_date_col = "CSPAdmissionTime",
episode_date_disch_col = "CSPDischargeTime",
episode_spell_col = "spell_number",
episode_new_spell = "new_spell") {
bundles <- nearest_spells(bundles = bundles, episodes = episodes,
bundle_date_col = bundle_date_col,
bundle_id_col = bundle_id_col,
episode_id_col = episode_id_col,
episode_date_col = episode_date_col,
episode_spell_col = episode_spell_col,
episode_new_spell = episode_new_spell)
bundles <- bundles[!is.na(bundles[,bundle_date_col]),]
bundles$bundle.in.spell <- apply(bundles, 1, function(x) {
# Extract the patient id and admission datetime for this bundle
sp_id <- as.numeric(trimws(x["prev.spell"],which = "both"))
bun_dt <- x[bundle_date_col]
# If there is no previous spell, return NA
if (is.na(sp_id)) {
bis <- NA
} else {
# Otherwise, establish whether or not the bundle_date_col from the bundle
# lies inside the spell
spell.start <- episodes[episodes[,episode_spell_col] == sp_id & episodes[,episode_new_spell] == TRUE,episode_date_col]
spell.end <- episodes[episodes[,episode_spell_col] == sp_id & episodes[,episode_new_spell] == TRUE,episode_date_disch_col]
bis <- (as.POSIXct(spell.start) <= as.POSIXct(bun_dt)) && (as.POSIXct(bun_dt) <= as.POSIXct(spell.end))
}
bis
})
bundles
}
#' plot_lag_dist
#'
#' @param bundles dataframe of care bundle audit sheets
#' @param episodes dataframe of hospital admissions from data warehouse
#' @param bis optional pre-analysed output of bundle_in_spell
#' @param prev toggle between lags from previous admission and to next
#' @param cumulative show cumulative distributions vs distributions
#'
#' @return plot of the lag distribution
#' @export
#'
plot_lag_dist <- function(bundles = clahrcnwlhf::bundle_data_clean, episodes = clahrcnwlhf::emergency_adms, bis = NULL, prev = TRUE, cumulative = FALSE, facet = TRUE, max_lag = NULL) {
if (is.null(bis)) {
bis <- clahrcnwlhf::bundle_in_spell(bundles = bundles, episodes = episodes)
}
if (!is.null(max_lag)) {
if (prev) {
bis <- bis[which(bis$lag.from.prev.adm <= max_lag),]
} else {
bis <- bis[which(bis$lag.to.next.adm <= max_lag),]
}
}
if (prev) {
p <- ggplot2::ggplot(bis, ggplot2::aes(lag.from.prev.adm))
} else {
p <- ggplot2::ggplot(bis, ggplot2::aes(lag.to.next.adm))
}
if (facet) {
p <- p + ggplot2::facet_wrap(~bundle.in.spell, scales = "free_x")
}
if (cumulative) {
p <- p + ggplot2::stat_ecdf()
} else {
p <- p + ggplot2::geom_histogram()
}
p
}
#' link_diag_table
#'
#' @param bis output of bundle_in_spell
#' @param lag_cutoff cutoff for classifying lags
#'
#' @return table of lag diagnostic information
#' @export
#'
link_diag_table <- function(bis, lag_cutoff = 3) {
table(bis$bundle.in.spell, bis$lag.to.next.adm <= as.difftime(lag_cutoff, units = "days"), useNA = "always")
}
#' plot_linking_venn
#'
#' @param episodes dataframe of hospital admissions from data warehouse
#' @param linked_bundles output of link_bundles
#' @param linked_nicor output of link_nicor
#' @param plot_vd logical to indicate whether to actually draw the plot
#'
#' @return venn diagram showing linked data sources
#' @export
#'
plot_linking_venn <- function(episodes = clahrcnwlhf::emergency_adms,
linked_bundles, linked_nicor, plot_vd = TRUE) {
allspells <- unique(episodes[which(!is.na(episodes$spell_number)),"spell_number"])
bundlespells <- unique(linked_bundles[which(!is.na(linked_bundles$linked.spell)),"linked.spell"])
nicorspells <- unique(linked_nicor[which(!is.na(linked_nicor$linked.spell)),"linked.spell"])
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
venn.plot <- VennDiagram::venn.diagram(x = list(admissions = allspells, bundles = bundlespells, nicor = nicorspells), filename = NULL)
if (!plot_vd) {
venn.plot
} else {
grid::grid.newpage()
grid::grid.draw(venn.plot)
}
}
#' duplicated_links
#'
#' @param linked_dataset output of link_bundles
#'
#' @return linked_dataset restricted to only those entries linked to a spell with multiple linked entries
#' @export
#'
duplicated_links <- function(linked_dataset) {
bls <- linked_dataset[,"linked.spell"]
duplicated_bls <- unique(bls[duplicated(bls)])
duplicated_bls <- duplicated_bls[!is.na(duplicated_bls)]
duplicated_bls_data <- linked_dataset[which(linked_dataset$linked.spell %in% duplicated_bls),]
duplicated_bls_data
}
#' dupe_link_details
#'
#' @param dupe_bundles output of duplicated_links
#'
#' @return dupe_bundles with details of the multiple bundles associated with each spell
#' @export
#'
dupe_link_details <- function(dupe_bundles) {
dupe_bundles <- dupe_bundles[order(dupe_bundles$linked.spell),]
spell_frame_list <- split(dupe_bundles, f = dupe_bundles$linked.spell)
spell_min_max <- lapply(spell_frame_list, function(x) {
smin <- as.POSIXct(min(x$Admission.Datetime), origin="1970-01-01")
smax <- as.POSIXct(max(x$Admission.Datetime), origin="1970-01-01")
n <- nrow(x)
ais <- all(x$bundle.in.spell)
anyis <- any(x$bundle.in.spell)
list("min.bun.adm"=smin, "max.bun.adm"=smax, "num.buns"=n,
"all.in.spell"=ais, "any.in.spell"=anyis)
})
dld <- cbind(unique(dupe_bundles$linked.spell),
do.call(rbind.data.frame, spell_min_max))
dld$min.bun.adm <- as.POSIXct(dld$min.bun.adm, origin="1970-01-01")
dld$max.bun.adm <- as.POSIXct(dld$max.bun.adm, origin="1970-01-01")
dld$diff.bun.adm <- difftime(dld$max.bun.adm, dld$min.bun.adm, units = "days")
dld
}
#' link_and_save
#'
#' @param episodes the emergency admissions data
#' @param bundles the care bundle data
#' @param nicor the nicor data
#'
#' @return Saves linked bundle and nicor data to the package
#' @export
#'
link_and_save <- function(episodes = clahrcnwlhf::emergency_adms,
bundles = clahrcnwlhf::bundle_data_clean,
nicor = clahrcnwlhf::nicor_data_clean) {
linked_bundle_data <- link_bundles(episodes = episodes, bundles = bundles)
linked_nicor_data <- link_nicor(episodes = episodes, nicor = nicor)
linked_bundle_data <- remove_duplicate_bundle_links(linked_bundles = linked_bundle_data)
devtools::use_data(linked_bundle_data, overwrite = TRUE)
devtools::use_data(linked_nicor_data, overwrite = TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.