#' Link A&E to Inpatient records
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#'
#' Link together ECDS A&E records to HES/SUS inpatient records on
#' NHS number, Hospital Number and Date of Birth and organisation code.
#' To note that the inpatient records should already be aggregated into
#' spells at the desired level (standard, CIP or Mega)
#'
#' @seealso group_time continuous_inpatient_spells
#'
#' @import data.table
#'
#' @param ae a list to provide data and columns for the A&E (ECDS) data; all arguments provided quoted unless specified
#' \describe{
#' \item{`data`}{the ECDS A&E dataset provided unquoted}
#' \item{`record_id`}{a unique id within the dataset to be retained; optional}
#' \item{`arrival_date`}{the ECDS arrival date}
#' \item{`departure_date`}{the ECDS discharge date}
#' \item{`nhs_number`}{the patient NHS number}
#' \item{`hospital_number`}{the patient Hospital numbers also known as the local patient identifier}
#' \item{`patient_dob`}{patient date of birth}
#' \item{`org_code`}{the NHS trust organisation codes}
#' }
#' @param inp a list to provide data and columns for the inpatient (SUS/HES) data
#' \describe{
#' \item{`data`}{the HES/SUS inpatient dataset provided unquoted}
#' \item{`record_id`}{a unique id within the dataset to be retained; optional}
#' \item{`spell_start_date`}{a string containing the inpatient (SUS/HES) admission date column name; all arguments provided quoted unless specified}
#' \item{`spell_id`}{the HES/SUS spell id}
#' \item{`nhs_number`}{the patient NHS number}
#' \item{`hospital_number`}{the patient Hospital numbers also known as the local patient identifier}
#' \item{`patient_dob`}{patient date of birth}
#' \item{`org_code`}{the NHS trust organisation codes}
#' }
#' @param .forceCopy a boolean to control if you want to copy the dataset before
#' linking together
#'
#' @return a patient level linked hospital record
#'
#' @keywords internal
#' @examples
#' \dontrun{
#' sample_ae <- data.table::data.table(
#' nhs_number = c("645114517",
#' "645114517","645114517","382940103","321908341",
#' "321908341","321908341","599534707",
#' "403454211","349959089","252341591","180554160",
#' "180554160","281980720","473372796","369773534",
#' "474044124","639064842","662468568","115641745",
#' "115641745","821084975","479637024",
#' "527021626","527021626","233168855","702650869",NA,
#' "537235036",NA,"517229961",NA,"480142132",
#' "368288558","554675340"),
#' local_patient_identifier = c(NA,NA,NA,
#' "I3348707",NA,NA,NA,"P1350948",NA,NA,
#' "Q4157514",NA,NA,"D1101843","K2440769","E1366499",
#' NA,"K1494229","R4678220","J5206297","J5206297",
#' "S1945338","F2159102",NA,NA,"D6300794",NA,
#' "W1208900","Z4975449","G7439612","T1266485",
#' "N4842033","Q5566884","P2689566",NA),
#' patient_birth_date = c("2021-03-03",
#' "2021-03-03","2021-03-03","2003-08-24",
#' "2001-06-21","2001-06-21","2001-06-21","1991-10-08",
#' "1987-02-03","1962-06-17","1991-10-07",
#' "1985-10-16","1985-10-16","1990-09-24","1984-11-14",
#' "1994-05-05","1999-08-11","1983-01-04",
#' "2017-06-01","1975-09-04","1975-09-04","1993-07-13",
#' "2014-01-05","1995-09-30","1995-09-30",
#' "1976-06-25","2000-06-02","2017-06-11","2007-05-03",
#' "1986-08-28","2016-10-14","2016-02-04",
#' "2004-03-02","1979-01-17","1974-06-14"),
#' organisation_code = c("P3P","P3P",
#' "P3P","Z4R","A9I","A9I","A9I","Z4R","V5T",
#' "Z9V","P3P","T7N","T7N","V2P","T4H","V9Y",
#' "Z7N","W6Y","G2H","V5T","V5T","W6Y","J6J",
#' "J6J","J6J","L4Q","P3P","F0N","A6C","O1A",
#' "F0N","O2R","W6Y","V0R","O1A"),
#' arrival_date = c("2022-05-10",
#' "2022-05-29","2022-08-03","2022-05-17",
#' "2022-05-07","2022-05-07","2022-05-07","2022-05-23",
#' "2022-05-13","2022-06-04","2022-05-14",
#' "2022-05-17","2022-06-05","2022-05-25","2022-05-24",
#' "2022-06-09","2022-06-18","2022-06-11",
#' "2022-06-22","2022-06-20","2022-07-18","2022-06-09",
#' "2022-06-05","2022-06-26","2022-06-11",
#' "2022-06-25","2022-06-10","2022-06-12","2022-06-16",
#' "2022-07-10","2022-06-20","2022-07-10",
#' "2022-07-20","2022-07-20","2022-07-19"),
#' departure_date = c("2022-05-10",
#' "2022-05-30","2022-08-03","2022-05-17",
#' "2022-05-07","2022-05-07","2022-05-07","2022-05-23",
#' "2022-05-13","2022-06-04","2022-05-14",
#' "2022-05-17","2022-06-05","2022-05-25","2022-05-24",
#' "2022-06-09","2022-06-18","2022-06-11",
#' "2022-06-22","2022-06-20","2022-07-18","2022-06-09",
#' "2022-06-05","2022-06-26","2022-06-11",
#' "2022-06-25","2022-06-10","2022-06-12","2022-06-16",
#' "2022-07-11","2022-06-20","2022-07-10",
#' "2022-07-20","2022-07-20","2022-07-19")
#' )
#' sample_ae$pcd <- paste0(sample(LETTERS,1),sample(1:14,1)," ",
#' sample(LETTERS,1),sample(0:9,1),sample(LETTERS,1))
#' sample_ae$id = seq_len(nrow(sample_ae))*7
#'
#' sample_inp <- data.table::data.table(
#' nhs_number = c("335661151",
#' "335661151","335661151","335661151","335661151",
#' NA,NA,NA,NA,NA,NA,"645114517","645114517",
#' "645114517","143423716","212261130",
#' "212261130","212261130","212261130","212261130",
#' "349959089","317344169","317344169","317344169",
#' "317344169","317344169","317344169","317344169",
#' "317344169","317344169","317344169",
#' "317344169","317344169","317344169","180554160",
#' "180554160",NA,NA,NA,NA,NA,NA,NA,"230782291",
#' "977111015","977111015","977111015","977111015",
#' "683785606",NA,NA,NA,NA,NA,NA,NA,NA,NA,
#' NA,NA,"281980720","270646497",NA,
#' "387252583","639064842","836297039","836297039",
#' "348614531","348614531","662468568","112340924",
#' "112340924","381361439","493239044",NA,NA,NA,
#' "115641745","115641745","233761482","233761482",
#' "479637024","527021626","527021626",
#' "294666415","233168855","702650869","460180094",
#' "561169746","517229961",NA,"480142132","554675340",
#' "135888675",NA,"684718902"),
#' local_patient_identifier = c(NA,NA,NA,NA,
#' NA,"D4809270","D4809270","D4809270",
#' "D4809270","D4809270","D4809270",NA,NA,NA,
#' "J2098200","D2139084","D2139084","D2139084","D2139084",
#' "D2139084",NA,NA,NA,NA,NA,NA,NA,NA,NA,
#' NA,NA,NA,NA,NA,NA,NA,"A1706089",
#' "A1706089","A1706089","A1706089","A1706089",
#' "A1706089","A1706089","Z3093435","I1605735","I1888797",
#' "I1888797","I1888797","J2901593","U1111563",
#' "U1111563","U1111563","U1111563","V2246708",
#' "V2246708","V2246708","V2246708","V2246708",
#' "V2246708","V2246708","D1101843",NA,"O2700100",
#' "I5040881","K1494229","I1222012","I1222012",
#' NA,NA,"R4678220","P2632883","P2632883",
#' "J6723431","Y1506318","F3501197","F3501197",
#' "F3501197","J5206297","J5206297","B2651449",
#' "B2651449","F2159102",NA,NA,"W5097806","D6300794",
#' NA,"U2715517","O5278248","T1266485",
#' "N4842033","Q5566884",NA,"X2768295","H3196212",
#' "J9365439"),
#' date_birth = c("2021-08-14",
#' "2021-08-14","2021-08-14","2021-08-14",
#' "2021-08-14","1960-05-20","1960-05-20","1960-05-20",
#' "1960-05-20","1960-05-20","1960-05-20",
#' "2021-03-03","2021-03-03","2021-03-03","2019-11-09",
#' "1953-04-05","1953-04-05","1953-04-05",
#' "1953-04-05","1953-04-05","1962-06-17","1952-04-03",
#' "1952-04-03","1952-04-03","1952-04-03",
#' "1952-04-03","1952-04-03","1952-04-03","1952-04-03",
#' "1952-04-03","1952-04-03","1952-04-03",
#' "1952-04-03","1952-04-03","1985-10-16","1985-10-16",
#' "1993-07-09","1993-07-09","1993-07-09",
#' "1993-07-09","1993-07-09","1993-07-09","1993-07-09",
#' "1980-10-14","1976-08-03","1976-08-03",
#' "1976-08-03","1976-08-03","1981-08-27","2017-08-20",
#' "2017-08-20","2017-08-20","2017-08-20",
#' "2017-08-20","2017-08-20","2017-08-20","2017-08-20",
#' "2017-08-20","2017-08-20","2017-08-20",
#' "1989-07-11","1964-04-30","1991-12-25","1961-08-16",
#' "1983-01-04","1957-01-29","1957-01-29",
#' "1982-12-05","1982-12-05","2017-06-01","1989-09-21",
#' "1989-09-21","1986-10-06","1995-03-01",
#' "1964-04-25","1964-04-25","1964-04-25","1975-09-04",
#' "1975-09-04","1995-06-17","1995-06-17",
#' "2014-01-05","1995-09-30","1995-09-30","1993-06-09",
#' "1976-06-25","2000-06-02","1986-09-14",
#' "2016-11-19","2016-10-14","2016-02-04","2004-03-02",
#' "1974-06-14","1945-05-14","2001-09-16",
#' "1987-08-19"),
#' organisation_code = c("L4Q","L4Q",
#' "L4Q","P3P","P3P","U6X","U6X","U6X","U6X",
#' "U6X","U6X","P3P","P3P","P3P","L4Q","O2B",
#' "O2B","O2B","O2B","O2B","Z9V","U8V","U8V",
#' "U8V","U8V","U8V","U8V","U8V","U8V","U8V",
#' "U8V","U8V","U8V","U8V","T7N","T7N","V5T",
#' "V5T","V5T","V5T","V5T","V5T","V5T","V7E",
#' "J6J","Y9V","Y9V","Y9V","V7E","B1A","B1A",
#' "B1A","B1A","J2W","J2W","J2W","J2W","J2W",
#' "J2W","J2W","V2P","O1A","O2A","F1O","W6Y",
#' "T2Y","T2Y","G2H","G2H","G2H","J6J","J6J",
#' "J6J","V5T","G2H","G2H","G2H","V5T","V5T",
#' "T4H","T4H","J6J","J6J","J6J","G7H","L4Q",
#' "P3P","L4Q","U8V","F0N","O2R","W6Y","O1A",
#' "A9V","G9V","L4Q"),
#' mega_spell_id = c("10.3.0",
#' "10.3.1","10.3.2","4.2.0","4.2.1","7.12.0",
#' "7.12.1","7.12.2","7.12.3","7.12.4","7.12.5",
#' "14.3.0","14.3.1","14.3.2","22.2.1","49.6.0",
#' "49.6.1","49.6.2","49.6.3","49.6.4","69.1.0",
#' "76.20.0","76.20.1","76.20.2","76.20.3",
#' "76.20.4","76.20.5","76.20.6","76.20.7","76.20.8",
#' "76.20.9","76.20.10","76.20.11","76.20.12",
#' "77.7.0","77.7.1","79.7.0","79.7.1","79.7.2",
#' "79.7.3","79.7.4","79.7.5","79.7.6","83.1.0",
#' "157.1.0","85.5.0","85.5.1","85.5.2","90.1.0",
#' "167.4.0","167.4.1","167.4.2","167.4.3",
#' "91.9.0","91.9.1","91.9.2","91.9.3","91.9.4",
#' "91.9.5","91.9.6","101.2.0","111.5.0","122.1.0",
#' "151.1.0","154.1.0","161.3.0","161.3.1",
#' "181.4.0","181.4.1","184.1.0","185.2.0","185.2.1",
#' "201.1.0","214.1.0","226.3.0","226.3.1",
#' "226.3.2","247.4.0","247.4.1","266.4.0","266.4.1",
#' "269.2.0","270.2.0","270.2.1","284.1.0",
#' "299.2.0","307.1.0","314.3.0","345.1.0",
#' "400.1.0","419.1.0","430.3.1","494.3.0","498.1.0",
#' "501.1.0","535.1.0"),
#' spell_start_date = c("2022-05-20",
#' "2022-06-14","2022-06-20","2022-05-01",
#' "2022-05-07","2022-05-16","2022-05-29","2022-05-18",
#' "2022-05-21","2022-06-27","2022-07-18",
#' "2022-05-10","2022-05-29","2022-08-03","2022-05-13",
#' "2022-05-27","2022-05-20","2022-06-09",
#' "2022-06-27","2022-07-27","2022-06-04","2022-05-21",
#' "2022-06-18","2022-06-24","2022-07-16",
#' "2022-07-10","2022-07-17","2022-07-20","2022-07-15",
#' "2022-07-08","2022-08-01","2022-08-04",
#' "2022-07-27","2022-07-14","2022-05-17","2022-06-05",
#' "2022-06-01","2022-06-05","2022-06-09",
#' "2022-05-23","2022-05-27","2022-06-10","2022-06-12",
#' "2022-05-29","2022-05-29","2022-06-02",
#' "2022-05-29","2022-06-15","2022-06-09","2022-06-21",
#' "2022-05-29","2022-07-18","2022-07-26",
#' "2022-05-12","2022-06-12","2022-06-11","2022-06-28",
#' "2022-06-29","2022-06-22","2022-07-09",
#' "2022-05-19","2022-05-25","2022-05-18","2022-05-26",
#' "2022-06-11","2022-06-21","2022-06-13",
#' "2022-05-27","2022-06-27","2022-06-22","2022-06-08",
#' "2022-07-04","2022-06-26","2022-06-26",
#' "2022-06-11","2022-06-17","2022-07-13","2022-06-20",
#' "2022-07-18","2022-06-15","2022-07-03",
#' "2022-06-05","2022-06-26","2022-06-11","2022-06-24",
#' "2022-06-25","2022-06-10","2022-07-01",
#' "2022-07-04","2022-06-20","2022-07-10","2022-07-20",
#' "2022-07-19","2022-07-28","2022-07-27",
#' "2022-07-10"),
#' spell_end_date = c("2022-06-17",
#' "2022-06-16","2022-06-20","2022-05-01",
#' "2022-05-11","2022-05-16","2022-05-29","2022-05-18",
#' "2022-05-21","2022-07-05","2022-07-18",
#' "2022-05-11","2022-06-01","2022-08-04","2022-05-13",
#' "2022-05-29","2022-05-21","2022-06-10",
#' "2022-06-30","2022-07-28","2022-06-05","2022-06-25",
#' "2022-06-18","2022-06-24","2022-07-16",
#' "2022-07-10","2022-07-17","2022-07-20","2022-07-15",
#' "2022-07-08","2022-08-01","2022-08-04",
#' "2022-07-27","2022-07-14","2022-05-30","2022-06-10",
#' "2022-06-01","2022-06-05","2022-06-09",
#' "2022-05-23","2022-05-27","2022-06-10","2022-06-12",
#' "2022-05-30","2022-06-05","2022-06-02",
#' "2022-06-01","2022-06-15","2022-06-15","2022-06-21",
#' "2022-05-29","2022-07-18","2022-07-26",
#' "2022-05-15","2022-06-12","2022-06-17","2022-06-28",
#' "2022-06-29","2022-06-23","2022-07-11",
#' "2022-05-19","2022-06-09","2022-05-23","2022-05-26",
#' "2022-06-20","2022-06-21","2022-06-14",
#' "2022-06-02","2022-06-27","2022-06-23","2022-06-08",
#' "2022-07-04","2022-06-26","2022-06-28",
#' "2022-06-11","2022-06-17","2022-07-13","2022-06-21",
#' "2022-07-20","2022-06-18","2022-07-03",
#' "2022-06-11","2022-06-26","2022-06-11","2022-06-24",
#' "2022-07-01","2022-06-10","2022-07-06",
#' "2022-07-06","2022-06-23","2022-07-11","2022-07-22",
#' "2022-07-22","2022-07-28","2022-07-27",
#' "2022-07-10")
#' )
#'
#' sample_inp$pcd <- paste0(sample(LETTERS,1),sample(1:14,1)," ",
#' sample(LETTERS,1),sample(0:9,1),sample(LETTERS,1))
#' sample_inp$id = seq_len(nrow(sample_inp))*3
#'
#' link_ae_inpatient(
#' ae = list(
#' data = sample_ae,
#' arrival_date = 'arrival_date',
#' departure_date = 'departure_date',
#' nhs_number = 'nhs_number',
#' hospital_number = 'local_patient_identifier',
#' patient_dob = 'patient_birth_date',
#' org_code = 'organisation_code'
#' ),
#' inp = list(
#' data = sample_inp,
#' spell_id = 'mega_spell_id',
#' spell_start_date = 'spell_start_date',
#' nhs_number = 'nhs_number',
#' hospital_number = 'local_patient_identifier',
#' patient_dob = 'date_birth',
#' org_code = 'organisation_code'
#' )
#' )[]
#'}
### FUNCTION START #############################################################
link_ae_inpatient <- function(
ae = list(
data,
record_id = 'unique_record_id',
nhs_number = 'nhs_number',
hospital_number = 'local_patient_identifier',
patient_dob = 'patient_birth_date',
org_code = 'organisation_code_of_provider',
arrival_date = 'arrival_date',
departure_date = 'departure_date'
),
inp = list(
data,
record_id = 'unique_record_id',
nhs_number = 'nhs_number',
hospital_number = 'local_patient_identifier',
patient_dob = 'date_birth',
org_code = 'organisation_code_code_of_provider',
spell_id = 'mega_spell_id',
spell_start_date = 'spell_start_date'
),
.forceCopy = FALSE) {
if (.forceCopy) {
inp$data <- data.table::copy(inp$data)
ae$data <- data.table::copy(ae$data)
} else {
data.table::setDT(inp$data)
data.table::setDT(ae$data)
}
## allow people to match on either A&E admission or discharge date
ae$data <- data.table::rbindlist(
list(ae$data[get(ae$arrival_date) != get(ae$departure_date),
link_date := .SD,
.SDcols = ae$arrival_date],
ae$data[, link_date := .SD,
.SDcols = ae$departure_date]
))
## dont want to ovewrite the admission date; so create a new one for linking
inp$data[,
link_date := .SD,
.SDcols = inp$spell_start_date]
data.table::setnames(
inp$data,
c(
inp$nhs_number,
inp$patient_dob,
inp$hospital_number,
inp$org_code
),
c(
ae$nhs_number,
ae$patient_dob,
ae$hospital_number,
ae$org_code
),
skip_absent = TRUE
)
## valid nhs links
aeNHS <- ae$data[!is.na(x),
env = list(x = ae$nhs_number)]
inpNHS <- inp$data[!is.na(x),
env = list(x = ae$nhs_number)]
aeNHS[, c('link_id',
'link_dob',
'link_org') := .(get(ae$nhs_number),
get(ae$patient_dob),
get(ae$org_code))]
inpNHS[, c('link_id',
'link_dob',
'link_org') := .(get(ae$nhs_number),
get(ae$patient_dob),
get(ae$org_code))]
## valid hospital number links
aeHOS <- ae$data[
is.na(x) & !is.na(y),
env = list(x = ae$nhs_number,
y = ae$hospital_number)]
inpHOS <- inp$data[
is.na(x) & !is.na(y),
env = list(x = ae$nhs_number,
y = ae$hospital_number)]
aeHOS[, c('link_id',
'link_dob',
'link_org') := .(get(ae$hospital_number),
get(ae$patient_dob),
get(ae$org_code))]
inpHOS[, c('link_id',
'link_dob',
'link_org') := .(get(ae$hospital_number),
get(ae$patient_dob),
get(ae$org_code))]
## create the two datasets linked by NHS number and Hospital number
link <- unique(
data.table::rbindlist(
list(
## source 1
data.table::merge.data.table(
x = aeNHS,
y = inpNHS,
by = c('link_id', 'link_dob', 'link_org', 'link_date'),
all.x = TRUE,
suffixes = c(".ae", ".inp")
),
## source 2
data.table::merge.data.table(
x = inpNHS,
y = aeNHS,
by = c('link_id', 'link_dob', 'link_org', 'link_date'),
all.x = TRUE,
suffixes = c(".inp", ".ae")
),
## source 3
data.table::merge.data.table(
x = aeHOS,
y = inpHOS,
by = c('link_id', 'link_dob', 'link_org', 'link_date'),
all.x = TRUE,
suffixes = c(".ae", ".inp")
),
## source 4
data.table::merge.data.table(
x = inpHOS,
y = aeHOS,
by = c('link_id', 'link_dob', 'link_org', 'link_date'),
all.x = TRUE,
suffixes = c(".inp", ".ae")
)),
idcol = "source",
use.names = TRUE,
fill = TRUE
)
)
## put a meaningful source tag
link[,source :=
data.table::fcase(
!is.na(arr) & !is.na(sid), "ECDS:SUS",
!is.na(arr) & is.na(sid), "ECDS",
is.na(arr) & !is.na(sid), "SUS",
default = NA
),
env = list(arr = ae$arrival_date,
sid = inp$spell_id)
]
## cleanup varnames
linknames <- list()
linknames[[ae$nhs_number]] <- grep(ae$nhs_number,names(link),value=TRUE)
linknames[[ae$hospital_number]] <- grep(ae$hospital_number,names(link),value=TRUE)
linknames[[ae$patient_dob]] <- grep(ae$patient_dob,names(link),value=TRUE)
linknames[[ae$org_code]] <- grep(ae$org_code,names(link),value=TRUE)
# if an ID column exists
if("id.ae" %in% names(link)) {
link[, id := data.table::fifelse(is.na(id.ae),id.inp,id.ae)]
}
## loop through identifiers and consolidate
ids <- as.vector(unlist(lapply(ae[c(4:length(ae))],`[[`,1)))
for(i in ids){
link[,(i) := data.table::fifelse(is.na(v1),v2,v1),
env = list(
v1 = linknames[[i]][1],
v2 = linknames[[i]][2]
)
]
}
# ## if the postcode is included
# if(any(grepl("*pcd*|*postcode*",names(link),ignore.case = TRUE))){
# if (any(grepl(".*postcode.*.ae$",
# names(link),
# ignore.case = TRUE))) {
# pcdname <- grep(".*postcode.*....$", names(link), value = TRUE)
#
# } else if (any(grepl(".*pcd.*.ae$",
# names(link),
# ignore.case = TRUE))) {
# pcdname <- grep(".*pcd*....$", names(link), value = TRUE)
#
# }
#
# pcdvar <- gsub(".ae", "", pcdname[1])
#
# link[,
# var := data.table::fifelse(is.na(n2),
# n1,
# n2),
# env = list(var = pcdvar,
# n1 = pcdname[1], ## A&E report
# n2 = pcdname[2]) ## Inpatient report
# ]
# }
## if you want to keep a uid column
cols <- as.vector(unlist(lapply(ae[c(2:length(ae))],`[[`,1)))
if(exists('record_id',where=ae) & exists('record_id',where=inp)){
if(ae$record_id == inp$record_id){
names(link) <- gsub(paste0(ae$record_id,'.ae'),
paste0(ae$record_id,'_ae'),
names(link))
names(link) <- gsub(paste0(ae$record_id,'.inp'),
paste0(ae$record_id,'_inp'),
names(link))
cols <- gsub(paste0('^',ae$record_id), paste0(ae$record_id,"_ae"), cols)
cols <- c(cols, paste0(inp$record_id,"_inp"))
}
}
## capture and delete extra cols
rmcols <- c(
grep("link_",names(link),value = TRUE),
grep("[\\.]ae",names(link),value = TRUE),
grep("[\\.]inp",names(link),value = TRUE))
link[, (rmcols) := NULL]
## put ID cols at the beginning
data.table::setcolorder(x = link,
neworder = cols)
data.table::setcolorder(x = link,
neworder = c(ae$arrival_date,ae$departure_date),
before = inp$spell_start_date)
return(link)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.