R/calculate.R

Defines functions merge.and.crop.events detect.overlapping.events prune.events.to.period

Documented in detect.overlapping.events merge.and.crop.events prune.events.to.period

#' Match index events to admissions that fall within the corresponding DAOH
#' period.
#'
#' @param index.op.dt A data.table that minimally has a patient identifier, a
#'   unique index event identifier for each event, and a date on which it
#'   occurred. Will be modified by reference to have a DAOH start and end date.
#' @param event.dt A data.table of all other admission events that occurred,
#'   minimally with a patient identifier and event start and end dates.
#' @param daoh.limits Numeric vector of length two with DAOH start and end
#'   relative to the index event.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param index.event.date.col.name Character name of the index event date
#'   column (Default: 'OP_ACDTE')
#' @param admission.start.col.name Character name of the column containing
#'   admission start dates (Default: 'EVSTDATE')
#' @param admission.end.col.name Character name of the column containing
#'   admission end dates (Default: 'EVENDATE')
#' @param daoh.period.start.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the start of each DAOH period (Default:
#'   'daoh.period.start')
#' @param daoh.period.end.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the end of each DAOH period (Default:
#'   'daoh.period.end')
#'
#' @return A data.table with patient identifier, index event identifier, and all
#'   admission found within the DAOH period of that index event.
#'
#' @export
prune.events.to.period = function(index.op.dt,
                                  event.dt,
                                  daoh.limits,
                                  patient.id.col.name = 'PRIM_HCU.IDencrypted',
                                  index.event.id.col.name = 'index.event.id',
                                  index.event.date.col.name = 'OP_ACDTE',
                                  admission.start.col.name = 'EVSTDATE',
                                  admission.end.col.name = 'EVENDATE',
                                  daoh.period.start.col.name = 'daoh.period.start',
                                  daoh.period.end.col.name = 'daoh.period.end') {
  
  #Both a start and an end offset are read below, so fail early with a clear
  #message rather than silently generating NA period ends.
  if (length(daoh.limits) < 2) {
    stop('daoh.limits must contain a DAOH start and end relative to the index event.')
  }
  
  #Generate DAOH limits. These two columns are added to index.op.dt by
  #reference, so the caller's table gains them too.
  data.table::set(x = index.op.dt,
                  j = daoh.period.start.col.name,
                  value = index.op.dt[, get(index.event.date.col.name) + daoh.limits[1]])
  data.table::set(x = index.op.dt,
                  j = daoh.period.end.col.name,
                  value = index.op.dt[, get(index.event.date.col.name) + daoh.limits[2]])
  
  #Set keys with which to find overlaps: patient first, then the interval
  #start and end. Note that this re-keys both input tables by reference.
  data.table::setkeyv(
    event.dt,
    c(
      patient.id.col.name,
      admission.start.col.name,
      admission.end.col.name
    )
  )
  data.table::setkeyv(
    index.op.dt,
    c(
      patient.id.col.name,
      daoh.period.start.col.name,
      daoh.period.end.col.name
    )
  )
  
  #Columns kept in the output.
  daoh.data.cols = c(patient.id.col.name,
                     index.event.id.col.name,
                     admission.start.col.name,
                     admission.end.col.name,
                     daoh.period.start.col.name,
                     daoh.period.end.col.name)
  
  #Get events that overlap with DAOH periods (fastest to have event data first).
  #nomatch = NULL drops admissions that do not overlap any DAOH period.
  overlapping.dt = data.table::foverlaps(event.dt,
                                         index.op.dt,
                                         type = "any",
                                         nomatch = NULL)
  
  #Return explicitly; previously the value was only returned invisibly as the
  #result of an assignment.
  return(overlapping.dt[, daoh.data.cols, with = FALSE])
}


#' Merge overlapping admissions so that days are not counted twice.
#'
#' @param daoh.event.dt A data.table with patient identifier, index event
#'   identifier, and all admissions found within the DAOH period of that index
#'   event, as generated by prune.events.to.period. Will be modified by
#'   reference to have a new column grouping overlapping sets of events.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param admission.start.col.name Character name of the column containing
#'   admission start dates (Default: 'EVSTDATE')
#' @param admission.end.col.name Character name of the column containing
#'   admission end dates (Default: 'EVENDATE')
#' @param daoh.period.start.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the start of each DAOH period (Default:
#'   'daoh.period.start')
#' @param daoh.period.end.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the end of each DAOH period (Default:
#'   'daoh.period.end')
#' @param merge.event.id.col.name Character name of the column that will be
#'   populated with unique identifiers of overlapping events (Default:
#'   'merge.event.id')
#'
#' @return The modified input daoh.event.dt data.table, with a new column with a
#'   unique identifier for each overlapping group of admissions (in column
#'   merge.event.id.col.name). The input will be modified by reference, so it's
#'   not strictly necessary to use the output.
#'
#' @export
detect.overlapping.events = function(daoh.event.dt,
                                     patient.id.col.name = 'PRIM_HCU.IDencrypted',
                                     index.event.id.col.name = 'index.event.id',
                                     admission.start.col.name = 'EVSTDATE',
                                     admission.end.col.name = 'EVENDATE',
                                     daoh.period.start.col.name = 'daoh.period.start',
                                     daoh.period.end.col.name = 'daoh.period.end',
                                     merge.event.id.col.name = 'merge.event.id')  {
  
  #State for the loop that ripples overlaps back to the first event in each
  #sequence of overlapping admissions.
  no.more.merges = FALSE
  iterations = 0
  max.iterations = 100
  
  #Key by patient and admission interval (re-keys daoh.event.dt by reference).
  data.table::setkeyv(
    daoh.event.dt,
    c(
      patient.id.col.name,
      admission.start.col.name,
      admission.end.col.name
    )
  )
  
  #Self-overlap: for every row, the row number of the first row (in key order)
  #whose admission interval overlaps it for the same patient.
  overlap.inds = data.table::foverlaps(
    daoh.event.dt,
    daoh.event.dt,
    type = "any",
    mult = "first",
    which = TRUE
  )
  
  last.overlap.inds = overlap.inds
  
  #Follow the overlap indices until they stop changing
  while (!no.more.merges) {
    
    #Propagate indices back to the first in the series of overlapping admissions.
    overlap.inds = overlap.inds[overlap.inds]
    #Stop when the merges found in this iteration are the same as the last
    #iteration.
    no.more.merges = identical(overlap.inds, last.overlap.inds)
    last.overlap.inds = overlap.inds
    
    #Safety cap: give up (with a warning) after max.iterations passes.
    iterations = iterations + 1
    if (iterations >= max.iterations) {
      warning(paste("Merging events limit reached at",
                    iterations,
                    "iterations, further merging abandoned."))
      break
    }
  }
  
  #Store the group identifier (row number of the first event in each
  #overlapping group) in the requested column, by reference.
  data.table::set(x = daoh.event.dt, 
                  j = merge.event.id.col.name,
                  value = overlap.inds)
  
  return(daoh.event.dt)
}

#' Crop the end date of each admission so that it is not outside the DAOH
#' period, and is before the patient died, then merge overlapping events. The
#' cropped output columns will be suitable for calculating DAOH.
#'
#' @param daoh.event.dt A data.table with patient identifier, index event
#'   identifier, and all admissions found within the DAOH period of that index
#'   event, as generated by prune.events.to.period, plus the
#'   merge.event.id.col.name column generated by detect.overlapping.events.
#' @param patient.dt A data.table with patient details, minimally including date
#'   of death.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param admission.start.col.name Character name of the column containing
#'   admission start dates (Default: 'EVSTDATE')
#' @param admission.end.col.name Character name of the column containing
#'   admission end dates (Default: 'EVENDATE')
#' @param daoh.period.start.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the start of each DAOH period (Default:
#'   'daoh.period.start')
#' @param daoh.period.end.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the end of each DAOH period (Default:
#'   'daoh.period.end')
#' @param dod.col.name Character name of the column that contains date of death
#'   in the patient.dt (Default: 'DOD')
#' @param merge.event.id.col.name Character name of the column that was
#'   populated with unique identifiers of overlapping events by
#'   detect.overlapping.events (Default: 'merge.event.id')
#' @param cropped.admission.start.col.name Character name of the column that
#'   will be generated with admission start dates that do not go outside the
#'   DAOH period (Default: 'cropped.admission.start')
#' @param cropped.admission.end.col.name Character name of the column that will
#'   be generated with admission end dates that do not go outside the DAOH
#'   period (Default: 'cropped.admission.end')
#'
#' @return A data.table with patient identifier, index event identifier, and all
#'   admissions found within the DAOH period of that index event, cropped so
#'   that they are not outside the DAOH limits, or overlapping patient death.
#'
#' @export merge.and.crop.events
merge.and.crop.events = function(daoh.event.dt,
                                 patient.dt,
                                 patient.id.col.name = 'PRIM_HCU.IDencrypted',
                                 index.event.id.col.name = 'index.event.id',
                                 admission.start.col.name = 'EVSTDATE',
                                 admission.end.col.name = 'EVENDATE',
                                 daoh.period.start.col.name = 'daoh.period.start',
                                 daoh.period.end.col.name = 'daoh.period.end',
                                 dod.col.name = 'DOD',
                                 merge.event.id.col.name = 'merge.event.id',
                                 cropped.admission.start.col.name = 'cropped.admission.start',
                                 cropped.admission.end.col.name = 'cropped.admission.end') {
  
  #Attach death dates (left join, so events of patients without a row in
  #patient.dt are kept with a missing date of death).
  daoh.event.dt = data.table::merge.data.table(daoh.event.dt,
                                               patient.dt[, c(patient.id.col.name, dod.col.name),
                                                          with = FALSE],
                                               by = patient.id.col.name,
                                               all.x = TRUE)
  
  #Crop the end date of each admission so that it is not outside the DAOH period,
  #and is before the patient died, merge overlapping events.
  #Each group is one set of overlapping admissions for one index event: the
  #merged start is the earliest admission start (but not before the DAOH period
  #start), the merged end is the latest admission end (but not after the DAOH
  #period end or the day before death). A missing date of death is ignored via
  #na.rm.
  #NOTE(review): if the day before death falls before the cropped start, the
  #cropped end precedes the cropped start - confirm whether that can occur.
  daoh.event.dt = daoh.event.dt[,
                                .(cropped.admission.start = max(
                                  min(get(admission.start.col.name)),
                                  get(daoh.period.start.col.name),
                                  na.rm = TRUE
                                ),
                                cropped.admission.end = min(
                                  max(get(admission.end.col.name)),
                                  get(dod.col.name)-1,
                                  get(daoh.period.end.col.name),
                                  na.rm = TRUE
                                )),
                                by = c(
                                  merge.event.id.col.name,
                                  patient.id.col.name,
                                  index.event.id.col.name,
                                  daoh.period.start.col.name,
                                  daoh.period.end.col.name,
                                  dod.col.name
                                )]
  
  #Rename the cropped columns to whatever the caller asked for.
  data.table::setnames(daoh.event.dt,
                       old = c('cropped.admission.start', 'cropped.admission.end'),
                       new = c(cropped.admission.start.col.name, cropped.admission.end.col.name))
  
  #Remove the column that was used to merge events
  data.table::set(x = daoh.event.dt,
                  j = merge.event.id.col.name,
                  value = NULL)
  
  return(daoh.event.dt)
}

#' Runs the functions necessary to merge events so that they do not overlap each
#' other or patient death, and do not go outside the DAOH period. These events
#' will be suitable for calculating DAOH.
#'
#' @param index.op.dt A data.table that minimally has a patient identifier, a
#'   unique index event identifier for each event, and a date on which it
#'   occurred. Will be modified by reference to have a DAOH start and end date.
#' @param event.dt A data.table of all other admission events that occurred,
#'   minimally with a patient identifier and event start and end dates.
#' @param patient.dt A data.table with patient details, minimally including date
#'   of death.
#' @param daoh.limits Numeric vector of length two with DAOH start and end
#'   relative to the index event.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param index.event.date.col.name Character name of the index event date
#'   column (Default: 'OP_ACDTE')
#' @param admission.start.col.name Character name of the column containing
#'   admission start dates (Default: 'EVSTDATE')
#' @param admission.end.col.name Character name of the column containing
#'   admission end dates (Default: 'EVENDATE')
#' @param daoh.period.start.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the start of each DAOH period (Default:
#'   'daoh.period.start')
#' @param daoh.period.end.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the end of each DAOH period (Default:
#'   'daoh.period.end')
#' @param dod.col.name Character name of the column that contains date of death
#'   in the patient.dt (Default: 'DOD')
#'   
#' @return A data.table with patient identifier, index event identifier, and all
#'   admissions found within the DAOH period of that index event, cropped so
#'   that they are not outside the DAOH limits, or overlapping patient death.
#'
#' @export consolidate.events
consolidate.events = function(index.op.dt,
                              event.dt,
                              patient.dt,
                              daoh.limits,
                              patient.id.col.name = 'PRIM_HCU.IDencrypted',
                              index.event.id.col.name = 'index.event.id',
                              index.event.date.col.name = 'OP_ACDTE',
                              admission.start.col.name = 'EVSTDATE',
                              admission.end.col.name = 'EVENDATE',
                              daoh.period.start.col.name = 'daoh.period.start',
                              daoh.period.end.col.name = 'daoh.period.end',
                              dod.col.name = 'DOD') {
  
  #Check that the patient identifier exists in every input table.
  #Could check all the input one day.
  if (!patient.id.col.name %in% names(index.op.dt)) {
    stop(paste('patient.id.col.name: ', patient.id.col.name, 'does not exist in index.op.dt.'))
  }
  if (!patient.id.col.name %in% names(event.dt)) {
    stop(paste('patient.id.col.name: ', patient.id.col.name, 'does not exist in event.dt.'))
  }
  if (!patient.id.col.name %in% names(patient.dt)) {
    stop(paste('patient.id.col.name: ', patient.id.col.name, 'does not exist in patient.dt'))
  }
  
  #Step 1: keep only admissions overlapping each index event's DAOH period
  #(also adds the DAOH period columns to index.op.dt by reference).
  daoh.event.dt = prune.events.to.period(index.op.dt = index.op.dt,
                                         daoh.limits = daoh.limits,
                                         event.dt = event.dt,
                                         patient.id.col.name = patient.id.col.name,
                                         index.event.id.col.name = index.event.id.col.name,
                                         index.event.date.col.name = index.event.date.col.name,
                                         admission.start.col.name = admission.start.col.name,
                                         admission.end.col.name = admission.end.col.name,
                                         daoh.period.start.col.name = daoh.period.start.col.name,
                                         daoh.period.end.col.name = daoh.period.end.col.name)
  
  #Step 2: label groups of overlapping admissions. The returned table is used
  #explicitly rather than relying on the by-reference side effect.
  daoh.event.dt = detect.overlapping.events(daoh.event.dt = daoh.event.dt,
                                            patient.id.col.name = patient.id.col.name,
                                            index.event.id.col.name = index.event.id.col.name,
                                            admission.start.col.name = admission.start.col.name,
                                            admission.end.col.name = admission.end.col.name,
                                            daoh.period.start.col.name = daoh.period.start.col.name,
                                            daoh.period.end.col.name = daoh.period.end.col.name)
  
  #Step 3: collapse each overlapping group and crop it to the DAOH period and
  #the date of death.
  daoh.event.dt = merge.and.crop.events(daoh.event.dt = daoh.event.dt,
                                        patient.dt = patient.dt,
                                        patient.id.col.name = patient.id.col.name,
                                        index.event.id.col.name = index.event.id.col.name,
                                        admission.start.col.name = admission.start.col.name,
                                        admission.end.col.name = admission.end.col.name,
                                        daoh.period.start.col.name = daoh.period.start.col.name,
                                        daoh.period.end.col.name = daoh.period.end.col.name,
                                        dod.col.name = dod.col.name)
  
  return(daoh.event.dt)
  
}

#' Calculates the number of days each patient spent in hospital during the DAOH
#' period of each index event.
#'
#' @param index.op.dt A data.table that minimally has a patient identifier, a
#'   unique index event identifier for each event, and a date on which it
#'   occurred. Will be modified by reference to have a days in hospital column
#'   for each index event.
#' @param daoh.event.dt A data.table with index event identifier, and all
#'   admissions found within the DAOH period of that index event, cropped so
#'   that they are not outside the DAOH limits, or overlapping patient death,
#'   generated by consolidate.events.
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param cropped.admission.start.col.name Character name of the column with
#'   admission start dates that do not go outside the DAOH period (Default:
#'   'cropped.admission.start')
#' @param cropped.admission.end.col.name Character name of the column with
#'   admission end dates that do not go outside the DAOH period (Default:
#'   'cropped.admission.end')
#'
#' @return A data.table that is index.op.dt, but with a column named 'dih' (days
#'   in hospital). The result is a new table produced by a merge, so the
#'   returned value must be used; the input index.op.dt is only re-keyed.
#'
#' @export
calculate.dih = function(index.op.dt,
                         daoh.event.dt,
                         index.event.id.col.name = 'index.event.id',
                         cropped.admission.start.col.name = 'cropped.admission.start',
                         cropped.admission.end.col.name = 'cropped.admission.end') {
  
  #Calculate days in hospital for each hospitalisation (both the start and end
  #day count, hence + 1). Adds a 'dih' column to daoh.event.dt by reference.
  data.table::set(x = daoh.event.dt,
                  j = 'dih',
                  value = daoh.event.dt[,get(cropped.admission.end.col.name) - get(cropped.admission.start.col.name) + 1])
  
  
  # This is a little faster way of calculating DIH, but I'm a little edgy about
  # doing it, as it doesn't explicitly link by ID, but assumes index.op.dt IDs
  # match one-to-one with daoh.event.dt. Might be worth working on if the data are
  # very big, could be a justified assumption.
  # data.table::set(x = index.op.dt,
  #                 j = 'dih',
  #                 value = daoh.event.dt[, .(dih = sum(dih)),
  #                                       by = index.event.id.col.name][,  dih])
  
  #Calculate days in hospital for each index event.
  #Both inputs are re-keyed by reference on the index event identifier.
  data.table::setkeyv(index.op.dt, index.event.id.col.name)
  data.table::setkeyv(daoh.event.dt, index.event.id.col.name)
  
  #Total days in hospital per index event.
  dih.per.index.event = daoh.event.dt[, .(dih = as.numeric(sum(dih))),
                                      by = index.event.id.col.name]
  
  #Left join on the index event identifier, stated explicitly rather than
  #inferred from the keys. This produces a new table.
  index.op.dt = data.table::merge.data.table(x = index.op.dt,
                                             y = dih.per.index.event,
                                             by = index.event.id.col.name,
                                             all.x = TRUE)
  
  #Index events with no admissions in their DAOH period spent zero days in
  #hospital.
  data.table::set(x = index.op.dt,
                  i = which(index.op.dt[,is.na(dih)]),
                  j = 'dih',
                  value = 0)
  

  return(index.op.dt)
}

#' Calculates the number of days each patient spent dead during the DAOH period
#' of each index event.
#'
#' @param index.op.dt A data.table that minimally has a patient identifier, a
#'   unique index event identifier for each event, and a date on which it
#'   occurred. Will be modified by reference to have a days dead column for each
#'   index event.
#' @param patient.dt A data.table with patient details, minimally including date
#'   of death.
#' @param daoh.limits Numeric vector of length two with DAOH start and end
#'   relative to the index event.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param daoh.period.end.col.name Character name of the column that will be
#'   generated in index.op.dt to contain the end of each DAOH period (Default:
#'   'daoh.period.end')
#' @param dod.col.name Character name of the column that contains date of death
#'   in the patient.dt (Default: 'DOD')
#'
#' @return A data.table that is index.op.dt, but with the date of death column
#'   attached and a column named 'dd' (days dead). The result is a new table
#'   produced by a merge, so the returned value must be used.
#'
#' @export
calculate.dd = function(index.op.dt,
                        patient.dt,
                        patient.id.col.name = 'PRIM_HCU.IDencrypted',
                        index.event.id.col.name = 'index.event.id',
                        daoh.period.end.col.name = 'daoh.period.end',
                        dod.col.name = 'DOD') {
  
  #Attach death dates (left join on patient; this produces a new table).
  index.op.dt = data.table::merge.data.table(index.op.dt,
                                             patient.dt[, c(patient.id.col.name, dod.col.name),
                                                        with = FALSE],
                                             by = patient.id.col.name,
                                             all.x = TRUE)
  
  #Remove deaths not occurring during the period, i.e. blank out dates of death
  #that fall after the DAOH period end. which() drops rows where the comparison
  #is NA (no date of death).
  data.table::set(x = index.op.dt,
                  i = which((index.op.dt[,get(dod.col.name) > get(daoh.period.end.col.name)])),
                  j = dod.col.name,
                  value = NA)
  
  #Calculate days dead for each index event: zero when there is no (remaining)
  #date of death, otherwise the day of death through the period end, inclusive.
  #NOTE(review): a death before the DAOH period start would give more days dead
  #than the period contains - confirm whether that can occur in the input.
  data.table::set(x = index.op.dt,
                  j = 'dd',
                  value = index.op.dt[,data.table::fifelse(is.na(get(dod.col.name)),
                                                           yes = 0,
                                                           as.numeric(get(daoh.period.end.col.name) - get(dod.col.name) + 1))])

  return(index.op.dt)
}


#' Calculates the number of days each patient spent alive and out of hospital
#' during the DAOH period of each index event. Applies calculate.dih and
#' calculate.dd, then sums.
#'
#' @param index.op.dt A data.table that minimally has a patient identifier, a
#'   unique index event identifier for each event, and a date on which it
#'   occurred. Will be modified by reference to have a days in hospital column
#'   for each index event.
#' @param patient.dt A data.table with patient details, minimally including date
#'   of death.
#' @param daoh.event.dt A data.table with index event identifier, and all
#'   admissions found within the DAOH period of that index event, cropped so
#'   that they are not outside the DAOH limits, or overlapping patient death,
#'   generated by consolidate.events.
#' @param patient.id.col.name Character name of the patient identifier column
#'   (Default: 'PRIM_HCU.IDencrypted')
#' @param index.event.id.col.name Character name of the index event identifier
#'   column (Default: 'index.event.id')
#' @param cropped.admission.start.col.name Character name of the column with
#'   admission start dates that do not go outside the DAOH period (Default:
#'   'cropped.admission.start')
#' @param cropped.admission.end.col.name Character name of the column with
#'   admission end dates that do not go outside the DAOH period (Default:
#'   'cropped.admission.end')
#' @param daoh.period.start.col.name Character name of the column in index.op.dt
#'   with the start of each DAOH period (Default: 'daoh.period.start')
#' @param daoh.period.end.col.name Character name of the column in index.op.dt
#'   with the end of each DAOH period (Default: 'daoh.period.end')
#' @param dod.col.name Character name of the column that contains date of death
#'   in the patient.dt (Default: 'DOD')
#'
#' @return A data.table that is index.op.dt, but with columns for days dead,
#'   days in hospital, and days alive and out of hospital (dd, dih, daoh). The
#'   result is a new table built by calculate.dih and calculate.dd, so the
#'   returned value must be used.
#'
#' @export
calculate.daoh = function(index.op.dt,
                          patient.dt,
                          daoh.event.dt,
                          patient.id.col.name = 'PRIM_HCU.IDencrypted',
                          index.event.id.col.name = 'index.event.id',
                          cropped.admission.start.col.name = 'cropped.admission.start',
                          cropped.admission.end.col.name = 'cropped.admission.end',
                          daoh.period.start.col.name = 'daoh.period.start',
                          daoh.period.end.col.name = 'daoh.period.end',
                          dod.col.name = 'DOD') {

  #Days in hospital per index event (adds the 'dih' column).
  index.op.dt = calculate.dih(
    index.op.dt = index.op.dt,
    daoh.event.dt = daoh.event.dt,
    index.event.id.col.name = index.event.id.col.name,
    cropped.admission.start.col.name = cropped.admission.start.col.name,
    cropped.admission.end.col.name = cropped.admission.end.col.name
  )

  #Days dead per index event (adds the 'dd' column).
  index.op.dt = calculate.dd(
    index.op.dt = index.op.dt,
    patient.dt = patient.dt,
    index.event.id.col.name = index.event.id.col.name,
    patient.id.col.name = patient.id.col.name,
    dod.col.name = dod.col.name,
    daoh.period.end.col.name = daoh.period.end.col.name
  )

  #Length of each DAOH period, counting both the first and the last day.
  period.days = index.op.dt[, get(daoh.period.end.col.name) - get(daoh.period.start.col.name) + 1]

  #Days within the period that were spent dead or in hospital.
  days.not.at.home = index.op.dt[, dd + dih]

  #Calculate DAOH: whatever is left of the period.
  data.table::set(x = index.op.dt,
                  j = 'daoh',
                  value = period.days - days.not.at.home)

  return(index.op.dt)
}

#' Calculate standard error, with or without NA
#'
#' @param x Numeric vector
#' @param na.rm Remove NA first
#'
#' @return Standard error
#' @export
std.err <- function(x, na.rm = FALSE) {
  # Optionally discard missing values before doing anything else.
  values <- if (na.rm) na.omit(x) else x
  # Standard error of the mean: square root of (sample variance / n).
  n <- length(values)
  sqrt(var(values) / n)
}
mattmoo/daohtools documentation built on Feb. 5, 2023, 5:38 a.m.